Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
21 commits
Select commit Hold shift + click to select a range
e8066a6
r.kappa: print NAs also for the first category
marisn Sep 9, 2022
195cf44
r.kappa: move calculations to a single location
marisn Nov 24, 2022
7001e18
r.kappa: fix potential integer overflow (thanks to @nilason for solut…
marisn Nov 24, 2022
46c6597
r.kappa: add JSON output option
marisn Dec 1, 2022
a0d4a06
r.kappa: better handle case when maps do not have any common values
marisn Dec 1, 2022
a0e15ca
r.kappa: add documentation for JSON output
marisn Dec 1, 2022
8018d31
r.kappa: remove unused variables
marisn Dec 1, 2022
81f86c0
r.kappa: code after G_fatal_error is unreachable
marisn Dec 2, 2022
12e1481
r.kappa: replace no data value with a constant variable
marisn Dec 2, 2022
e039cf3
r.kappa: remove implicit casts
marisn Dec 2, 2022
3bfaa9f
r.kappa: move from JSON output flag to output format selector
marisn Dec 2, 2022
132966c
r.kappa: use full names of variables
marisn Dec 2, 2022
b1cf41e
r.kappa: no need for quiet flag in tests if output is ignored anyway
marisn Dec 2, 2022
0b21367
r.kappa: expand variable names for better readability
marisn Dec 2, 2022
21f13dd
r.kappa: reindent code with clang-format
marisn Dec 2, 2022
f08caf1
r.kappa: expand function names
marisn Dec 2, 2022
0d5b705
r.kappa: use better summation approach
marisn Dec 3, 2022
cb558c9
r.kappa: implement reporting of MCC
marisn Dec 3, 2022
cb12b56
Merge branch 'main' into r_kappa_mcc
marisn Dec 15, 2022
f40d390
r.kappa: beautify code
marisn Dec 23, 2022
fad9079
Style change according to wishes of @wenzeslaus
marisn Dec 24, 2022
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
58 changes: 50 additions & 8 deletions raster/r.kappa/calc_metrics.c
Original file line number Diff line number Diff line change
@@ -1,11 +1,13 @@
#include <stdlib.h>
#include <math.h>
#include <grass/gis.h>
#include <grass/glocale.h>
#include "kappa.h"
#include "local_proto.h"

static int longcomp(const void *aa, const void *bb);
static int collapse(long *l, int n);
static void update_sum(double *sum, double *c, double value);

void calc_metrics(void)
{
Expand All @@ -15,9 +17,12 @@ void calc_metrics(void)
int ncat1, ncat2;
int cndx;
double *pi, *pj, *pii;
double p0 = 0.0, pC = 0.0;
double inter1 = 0.0, inter2 = 0.0;
double p0 = 0.0, pC = 0.0, p0c = 0.0, pCc = 0.0;
double inter1 = 0.0, inter2 = 0.0, inter1c = 0.0, inter2c = 0.0;
int a_i = 0, b_i = 0;
double spktk = 0.0, spktkc = 0.0;
double spk2 = 0.0, spk2c = 0.0, stk2 = 0.0, stk2c = 0.0;
double unrooted;

metrics = (METRICS *)G_malloc(sizeof(METRICS));
if (nstats == 0) {
Expand All @@ -28,6 +33,7 @@ void calc_metrics(void)
metrics->overall_accuracy = 0.0;
metrics->kappa = na_value;
metrics->kappa_variance = na_value;
metrics->mcc = na_value;
return;
}

Expand Down Expand Up @@ -142,10 +148,12 @@ void calc_metrics(void)
else
metrics->producers_accuracy[i] = 100 * (pii[i] / pj[i]);
/* theta 1 */
p0 += pii[i];
update_sum(&p0, &p0c, pii[i]);
/* theta 2 */
pC += pi[i] * pj[i];
update_sum(&pC, &pCc, pi[i] * pj[i]);
}
p0 += p0c;
pC += pCc;
if (pC != 1)
metrics->kappa = (p0 - pC) / (1 - pC);
else
Expand All @@ -158,7 +166,8 @@ void calc_metrics(void)
else
metrics->conditional_kappa[i] =
(pii[i] - pi[i] * pj[i]) / (pi[i] - pi[i] * pj[i]);
inter1 += pii[i] * pow(((1 - pC) - (1 - p0) * (pi[i] + pj[i])), 2.);
update_sum(&inter1, &inter1c,
pii[i] * pow(((1 - pC) - (1 - p0) * (pi[i] + pj[i])), 2.));
}

/* kappa variance */
Expand All @@ -170,17 +179,37 @@ void calc_metrics(void)
if (Gstats[l].cats[1] == rlst[i])
b_i = i;
}
inter2 += Gstats[l].count * pow((pi[a_i] + pj[b_i]), 2.) /
metrics->observations;
update_sum(&inter2, &inter2c,
Gstats[l].count * pow((pi[a_i] + pj[b_i]), 2.) /
metrics->observations);
}
}
inter1 += inter1c;
inter2 += inter2c;
metrics->kappa_variance = (inter1 + pow((1 - p0), 2.) * inter2 -
pow((p0 * pC - 2 * pC + p0), 2.)) /
pow((1 - pC), 4.) / metrics->observations;

G_free(pi);
G_free(pj);
G_free(pii);

/* MCC */
for (i = 0; i < ncat; i++) {
update_sum(&spktk, &spktkc, metrics->row_sum[i] * metrics->col_sum[i]);
update_sum(&spk2, &spk2c, metrics->row_sum[i] * metrics->row_sum[i]);
update_sum(&stk2, &stk2c, metrics->col_sum[i] * metrics->col_sum[i]);
}
spktk += spktkc;
spk2 += spk2c;
stk2 += stk2c;
unrooted = (metrics->observations * metrics->observations - spk2) *
(metrics->observations * metrics->observations - stk2);
if (unrooted <= 0.0) {
metrics->mcc = na_value;
return;
}
metrics->mcc =
(metrics->correct * metrics->observations - spktk) / sqrt(unrooted);
};

/* remove repeated values */
Expand Down Expand Up @@ -213,3 +242,16 @@ static int longcomp(const void *aa, const void *bb)

return (*a > *b);
}

/* Implements improved Kahan–Babuska algorithm by Neumaier, A. 1974 */
void update_sum(double *sum, double *c, double value)
{
double tmp = *sum + value;

if (fabs(*sum) >= fabs(value))
*c += (*sum - tmp) + value;
else
*c += (value - tmp) + *sum;

*sum = tmp;
}
1 change: 1 addition & 0 deletions raster/r.kappa/kappa.h
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@ struct _metrics_
double kappa;
double kappa_variance;
double *conditional_kappa;
double mcc;
};

extern struct Cell_head window;
Expand Down
6 changes: 5 additions & 1 deletion raster/r.kappa/print_json.c
Original file line number Diff line number Diff line change
Expand Up @@ -117,7 +117,11 @@ void print_json()
else
fprintf(fd, "%.5f", metrics->conditional_kappa[i]);
}
fprintf(fd, "]\n");
fprintf(fd, "],\n");
if (metrics->mcc == na_value)
fprintf(fd, " \"mcc\": null\n");
else
fprintf(fd, " \"mcc\": %.5f\n", metrics->mcc);

fprintf(fd, "}\n");
if (output != NULL)
Expand Down
9 changes: 6 additions & 3 deletions raster/r.kappa/print_kappa.c
Original file line number Diff line number Diff line change
Expand Up @@ -34,16 +34,19 @@ void print_kappa(void)
fprintf(fd, "%f\n", metrics->conditional_kappa[i]);
}
fprintf(fd, "\n");
fprintf(fd, "Kappa\t\tKappa Variance\n");
fprintf(fd, "Kappa\t\tKappa Variance\tMCC\n");
if (metrics->kappa == na_value)
fprintf(fd, "NA");
else
fprintf(fd, "%f", metrics->kappa);
if (metrics->kappa_variance == na_value)
fprintf(fd, "\tNA");
else
fprintf(fd, "\t%f", metrics->kappa_variance);
if (metrics->mcc == na_value)
fprintf(fd, "\tNA\n");
else
fprintf(fd, "\t%f\n", metrics->kappa_variance);

fprintf(fd, "\t%f\n", metrics->mcc);
fprintf(fd, "\nObs Correct\tTotal Obs\t%% Observed Correct\n");
fprintf(fd, "%ld\t\t%ld\t\t%f\n", metrics->correct, metrics->observations,
metrics->overall_accuracy);
Expand Down
12 changes: 9 additions & 3 deletions raster/r.kappa/r.kappa.html
Original file line number Diff line number Diff line change
Expand Up @@ -69,6 +69,10 @@ <h2>OUTPUT VARIABLES</h2>
<dd>Variance of kappa index. Correctness needs to be validated.</dd>
<dt>Conditional kappa</dt>
<dd>Conditional user's kappa for specified class.</dd>
<dt>MCC</dt>
<dd>Matthews (Mattheus) Correlation Coefficient is implemented
according to Grandini, M., Bagli, E., Visani, G. 2020.
"Metrics for multi-class classification: An overview."</dd>
</dl>

<h2>NOTES</h2>
Expand Down Expand Up @@ -106,9 +110,11 @@ <h2>NOTES</h2>
</ul>

<p>
Some of reported values (Choen's kappa, overall accuracy) can be
Some of reported values (overall accuracy, Choen's kappa, MCC) can be
misleading if cell count among classes is not balanced. See e.g.
Powers, D.M.W., 2012. The Problem with Kappa.
Powers, D.M.W., 2012. "The Problem with Kappa"; Zhu, Q., 2020.
"On the performance of Matthews correlation coefficient (MCC) for
imbalanced dataset".

<H2>EXAMPLE</H2>

Expand Down Expand Up @@ -143,4 +149,4 @@ <h2>SEE ALSO</h2>
<h2>AUTHORS</h2>

Tao Wen, University of Illinois at Urbana-Champaign, Illinois<br>
Maris Nartiss, University of Latvia (JSON output)
Maris Nartiss, University of Latvia (JSON output, MCC)
8 changes: 8 additions & 0 deletions raster/r.kappa/testsuite/test_r_kappa.py
Original file line number Diff line number Diff line change
Expand Up @@ -149,6 +149,8 @@ def match(self, pat, ref):
# 4 0 14 4 0 - 0.000 0.000 0.222 0.000 0.000
# 5 1 17 0 0 1.000 1.000 0.056 0.056 0.056 1.000
# 6 2 13 2 1 0.667 0.500 0.111 0.222 0.167 0.400
# Correct MCC value was calculated manually and validated with
# mcc function of R package mltools.

def test_standard_output(self):
out = read_command(
Expand Down Expand Up @@ -181,6 +183,7 @@ def test_standard_output(self):
# Kappa value
vals = rows[28].split()
self.assertTrue(self.match(vals[0], 0.52091))
self.assertTrue(self.match(vals[2], 0.55930))

# Overall characteristics
vals = rows[31].split()
Expand Down Expand Up @@ -283,6 +286,7 @@ def test_standard_output(self):
# Kappa value
vals = rows[28].split()
self.assertTrue(self.match(vals[0], 0.0))
self.assertTrue(self.match(vals[2], "NA"))

# Overall characteristics
vals = rows[31].split()
Expand Down Expand Up @@ -341,6 +345,7 @@ def setUpClass(cls):
"producers_accuracy": [57.1429, 0.0, 100.0, None, 100.0, 66.66666],
"users_accuracy": [100.0, None, 80.0, 0.0, 100.0, 50.0],
"conditional_kappa": [1.0, None, 0.742857, 0.0, 1.0, 0.400],
"mcc": 0.55930,
}
)

Expand Down Expand Up @@ -380,6 +385,7 @@ def setUpClass(cls):
"producers_accuracy": [0.0, 0.0, 0.0, 0.0, 0.0, None],
"users_accuracy": [None, None, None, None, None, 0.0],
"conditional_kappa": [None, None, None, None, None, 0.0],
"mcc": None,
}
)

Expand Down Expand Up @@ -410,6 +416,7 @@ def setUpClass(cls):
"producers_accuracy": [],
"users_accuracy": [],
"conditional_kappa": [],
"mcc": None,
}
)

Expand Down Expand Up @@ -440,6 +447,7 @@ def setUpClass(cls):
"producers_accuracy": [],
"users_accuracy": [],
"conditional_kappa": [],
"mcc": None,
}
)

Expand Down