From e8066a604ae01e58fdd2b5c512de84b2f4609e87 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?M=C4=81ris=20Narti=C5=A1s?= Date: Fri, 9 Sep 2022 17:39:00 +0300 Subject: [PATCH 01/20] r.kappa: print NAs also for the first category Although original code lacks any explanation why NA should not be printed for the first raster category, I do suspect it stems from idea that the first cat is 0 and before proper NULL support 0 was "no data" value. --- raster/r.kappa/calc_kappa.c | 124 ------------------------------------ 1 file changed, 124 deletions(-) delete mode 100644 raster/r.kappa/calc_kappa.c diff --git a/raster/r.kappa/calc_kappa.c b/raster/r.kappa/calc_kappa.c deleted file mode 100644 index 0d252110da7..00000000000 --- a/raster/r.kappa/calc_kappa.c +++ /dev/null @@ -1,124 +0,0 @@ -#include -#include -#include -#include "kappa.h" -#include "local_proto.h" - - -void calc_kappa(void) -{ - int i, j; - int a_i, b_i; - int s, l; - size_t ns; - double *pi, *pj, *pii, p0, pC; - double kp, vkp, *kpp; - double obs, inter1, inter2; - long total; - FILE *fd; - - /* initialize */ - s = 0; - l = -1; - ns = nstats; - obs = 0; - inter1 = inter2 = 0; - p0 = pC = 0; - - if (output == NULL) - fd = stdout; - else if ((fd = fopen(output, "a")) == NULL) { - G_fatal_error(_("Cannot open file <%s> to write kappa and relevant parameters"), - output); - return; - } - - total = count_sum(&s, l); - - /* calculate the parameters of the kappa-calculation */ - pi = (double *)G_calloc(ncat, sizeof(double)); - pj = (double *)G_calloc(ncat, sizeof(double)); - pii = (double *)G_calloc(ncat, sizeof(double)); - kpp = (double *)G_calloc(ncat, sizeof(double)); - - for (i = 0; i < ncat; i++) { - for (j = 0; j < ns; j++) { - if (Gstats[j].cats[0] == rlst[i]) - pi[i] += Gstats[j].count; - - if (Gstats[j].cats[1] == rlst[i]) - pj[i] += Gstats[j].count; - - if ((Gstats[j].cats[0] == Gstats[j].cats[1]) && - (Gstats[j].cats[0] == rlst[i])) - pii[i] += Gstats[j].count; - } - obs += pii[i]; - } - - for (i = 0; i < ncat; i++) { - pi[i] = pi[i] / total; - pj[i] = pj[i] / total; - pii[i] = pii[i] / total; - p0 += pii[i]; - pC += pi[i] * pj[i]; - } - - for (i = 0; i < ncat; i++) { - if (pi[i] == 0) - kpp[i] = -999; - else - kpp[i] = (pii[i] - pi[i] * pj[i]) / (pi[i] - pi[i] * pj[i]); - } - - /* print out the comission and omission accuracy, and conditional kappa */ - fprintf(fd, "\nCats\t%% Comission\t%% Omission\tEstimated Kappa\n"); - for (i = 0; i < ncat; i++) { - fprintf(fd, "%ld\t", rlst[i]); - if (pi[i] == 0) - fprintf(fd, "NA\t\t"); - else - fprintf(fd, "%f\t", 100 * (1 - pii[i] / pi[i])); - if (pj[i] == 0) - fprintf(fd, "NA\t\t"); - else - fprintf(fd, "%f\t", 100 * (1 - pii[i] / pj[i])); - if (kpp[i] == -999) - fprintf(fd, "NA\n"); - else - fprintf(fd, "%f\n", kpp[i]); - } - fprintf(fd, "\n"); - - for (i = 0; i < ncat; i++) { - inter1 += pii[i] * pow(((1 - pC) - (1 - p0) * (pi[i] + pj[i])), 2.); - } - - for (j = 0; j < ns; j++) { - if (Gstats[j].cats[0] != Gstats[j].cats[1]) { - for (i = 0; i < ncat; i++) { - if (Gstats[j].cats[0] == rlst[i]) - a_i = i; - if (Gstats[j].cats[1] == rlst[i]) - b_i = i; - } - inter2 += Gstats[j].count * pow((pi[a_i] + pj[b_i]), 2.) / total; - } - } - kp = (p0 - pC) / (1 - pC); - vkp = (inter1 + pow((1 - p0), 2.) * inter2 - - pow((p0 * pC - 2 * pC + p0), 2.)) / pow((1 - pC), 4.) / total; - fprintf(fd, "Kappa\t\tKappa Variance\n"); - fprintf(fd, "%f\t%f\n", kp, vkp); - - fprintf(fd, "\nObs Correct\tTotal Obs\t%% Observed Correct\n"); - fprintf(fd, "%ld\t\t%ld\t\t%f\n", (long)obs, total, (100. * obs / total)); - if (output != NULL) - fclose(fd); - G_free(pi); - G_free(pj); - G_free(pii); - G_free(kpp); - /* print labels for categories of maps */ - prt_label(); -} From 195cf446eaf56e5d30fb29088c1db143f936b496 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?M=C4=81ris=20Narti=C5=A1s?= Date: Thu, 24 Nov 2022 17:28:22 +0200 Subject: [PATCH 02/20] r.kappa: move calculations to a single location --- raster/r.kappa/calc_metrics.c | 203 +++++++++++++++++++++++ raster/r.kappa/kappa.h | 19 +++ raster/r.kappa/local_proto.h | 10 +- raster/r.kappa/main.c | 13 +- raster/r.kappa/prt2csv_mat.c | 105 +----------- raster/r.kappa/prt_kappa.c | 57 +++++++ raster/r.kappa/prt_mat.c | 103 +----------- raster/r.kappa/sum.c | 49 ------ raster/r.kappa/testsuite/test_r_kappa.py | 4 +- 9 files changed, 312 insertions(+), 251 deletions(-) create mode 100644 raster/r.kappa/calc_metrics.c create mode 100644 raster/r.kappa/prt_kappa.c delete mode 100644 raster/r.kappa/sum.c diff --git a/raster/r.kappa/calc_metrics.c b/raster/r.kappa/calc_metrics.c new file mode 100644 index 00000000000..95467988402 --- /dev/null +++ b/raster/r.kappa/calc_metrics.c @@ -0,0 +1,203 @@ +#include +#include +#include +#include "kappa.h" +#include "local_proto.h" + +static int longcomp(const void *aa, const void *bb); +static int collapse(long *l, int n); + + +void calc_metrics(void) +{ + int i, j, k; + long *clst; + int ncat1, ncat2; + int cndx; + double *pi, *pj, *pii; + double p0 = 0, pC = 0; + double inter1 = 0, inter2 = 0; + int a_i, b_i; + + /* get the cat lists */ + rlst = (long *)G_calloc(nstats * 2, sizeof(long)); + clst = (long *)G_calloc(nstats, sizeof(long)); + for (i = 0; i < nstats; i++) { + rlst[i] = Gstats[i].cats[0]; + clst[i] = Gstats[i].cats[1]; + } + + /* sort the cat lists */ + qsort(rlst, nstats, sizeof(long), longcomp); + qsort(clst, nstats, sizeof(long), longcomp); + + /* remove repeated cats */ + ncat1 = collapse(rlst, nstats); + ncat2 = collapse(clst, nstats); + + /* copy clst to the end of rlst, remove repeated cats, and free unused memory */ + for (i = 0; i < ncat2; i++) + rlst[ncat1 + i] = clst[i]; + qsort(rlst, ncat1 + ncat2, sizeof(long), longcomp); + ncat = collapse(rlst, ncat1 + ncat2); + rlst = (long *)G_realloc(rlst, ncat * sizeof(long)); + G_free(clst); + + /* allocate matrix and fill in with cats' value */ + metrics = (METRICS *) G_malloc(sizeof(METRICS)); + metrics->matrix = (long *)G_malloc(ncat * ncat * sizeof(long)); + for (i = 0; i < ncat * ncat; i++) + metrics->matrix[i] = 0; + for (i = 0; i < nstats; i++) { + for (j = 0; j < ncat; j++) + if (rlst[j] == Gstats[i].cats[0]) + break; + for (k = 0; k < ncat; k++) + if (rlst[k] == Gstats[i].cats[1]) + break; + /* matrix: reference in columns, classification in rows */ + metrics->matrix[j * ncat + k] = Gstats[i].count; + } + + /* Calculate marginals */ + metrics->obs = 0; + metrics->correct = 0; + metrics->colsum = (long *)G_malloc(ncat * sizeof(long)); + metrics->rowsum = (long *)G_malloc(ncat * sizeof(long)); + for (cndx = 0; cndx < ncat; cndx++) { + long t_col = 0; + long t_row = 0; + long x = cndx; + + for (k = 0; k < ncat; k++) { + t_col += metrics->matrix[x]; + x += ncat; + t_row += metrics->matrix[cndx * ncat + k]; + } + metrics->obs += t_row; + metrics->colsum[cndx] = t_col; + metrics->rowsum[cndx] = t_row; + } + if (metrics->obs == 0) { + metrics->total_acc = 0; + metrics->kappa = -999; + metrics->kappa_var = -999; + return; + } + + /* Calculate kappa values */ + pi = (double *)G_calloc(ncat, sizeof(double)); + pj = (double *)G_calloc(ncat, sizeof(double)); + pii = (double *)G_calloc(ncat, sizeof(double)); + metrics->cond_kappa = (double *)G_calloc(ncat, sizeof(double)); + metrics->user_acc = (double *)G_calloc(ncat, sizeof(double)); + metrics->prod_acc = (double *)G_calloc(ncat, sizeof(double)); + + for (i = 0; i < ncat; i++) { + for (j = 0; j < nstats; j++) { + if (Gstats[j].cats[0] == rlst[i]) { + pi[i] += Gstats[j].count; + } + + if (Gstats[j].cats[1] == rlst[i]) { + pj[i] += Gstats[j].count; + } + + if ((Gstats[j].cats[0] == Gstats[j].cats[1]) && + (Gstats[j].cats[0] == rlst[i])) { + pii[i] += Gstats[j].count; + } + } + metrics->correct += pii[i]; + } + + metrics->total_acc = 100. * metrics->correct / metrics->obs; + + /* turn observations into probabilities */ + for (i = 0; i < ncat; i++) { + pi[i] = pi[i] / metrics->obs; + pj[i] = pj[i] / metrics->obs; + pii[i] = pii[i] / metrics->obs; + if (pi[i] == 0) + metrics->user_acc[i] = -999; + else + metrics->user_acc[i] = 100 * (1 - pii[i] / pi[i]); + if (pj[i] == 0) + metrics->prod_acc[i] = -999; + else + metrics->prod_acc[i] = 100 * (1 - pii[i] / pj[i]); + /* theta 1 */ + p0 += pii[i]; + /* theta 2 */ + pC += pi[i] * pj[i]; + } + if (pC != 1) + metrics->kappa = (p0 - pC) / (1 - pC); + else + metrics->kappa = -999; + + /* conditional kappa */ + for (i = 0; i < ncat; i++) { + if (pi[i] == 0 || (pi[i] == 1 && pj[i] == 1)) + metrics->cond_kappa[i] = -999; + else + metrics->cond_kappa[i] = + (pii[i] - pi[i] * pj[i]) / (pi[i] - pi[i] * pj[i]); + inter1 += pii[i] * pow(((1 - pC) - (1 - p0) * (pi[i] + pj[i])), 2.); + } + + /* kappa variance */ + for (j = 0; j < nstats; j++) { + if (Gstats[j].cats[0] != Gstats[j].cats[1]) { + for (i = 0; i < ncat; i++) { + if (Gstats[j].cats[0] == rlst[i]) + a_i = i; + if (Gstats[j].cats[1] == rlst[i]) + b_i = i; + } + inter2 += + Gstats[j].count * pow((pi[a_i] + pj[b_i]), 2.) / metrics->obs; + } + } + metrics->kappa_var = (inter1 + pow((1 - p0), 2.) * inter2 - + pow((p0 * pC - 2 * pC + p0), 2.)) / pow((1 - pC), + 4.) / + metrics->obs; + + G_free(pi); + G_free(pj); + G_free(pii); +}; + + +/* remove repeated values */ +static int collapse(long *l, int n) +{ + long *c; + int m; + + c = l; + m = 1; + while (n-- > 0) { + if (*c != *l) { + c++; + *c = *l; + m++; + } + l++; + } + + return m; +} + + +static int longcomp(const void *aa, const void *bb) +{ + const long *a = aa; + const long *b = bb; + + if (*a < *b) + return -1; + + return (*a > *b); +} diff --git a/raster/r.kappa/kappa.h b/raster/r.kappa/kappa.h index 1519c9b01f7..804f9eec688 100644 --- a/raster/r.kappa/kappa.h +++ b/raster/r.kappa/kappa.h @@ -15,6 +15,22 @@ struct _layer_ struct Categories labels; }; +struct _metrics_ +{ + int ncat; + long obs; + long correct; + long *matrix; + long *rowsum; + long *colsum; + double total_acc; + double *prod_acc; + double *user_acc; + double kappa; + double kappa_var; + double *cond_kappa; +}; + extern struct Cell_head window; extern const char *maps[2]; @@ -32,3 +48,6 @@ extern int nlayers; #define GSTATS struct _gstats_ extern GSTATS *Gstats; extern size_t nstats; + +#define METRICS struct _metrics_ +extern METRICS *metrics; diff --git a/raster/r.kappa/local_proto.h b/raster/r.kappa/local_proto.h index 5f87285716f..36bdb7bd0f8 100644 --- a/raster/r.kappa/local_proto.h +++ b/raster/r.kappa/local_proto.h @@ -1,5 +1,5 @@ -/* calc_kappa.c */ -void calc_kappa(void); +/* prt_kappa.c */ +void prt_kappa(void); /* mask.c */ char *maskinfo(void); @@ -14,10 +14,10 @@ void prt_label(void); void prn_error_mat(int out_cols, int hdr); /* prt2csv_mat.c */ -void prn2csv_error_mat(int out_cols, int hdr); +void prn2csv_error_mat(int hdr); /* stats.c */ int stats(void); -/* sum.c */ -long count_sum(int *ns, int n1); +/* calc_metrics.c */ +void calc_metrics(void); diff --git a/raster/r.kappa/main.c b/raster/r.kappa/main.c index da6e276e0b3..26475bb0265 100644 --- a/raster/r.kappa/main.c +++ b/raster/r.kappa/main.c @@ -45,6 +45,8 @@ int nlayers; GSTATS *Gstats; size_t nstats; +METRICS *metrics; + /* function prototypes */ static void layer(const char *s); @@ -59,7 +61,7 @@ int main(int argc, char **argv) struct { - struct Flag *m, *w, *h; + struct Flag *m, *w, *h, *j; } flags; G_gisinit(argv[0]); @@ -113,6 +115,11 @@ int main(int argc, char **argv) flags.m->description = _("Print Matrix only"); flags.m->guisection = _("Output settings"); + flags.j = G_define_flag(); + flags.j->key = 'j'; + flags.j->description = _("Print measures in JSON"); + flags.j->guisection = _("Output settings"); + if (G_parser(argc, argv)) exit(EXIT_FAILURE); @@ -132,7 +139,7 @@ int main(int argc, char **argv) if (flags.m->answer) { /* prepare the data for calculation */ - prn2csv_error_mat(2048, flags.h->answer); + prn2csv_error_mat(flags.h->answer); } else { /* print header of the output */ @@ -143,7 +150,7 @@ int main(int argc, char **argv) prn_error_mat(flags.w->answer ? 132 : 80, flags.h->answer); /* generate the error matrix, kappa and variance */ - calc_kappa(); + prt_kappa(); } return EXIT_SUCCESS; } diff --git a/raster/r.kappa/prt2csv_mat.c b/raster/r.kappa/prt2csv_mat.c index 4f208b5838b..5e9f6570219 100644 --- a/raster/r.kappa/prt2csv_mat.c +++ b/raster/r.kappa/prt2csv_mat.c @@ -5,22 +5,13 @@ #include "local_proto.h" -static int longcomp(const void *aa, const void *bb); -static int collapse(long *l, int n); - - -void prn2csv_error_mat(int out_cols, int hdr) +void prn2csv_error_mat(int hdr) { - int i, j, k; - int ncat1, ncat2; - long x; - long *clst; + int j; int cndx, rndx; - int first_col = 0, last_col = 0; + int first_col, last_col; int thisone; - long t_row, t_col; - long t_rowcount; FILE *fd; long *cats; @@ -41,48 +32,9 @@ void prn2csv_error_mat(int out_cols, int hdr) return; } else { - /* get the cat lists */ - rlst = (long *)G_calloc(nstats * 2, sizeof(long)); - clst = (long *)G_calloc(nstats, sizeof(long)); - for (i = 0; i < nstats; i++) { - rlst[i] = Gstats[i].cats[0]; - clst[i] = Gstats[i].cats[1]; - } - - /* sort the cat lists */ - qsort(rlst, nstats, sizeof(long), longcomp); - qsort(clst, nstats, sizeof(long), longcomp); - - /* remove repeated cats */ - ncat1 = collapse(rlst, nstats); - ncat2 = collapse(clst, nstats); - - /* copy clst to the end of rlst, remove repeated cats, and free unused memory */ - for (i = 0; i < ncat2; i++) - rlst[ncat1 + i] = clst[i]; - qsort(rlst, ncat1 + ncat2, sizeof(long), longcomp); - ncat = collapse(rlst, ncat1 + ncat2); - rlst = (long *)G_realloc(rlst, ncat * sizeof(long)); - G_free(clst); - - /* allocate matrix and fill in with cats' value */ - matr = (long *)G_malloc(ncat * ncat * sizeof(long)); - for (i = 0; i < ncat * ncat; i++) - matr[i] = 0; - for (i = 0; i < nstats; i++) { - for (j = 0; j < ncat; j++) - if (rlst[j] == Gstats[i].cats[0]) - break; - for (k = 0; k < ncat; k++) - if (rlst[k] == Gstats[i].cats[1]) - break; - /* matrix: reference in columns, classification in rows */ - matr[j * ncat + k] = Gstats[i].count; - } + calc_metrics(); /* format and print out the error matrix in panels */ - out_cols = 2048; - t_rowcount = 0; first_col = 0; last_col = ncat; /* name line */ @@ -117,62 +69,21 @@ void prn2csv_error_mat(int out_cols, int hdr) /* entries */ for (cndx = first_col; cndx < last_col; cndx++) { thisone = (ncat * rndx) + cndx; - fprintf(fd, "%ld\t", matr[thisone]); + fprintf(fd, "%ld\t", metrics->matrix[thisone]); } /* row marginal summation */ - t_row = 0; - for (k = 0; k < ncat; k++) - t_row += matr[rndx * ncat + k]; - t_rowcount += t_row; - fprintf(fd, "%ld", t_row); + fprintf(fd, "%ld", metrics->rowsum[rndx]); fprintf(fd, "\n"); } /* column marginal summation */ fprintf(fd, "ColSum\t"); for (cndx = first_col; cndx < last_col; cndx++) { - t_col = 0; - x = cndx; - for (k = 0; k < ncat; k++) { - t_col += matr[x]; - x += ncat; - } - fprintf(fd, "%ld\t", t_col); + fprintf(fd, "%ld\t", metrics->colsum[cndx]); } /* grand total */ - fprintf(fd, "%ld", t_rowcount); + fprintf(fd, "%ld", metrics->obs); fprintf(fd, "\n\n"); - G_free(matr); if (output != NULL) fclose(fd); } } - - -/* remove repeated values */ -static int collapse(long *l, int n) -{ - long *c; - int m; - - c = l; - m = 1; - while (n-- > 0) { - if (*c != *l) { - c++; - *c = *l; - m++; - } - l++; - } - - return m; -} - - -static int longcomp(const void *aa, const void *bb) -{ - const long *a = aa; - const long *b = bb; - - return (*a - *b); -} diff --git a/raster/r.kappa/prt_kappa.c b/raster/r.kappa/prt_kappa.c new file mode 100644 index 00000000000..585178ad250 --- /dev/null +++ b/raster/r.kappa/prt_kappa.c @@ -0,0 +1,57 @@ +#include +#include +#include +#include "kappa.h" +#include "local_proto.h" + + +void prt_kappa(void) +{ + int i; + FILE *fd; + + if (output == NULL) + fd = stdout; + else if ((fd = fopen(output, "a")) == NULL) { + G_fatal_error(_("Cannot open file <%s> to write kappa and relevant parameters"), + output); + return; + } + + /* print out the comission and omission accuracy, and conditional kappa */ + fprintf(fd, "\nCats\t%% Comission\t%% Omission\tEstimated Kappa\n"); + for (i = 0; i < ncat; i++) { + fprintf(fd, "%ld\t", rlst[i]); + if (metrics->user_acc[i] == -999) + fprintf(fd, "NA\t\t"); + else + fprintf(fd, "%f\t", metrics->user_acc[i]); + if (metrics->prod_acc[i] == -999) + fprintf(fd, "NA\t\t"); + else + fprintf(fd, "%f\t", metrics->prod_acc[i]); + if (metrics->cond_kappa[i] == -999) + fprintf(fd, "NA\n"); + else + fprintf(fd, "%f\n", metrics->cond_kappa[i]); + } + fprintf(fd, "\n"); + fprintf(fd, "Kappa\t\tKappa Variance\n"); + if (metrics->kappa == -999) + fprintf(fd, "NA"); + else + fprintf(fd, "%f", metrics->kappa); + if (metrics->kappa_var == -999) + fprintf(fd, "\tNA\n"); + else + fprintf(fd, "\t%f\n", metrics->kappa_var); + + fprintf(fd, "\nObs Correct\tTotal Obs\t%% Observed Correct\n"); + fprintf(fd, "%ld\t\t%ld\t\t%f\n", metrics->correct, metrics->obs, + metrics->total_acc); + if (output != NULL) + fclose(fd); + + /* print labels for categories of maps */ + prt_label(); +} diff --git a/raster/r.kappa/prt_mat.c b/raster/r.kappa/prt_mat.c index 1314e9a15c4..d751150997f 100644 --- a/raster/r.kappa/prt_mat.c +++ b/raster/r.kappa/prt_mat.c @@ -5,16 +5,10 @@ #include "local_proto.h" -static int longcomp(const void *aa, const void *bb); -static int collapse(long *l, int n); - - void prn_error_mat(int out_cols, int hdr) { - int i, j, k; - int ncat1, ncat2; + int k; long x; - long *clst; int num_panels, at_panel; int cndx, rndx; @@ -41,44 +35,7 @@ void prn_error_mat(int out_cols, int hdr) return; } else { - /* get the cat lists */ - rlst = (long *)G_calloc(nstats * 2, sizeof(long)); - clst = (long *)G_calloc(nstats, sizeof(long)); - for (i = 0; i < nstats; i++) { - rlst[i] = Gstats[i].cats[0]; - clst[i] = Gstats[i].cats[1]; - } - - /* sort the cat lists */ - qsort(rlst, nstats, sizeof(long), longcomp); - qsort(clst, nstats, sizeof(long), longcomp); - - /* remove repeated cats */ - ncat1 = collapse(rlst, nstats); - ncat2 = collapse(clst, nstats); - - /* copy clst to the end of rlst, remove repeated cats, and free unused memory */ - for (i = 0; i < ncat2; i++) - rlst[ncat1 + i] = clst[i]; - qsort(rlst, ncat1 + ncat2, sizeof(long), longcomp); - ncat = collapse(rlst, ncat1 + ncat2); - rlst = (long *)G_realloc(rlst, ncat * sizeof(long)); - G_free(clst); - - /* allocate matrix and fill in with cats' value */ - matr = (long *)G_malloc(ncat * ncat * sizeof(long)); - for (i = 0; i < ncat * ncat; i++) - matr[i] = 0; - for (i = 0; i < nstats; i++) { - for (j = 0; j < ncat; j++) - if (rlst[j] == Gstats[i].cats[0]) - break; - for (k = 0; k < ncat; k++) - if (rlst[k] == Gstats[i].cats[1]) - break; - /* matrix: reference in columns, classification in rows */ - matr[j * ncat + k] = Gstats[i].count; - } + calc_metrics(); /* format and print out the error matrix in panels */ out_cols = (out_cols == 132) ? 9 : 5; @@ -122,32 +79,22 @@ void prn_error_mat(int out_cols, int hdr) /* entries */ for (cndx = first_col; cndx < last_col; cndx++) { thisone = (ncat * rndx) + cndx; - fprintf(fd, "%ld\t", matr[thisone]); + fprintf(fd, "%ld\t", metrics->matrix[thisone]); } /* row marginal summation */ if (addflag) { - t_row = 0; - for (k = 0; k < ncat; k++) - t_row += matr[rndx * ncat + k]; - t_rowcount += t_row; - fprintf(fd, "%ld", t_row); + fprintf(fd, "%ld", metrics->rowsum[rndx]); } fprintf(fd, "\n"); } /* column marginal summation */ fprintf(fd, "Col Sum\t\t"); for (cndx = first_col; cndx < last_col; cndx++) { - t_col = 0; - x = cndx; - for (k = 0; k < ncat; k++) { - t_col += matr[x]; - x += ncat; - } - fprintf(fd, "%ld\t", t_col); + fprintf(fd, "%ld\t", metrics->colsum[cndx]); } /* grand total */ if (addflag) - fprintf(fd, "%ld", t_rowcount); + fprintf(fd, "%ld", metrics->obs); fprintf(fd, "\n\n"); } @@ -165,8 +112,8 @@ void prn_error_mat(int out_cols, int hdr) fprintf(fd, " %5ld", rlst[rndx]); for (cndx = first_col; cndx < last_col; cndx++) { thisone = (ncat * rndx) + cndx; - fprintf(fd, " %9ld ", matr[thisone]); - t_row += matr[thisone]; + fprintf(fd, " %9ld ", metrics->matrix[thisone]); + t_row += metrics->matrix[thisone]; } t_rowcount += t_row; grand_count += t_rowcount; @@ -174,41 +121,7 @@ void prn_error_mat(int out_cols, int hdr) } fprintf(fd, "%9ld\n", grand_count); } - G_free(matr); if (output != NULL) fclose(fd); } } - - -/* remove repeated values */ -static int collapse(long *l, int n) -{ - long *c; - int m; - - c = l; - m = 1; - while (n-- > 0) { - if (*c != *l) { - c++; - *c = *l; - m++; - } - l++; - } - - return m; -} - - -static int longcomp(const void *aa, const void *bb) -{ - const long *a = aa; - const long *b = bb; - - if (*a < *b) - return -1; - - return (*a > *b); -} diff --git a/raster/r.kappa/sum.c b/raster/r.kappa/sum.c deleted file mode 100644 index 9ac48fa16aa..00000000000 --- a/raster/r.kappa/sum.c +++ /dev/null @@ -1,49 +0,0 @@ -#include "kappa.h" -#include "local_proto.h" - - -/* function prototypes */ -static int same_cats(int a, int b, int nl); - - -/* within group totals: - *ns is the first stat - (updated upon return to point to next stat) - nl is the layer number (or level) */ - -long count_sum(int *ns, int nl) -{ - long count; - int k, n; - - k = n = *ns; - count = 0; - - if (nl >= 0) { - while (n < nstats && same_cats(k, n, nl)) - count += Gstats[n++].count; - } - else { - while (n < nstats) - count += Gstats[n++].count; - } - - *ns = n; - - return count; -} - - -static int same_cats(int a, int b, int nl) -{ - long *cat_a, *cat_b; - - cat_a = Gstats[a].cats; - cat_b = Gstats[b].cats; - - while (nl-- >= 0) - if (*cat_a++ != *cat_b++) - return 0; - - return 1; -} diff --git a/raster/r.kappa/testsuite/test_r_kappa.py b/raster/r.kappa/testsuite/test_r_kappa.py index fc85a3c1a4e..8cbbab64211 100644 --- a/raster/r.kappa/testsuite/test_r_kappa.py +++ b/raster/r.kappa/testsuite/test_r_kappa.py @@ -230,8 +230,8 @@ def setUpClass(cls): def tearDownClass(cls): """Remove temporary data""" cls.del_temp_region() - # cls.runModule("g.remove", flags="f", type="raster", name=cls.ref_1) - # cls.runModule("g.remove", flags="f", type="raster", name=cls.class_1) + cls.runModule("g.remove", flags="f", type="raster", name=cls.ref_1) + cls.runModule("g.remove", flags="f", type="raster", name=cls.class_1) def match(self, pat, ref): if pat == "NA" or ref == "NA": From 7001e18851a31a2b5ee678eed2e00d480ee4c657 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?M=C4=81ris=20Narti=C5=A1s?= Date: Thu, 24 Nov 2022 23:06:33 +0200 Subject: [PATCH 03/20] r.kappa: fix potential integer overflow (thanks to @nilason for solution) --- raster/r.kappa/calc_metrics.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/raster/r.kappa/calc_metrics.c b/raster/r.kappa/calc_metrics.c index 95467988402..47f10764314 100644 --- a/raster/r.kappa/calc_metrics.c +++ b/raster/r.kappa/calc_metrics.c @@ -45,7 +45,7 @@ void calc_metrics(void) /* allocate matrix and fill in with cats' value */ metrics = (METRICS *) G_malloc(sizeof(METRICS)); - metrics->matrix = (long *)G_malloc(ncat * ncat * sizeof(long)); + metrics->matrix = (long *)G_malloc((size_t)ncat * ncat * sizeof(long)); for (i = 0; i < ncat * ncat; i++) metrics->matrix[i] = 0; for (i = 0; i < nstats; i++) { From 46c65977997a1aabd787f56f583bba6867c5f068 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?M=C4=81ris=20Narti=C5=A1s?= Date: Thu, 1 Dec 2022 15:05:53 +0200 Subject: [PATCH 04/20] r.kappa: add JSON output option --- raster/r.kappa/calc_metrics.c | 6 +- raster/r.kappa/kappa.h | 1 - raster/r.kappa/local_proto.h | 3 + raster/r.kappa/main.c | 12 ++- raster/r.kappa/prt2csv_mat.c | 2 - raster/r.kappa/prt_json.c | 94 +++++++++++++++++ raster/r.kappa/prt_kappa.c | 4 +- raster/r.kappa/prt_mat.c | 2 - raster/r.kappa/testsuite/test_r_kappa.py | 124 +++++++++++++++++++++++ 9 files changed, 236 insertions(+), 12 deletions(-) create mode 100644 raster/r.kappa/prt_json.c diff --git a/raster/r.kappa/calc_metrics.c b/raster/r.kappa/calc_metrics.c index 47f10764314..21070ba57d7 100644 --- a/raster/r.kappa/calc_metrics.c +++ b/raster/r.kappa/calc_metrics.c @@ -43,7 +43,7 @@ void calc_metrics(void) rlst = (long *)G_realloc(rlst, ncat * sizeof(long)); G_free(clst); - /* allocate matrix and fill in with cats' value */ + /* fill matrix with observed counts */ metrics = (METRICS *) G_malloc(sizeof(METRICS)); metrics->matrix = (long *)G_malloc((size_t)ncat * ncat * sizeof(long)); for (i = 0; i < ncat * ncat; i++) @@ -121,11 +121,11 @@ void calc_metrics(void) if (pi[i] == 0) metrics->user_acc[i] = -999; else - metrics->user_acc[i] = 100 * (1 - pii[i] / pi[i]); + metrics->user_acc[i] = 100 * (pii[i] / pi[i]); if (pj[i] == 0) metrics->prod_acc[i] = -999; else - metrics->prod_acc[i] = 100 * (1 - pii[i] / pj[i]); + metrics->prod_acc[i] = 100 * (pii[i] / pj[i]); /* theta 1 */ p0 += pii[i]; /* theta 2 */ diff --git a/raster/r.kappa/kappa.h b/raster/r.kappa/kappa.h index 804f9eec688..9cbae8dbcf4 100644 --- a/raster/r.kappa/kappa.h +++ b/raster/r.kappa/kappa.h @@ -17,7 +17,6 @@ struct _layer_ struct _metrics_ { - int ncat; long obs; long correct; long *matrix; diff --git a/raster/r.kappa/local_proto.h b/raster/r.kappa/local_proto.h index 36bdb7bd0f8..63cf558e535 100644 --- a/raster/r.kappa/local_proto.h +++ b/raster/r.kappa/local_proto.h @@ -16,6 +16,9 @@ void prn_error_mat(int out_cols, int hdr); /* prt2csv_mat.c */ void prn2csv_error_mat(int hdr); +/* prt_json.c */ +void prn_json(void); + /* stats.c */ int stats(void); diff --git a/raster/r.kappa/main.c b/raster/r.kappa/main.c index 26475bb0265..13cdcb127b3 100644 --- a/raster/r.kappa/main.c +++ b/raster/r.kappa/main.c @@ -123,6 +123,10 @@ int main(int argc, char **argv) if (G_parser(argc, argv)) exit(EXIT_FAILURE); + if (flags.j->answer && + (flags.m->answer || flags.h->answer || flags.w->answer)) + G_warning(_("When JSON output flag is set, all other formatting flags are ignored")); + G_get_window(&window); maps[0] = parms.ref->answer; @@ -136,9 +140,13 @@ int main(int argc, char **argv) /* run r.stats to obtain statistics of map layers */ stats(); + /* calculate metrics from stats */ + calc_metrics(); - if (flags.m->answer) { - /* prepare the data for calculation */ + if (flags.j->answer) { + prn_json(); + } + else if (flags.m->answer) { prn2csv_error_mat(flags.h->answer); } else { diff --git a/raster/r.kappa/prt2csv_mat.c b/raster/r.kappa/prt2csv_mat.c index 5e9f6570219..6fc91a2d6c8 100644 --- a/raster/r.kappa/prt2csv_mat.c +++ b/raster/r.kappa/prt2csv_mat.c @@ -32,8 +32,6 @@ void prn2csv_error_mat(int hdr) return; } else { - calc_metrics(); - /* format and print out the error matrix in panels */ first_col = 0; last_col = ncat; diff --git a/raster/r.kappa/prt_json.c b/raster/r.kappa/prt_json.c new file mode 100644 index 00000000000..1ca78ff6ee2 --- /dev/null +++ b/raster/r.kappa/prt_json.c @@ -0,0 +1,94 @@ +#include +#include +#include +#include "kappa.h" +#include "local_proto.h" + + +void prn_json() +{ + bool first; + FILE *fd; + + if (output != NULL) + fd = fopen(output, "w"); + else + fd = stdout; + + if (fd == NULL) { + G_fatal_error(_("Cannot open file <%s> to write JSON output"), + output); + return; + } + + fprintf(fd, "{\n"); + fprintf(fd, " \"map1\": \"%s\",\n", maps[0]); + fprintf(fd, " \"map2\": \"%s\",\n", maps[1]); + fprintf(fd, " \"observations\": %ld,\n", metrics->obs); + fprintf(fd, " \"correct\": %ld,\n", metrics->correct); + fprintf(fd, " \"total_acc\": %.5f,\n", metrics->total_acc); + fprintf(fd, " \"kappa\": %.5f,\n", metrics->kappa); + fprintf(fd, " \"kappa_var\": %.5f,\n", metrics->kappa_var); + fprintf(fd, " \"cats\": ["); + first = 1; + for (int i = 0; i < ncat; i++) { + if (first) + first = 0; + else + fprintf(fd, ", "); + fprintf(fd, "%ld", rlst[i]); + } + fprintf(fd, "],\n"); + fprintf(fd, " \"matrix\": [\n ["); + first = 1; + for (int i = 0; i < ncat; i++) { + if (first) + first = 0; + else + fprintf(fd, "],\n ["); + bool cfirst = 1; + + for (int j = 0; j < ncat; j++) { + if (cfirst) + cfirst = 0; + else + fprintf(fd, ", "); + fprintf(fd, "%ld", metrics->matrix[ncat * i + j]); + } + } + fprintf(fd, "]\n ],\n"); + fprintf(fd, " \"prod_acc\": ["); + first = 1; + for (int i = 0; i < ncat; i++) { + if (first) + first = 0; + else + fprintf(fd, ", "); + fprintf(fd, "%.5f", metrics->prod_acc[i]); + } + fprintf(fd, "],\n"); + fprintf(fd, " \"user_acc\": ["); + first = 1; + for (int i = 0; i < ncat; i++) { + if (first) + first = 0; + else + fprintf(fd, ", "); + fprintf(fd, "%.5f", metrics->user_acc[i]); + } + fprintf(fd, "],\n"); + fprintf(fd, " \"cond_kappa\": ["); + first = 1; + for (int i = 0; i < ncat; i++) { + if (first) + first = 0; + else + fprintf(fd, ", "); + fprintf(fd, "%.5f", metrics->cond_kappa[i]); + } + fprintf(fd, "]\n"); + + fprintf(fd, "}\n"); + if (output != NULL) + fclose(fd); +} diff --git a/raster/r.kappa/prt_kappa.c b/raster/r.kappa/prt_kappa.c index 585178ad250..8d8d2a68728 100644 --- a/raster/r.kappa/prt_kappa.c +++ b/raster/r.kappa/prt_kappa.c @@ -25,11 +25,11 @@ void prt_kappa(void) if (metrics->user_acc[i] == -999) fprintf(fd, "NA\t\t"); else - fprintf(fd, "%f\t", metrics->user_acc[i]); + fprintf(fd, "%f\t", 100 - metrics->user_acc[i]); if (metrics->prod_acc[i] == -999) fprintf(fd, "NA\t\t"); else - fprintf(fd, "%f\t", metrics->prod_acc[i]); + fprintf(fd, "%f\t", 100 - metrics->prod_acc[i]); if (metrics->cond_kappa[i] == -999) fprintf(fd, "NA\n"); else diff --git a/raster/r.kappa/prt_mat.c b/raster/r.kappa/prt_mat.c index d751150997f..3c22abdc247 100644 --- a/raster/r.kappa/prt_mat.c +++ b/raster/r.kappa/prt_mat.c @@ -35,8 +35,6 @@ void prn_error_mat(int out_cols, int hdr) return; } else { - calc_metrics(); - /* format and print out the error matrix in panels */ out_cols = (out_cols == 132) ? 9 : 5; num_panels = ncat / out_cols; diff --git a/raster/r.kappa/testsuite/test_r_kappa.py b/raster/r.kappa/testsuite/test_r_kappa.py index 8cbbab64211..d183c4e1374 100644 --- a/raster/r.kappa/testsuite/test_r_kappa.py +++ b/raster/r.kappa/testsuite/test_r_kappa.py @@ -11,11 +11,16 @@ import os import pathlib +import json + +from tempfile import NamedTemporaryFile from grass.script import read_command +from grass.script import decode from grass.script.core import tempname from grass.gunittest.case import TestCase from grass.gunittest.main import test +from grass.gunittest.checkers import keyvalue_equals class MatrixCorrectnessTest(TestCase): @@ -292,5 +297,124 @@ def test_standard_output(self): self.assertTrue(self.match(vals[2], 0.0)) +class JSONOutputTest(TestCase): + """Test printing of parameters in JSON format""" + + @classmethod + def setUpClass(cls): + """Import sample maps with known properties""" + cls.use_temp_region() + cls.runModule("g.region", n=5, s=0, e=5, w=0, res=1) + + cls.data_dir = os.path.join(pathlib.Path(__file__).parent.absolute(), "data") + # Normal case + cls.ref_1 = tempname(10) + cls.runModule( + "r.in.ascii", + input=os.path.join(cls.data_dir, "ref_1.ascii"), + output=cls.ref_1, + quiet=True, + ) + cls.class_1 = tempname(10) + cls.runModule( + "r.in.ascii", + input=os.path.join(cls.data_dir, "class_1.ascii"), + output=cls.class_1, + quiet=True, + ) + + cls.ref_json1 = { + "map1": cls.ref_1, + "map2": cls.class_1, + "observations": 18, + "correct": 11, + "total_acc": 61.111111, + "kappa": 0.52091, + "kappa_var": 0.016871, + "cats": [1, 2, 3, 4, 5, 6], + "matrix": [ + [4, 0, 0, 0, 0, 0], + [0, 0, 0, 0, 0, 0], + [0, 1, 4, 0, 0, 0], + [3, 0, 0, 0, 0, 1], + [0, 0, 0, 0, 1, 0], + [0, 2, 0, 0, 0, 2], + ], + "prod_acc": [57.1429, 0.0, 100.0, -999, 100.0, 66.66666], + "user_acc": [100.0, -999, 80.0, 0.0, 100.0, 50.0], + "cond_kappa": [1.0, -999, 0.742857, 0.0, 1.0, 0.400], + } + # Degenerate case with all values missing + cls.ref_2 = tempname(10) + cls.runModule( + "r.in.ascii", + input=os.path.join(cls.data_dir, "ref_2.ascii"), + output=cls.ref_2, + quiet=True, + ) + cls.class_2 = tempname(10) + cls.runModule( + "r.in.ascii", + input=os.path.join(cls.data_dir, "class_2.ascii"), + output=cls.class_2, + quiet=True, + ) + cls.ref_json2 = { + "map1": cls.ref_2, + "map2": cls.class_2, + "observations": 25, + "correct": 0, + "total_acc": 0.0, + "kappa": 0.0, + "kappa_var": 0.0, + "cats": [0, 1, 2, 3, 4, 9], + "matrix": [ + [0, 0, 0, 0, 0, 0], + [0, 0, 0, 0, 0, 0], + [0, 0, 0, 0, 0, 0], + [0, 0, 0, 0, 0, 0], + [0, 0, 0, 0, 0, 0], + [8, 8, 4, 1, 4, 0], + ], + "prod_acc": [0.0, 0.0, 0.0, 0.0, 0.0, -999], + "user_acc": [-999, -999, -999, -999, -999, 0.0], + "cond_kappa": [-999, -999, -999, -999, -999, 0.0], + } + + @classmethod + def tearDownClass(cls): + """Remove temporary data""" + cls.del_temp_region() + cls.runModule("g.remove", flags="f", type="raster", name=cls.ref_1) + cls.runModule("g.remove", flags="f", type="raster", name=cls.class_1) + cls.runModule("g.remove", flags="f", type="raster", name=cls.ref_2) + cls.runModule("g.remove", flags="f", type="raster", name=cls.class_2) + + def test_stdout(self): + out = read_command( + "r.kappa", + reference=self.ref_2, + classification=self.class_2, + flags="j", + quiet=True, + ) + json_out = json.loads(decode(out)) + self.assertTrue(keyvalue_equals(self.ref_json2, json_out, precision=4)) + + def test_file(self): + f = NamedTemporaryFile() + self.runModule( + "r.kappa", + reference=self.ref_2, + classification=self.class_2, + output=f.name, + flags="j", + quiet=True, + overwrite=True, + ) + json_out = json.loads(f.read()) + self.assertTrue(keyvalue_equals(self.ref_json2, json_out, precision=4)) + + if __name__ == "__main__": test() From a0d4a06058f896672ac84aaeefd1885c33297468 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?M=C4=81ris=20Narti=C5=A1s?= Date: Thu, 1 Dec 2022 16:03:26 +0200 Subject: [PATCH 05/20] r.kappa: better handle case when maps do not have any common values --- raster/r.kappa/calc_metrics.c | 12 +- raster/r.kappa/testsuite/test_r_kappa.py | 222 +++++++++++++++-------- 2 files changed, 157 insertions(+), 77 deletions(-) diff --git a/raster/r.kappa/calc_metrics.c b/raster/r.kappa/calc_metrics.c index 21070ba57d7..305d9cebf74 100644 --- a/raster/r.kappa/calc_metrics.c +++ b/raster/r.kappa/calc_metrics.c @@ -19,6 +19,17 @@ void calc_metrics(void) double inter1 = 0, inter2 = 0; int a_i, b_i; + metrics = (METRICS *) G_malloc(sizeof(METRICS)); + if (nstats == 0) { + G_warning(_("Both maps have nothing in common. Check the computational region.")); + metrics->obs = 0; + metrics->correct = 0; + metrics->total_acc = 0; + metrics->kappa = -999; + metrics->kappa_var = -999; + return; + } + /* get the cat lists */ rlst = (long *)G_calloc(nstats * 2, sizeof(long)); clst = (long *)G_calloc(nstats, sizeof(long)); @@ -44,7 +55,6 @@ void calc_metrics(void) G_free(clst); /* fill matrix with observed counts */ - metrics = (METRICS *) G_malloc(sizeof(METRICS)); metrics->matrix = (long *)G_malloc((size_t)ncat * ncat * sizeof(long)); for (i = 0; i < ncat * ncat; i++) metrics->matrix[i] = 0; diff --git a/raster/r.kappa/testsuite/test_r_kappa.py b/raster/r.kappa/testsuite/test_r_kappa.py index d183c4e1374..80205da0371 100644 --- a/raster/r.kappa/testsuite/test_r_kappa.py +++ b/raster/r.kappa/testsuite/test_r_kappa.py @@ -307,113 +307,183 @@ def setUpClass(cls): cls.runModule("g.region", n=5, s=0, e=5, w=0, res=1) cls.data_dir = os.path.join(pathlib.Path(__file__).parent.absolute(), "data") + cls.refs = [] + cls.clas = [] + cls.outp = [] # Normal case - cls.ref_1 = tempname(10) + cls.refs.append(tempname(10)) cls.runModule( "r.in.ascii", input=os.path.join(cls.data_dir, "ref_1.ascii"), - output=cls.ref_1, + output=cls.refs[0], quiet=True, ) - cls.class_1 = tempname(10) + cls.clas.append(tempname(10)) cls.runModule( "r.in.ascii", input=os.path.join(cls.data_dir, "class_1.ascii"), - output=cls.class_1, + output=cls.clas[0], quiet=True, ) - cls.ref_json1 = { - "map1": cls.ref_1, - "map2": cls.class_1, - "observations": 18, - "correct": 11, - "total_acc": 61.111111, - "kappa": 0.52091, - "kappa_var": 0.016871, - "cats": [1, 2, 3, 4, 5, 6], - "matrix": [ - [4, 0, 0, 0, 0, 0], - [0, 0, 0, 0, 0, 0], - [0, 1, 4, 0, 0, 0], - [3, 0, 0, 0, 0, 1], - [0, 0, 0, 0, 1, 0], - [0, 2, 0, 0, 0, 2], - ], - "prod_acc": [57.1429, 0.0, 100.0, -999, 100.0, 66.66666], - "user_acc": [100.0, -999, 80.0, 0.0, 100.0, 50.0], - "cond_kappa": [1.0, -999, 0.742857, 0.0, 1.0, 0.400], - } - # Degenerate case with all values missing - cls.ref_2 = tempname(10) + cls.outp.append( + { + "map1": cls.refs[0], + "map2": cls.clas[0], + "observations": 18, + "correct": 11, + "total_acc": 61.111111, + "kappa": 0.52091, + "kappa_var": 0.016871, + "cats": [1, 2, 3, 4, 5, 6], + "matrix": [ + [4, 0, 0, 0, 0, 0], + [0, 0, 0, 0, 0, 0], + [0, 1, 4, 0, 0, 0], + [3, 0, 0, 0, 0, 1], + [0, 0, 0, 0, 1, 0], + [0, 2, 0, 0, 0, 2], + ], + "prod_acc": [57.1429, 0.0, 100.0, -999, 100.0, 66.66666], + "user_acc": [100.0, -999, 80.0, 0.0, 100.0, 50.0], + "cond_kappa": [1.0, -999, 0.742857, 0.0, 1.0, 0.400], + } + ) + + # Bad case with no correct matches + cls.refs.append(tempname(10)) cls.runModule( "r.in.ascii", input=os.path.join(cls.data_dir, "ref_2.ascii"), - output=cls.ref_2, + output=cls.refs[1], quiet=True, ) - cls.class_2 = tempname(10) + cls.clas.append(tempname(10)) cls.runModule( "r.in.ascii", input=os.path.join(cls.data_dir, "class_2.ascii"), - output=cls.class_2, + output=cls.clas[1], quiet=True, ) - cls.ref_json2 = { - "map1": cls.ref_2, - "map2": cls.class_2, - "observations": 25, - "correct": 0, - "total_acc": 0.0, - "kappa": 0.0, - "kappa_var": 0.0, - "cats": [0, 1, 2, 3, 4, 9], - "matrix": [ - [0, 0, 0, 0, 0, 0], - [0, 0, 0, 0, 0, 0], - [0, 0, 0, 0, 0, 0], - [0, 0, 0, 0, 0, 0], - [0, 0, 0, 0, 0, 0], - [8, 8, 4, 1, 4, 0], - ], - "prod_acc": [0.0, 0.0, 0.0, 0.0, 0.0, -999], - "user_acc": [-999, -999, -999, -999, -999, 0.0], - "cond_kappa": [-999, -999, -999, -999, -999, 0.0], - } + cls.outp.append( + { + "map1": cls.refs[1], + "map2": cls.clas[1], + "observations": 25, + "correct": 0, + "total_acc": 0.0, + "kappa": 0.0, + "kappa_var": 0.0, + "cats": [0, 1, 2, 3, 4, 9], + "matrix": [ + [0, 0, 0, 0, 0, 0], + [0, 0, 0, 0, 0, 0], + [0, 0, 0, 0, 0, 0], + [0, 0, 0, 0, 0, 0], + [0, 0, 0, 0, 0, 0], + [8, 8, 4, 1, 4, 0], + ], + "prod_acc": [0.0, 0.0, 0.0, 0.0, 0.0, -999], + "user_acc": [-999, -999, -999, -999, -999, 0.0], + "cond_kappa": [-999, -999, -999, -999, -999, 0.0], + } + ) + + # Degenerate case #1 + cls.refs.append(tempname(10)) + cls.clas.append(tempname(10)) + cls.runModule( + "r.mapcalc", + expression=f"{cls.refs[2]}=null()", + quiet=True, + ) + cls.runModule( + "r.mapcalc", + expression=f"{cls.clas[2]}=null()", + quiet=True, + ) + cls.outp.append( + { + "map1": cls.refs[2], + "map2": cls.clas[2], + "observations": 0, + "correct": 0, + "total_acc": 0.0, + "kappa": -999.0, + "kappa_var": -999.0, + "cats": [], + "matrix": [[]], + "prod_acc": [], + "user_acc": [], + "cond_kappa": [], + } + ) + + # Degenerate case #2 + cls.refs.append(tempname(10)) + cls.clas.append(tempname(10)) + cls.runModule( + "r.mapcalc", + expression=f"{cls.refs[3]}=1", + quiet=True, + ) + cls.runModule( + "r.mapcalc", + expression=f"{cls.clas[3]}=null()", + quiet=True, + ) + cls.outp.append( + { + "map1": cls.refs[3], + "map2": cls.clas[3], + "observations": 0, + "correct": 0, + "total_acc": 0.0, + "kappa": -999.0, + "kappa_var": -999.0, + "cats": [], + "matrix": [[]], + "prod_acc": [], + "user_acc": [], + "cond_kappa": [], + } + ) @classmethod def tearDownClass(cls): """Remove temporary data""" cls.del_temp_region() - cls.runModule("g.remove", flags="f", type="raster", name=cls.ref_1) - cls.runModule("g.remove", flags="f", type="raster", name=cls.class_1) - cls.runModule("g.remove", flags="f", type="raster", name=cls.ref_2) - cls.runModule("g.remove", flags="f", type="raster", name=cls.class_2) + for ref in cls.refs: + cls.runModule("g.remove", flags="f", type="raster", name=ref) + for clas in cls.clas: + cls.runModule("g.remove", flags="f", type="raster", name=clas) def test_stdout(self): - out = read_command( - "r.kappa", - reference=self.ref_2, - classification=self.class_2, - flags="j", - quiet=True, - ) - json_out = json.loads(decode(out)) - self.assertTrue(keyvalue_equals(self.ref_json2, json_out, precision=4)) + for i in range(len(self.refs)): + out = read_command( + "r.kappa", + reference=self.refs[i], + classification=self.clas[i], + flags="j", + quiet=True, + ) + json_out = json.loads(decode(out)) + self.assertTrue(keyvalue_equals(self.outp[i], json_out, precision=4)) def test_file(self): - f = NamedTemporaryFile() - self.runModule( - "r.kappa", - reference=self.ref_2, - classification=self.class_2, - output=f.name, - flags="j", - quiet=True, - overwrite=True, - ) - json_out = json.loads(f.read()) - self.assertTrue(keyvalue_equals(self.ref_json2, json_out, precision=4)) + for i in range(len(self.refs)): + f = NamedTemporaryFile() + self.runModule( + "r.kappa", + reference=self.refs[i], + classification=self.clas[i], + output=f.name, + flags="j", + quiet=True, + overwrite=True, + ) + json_out = json.loads(f.read()) + self.assertTrue(keyvalue_equals(self.outp[i], json_out, precision=4)) if __name__ == "__main__": From a0e15ca6dd3ca3ed72de4f906358b75b7fbb7b7e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?M=C4=81ris=20Narti=C5=A1s?= Date: Thu, 1 Dec 2022 16:15:56 +0200 Subject: [PATCH 06/20] r.kappa: add documentation for JSON output --- raster/r.kappa/main.c | 2 +- raster/r.kappa/r.kappa.html | 17 +++++++++++++---- 2 files changed, 14 insertions(+), 5 deletions(-) diff --git a/raster/r.kappa/main.c b/raster/r.kappa/main.c index 13cdcb127b3..3fbed0491cd 100644 --- a/raster/r.kappa/main.c +++ b/raster/r.kappa/main.c @@ -117,7 +117,7 @@ int main(int argc, char **argv) flags.j = G_define_flag(); flags.j->key = 'j'; - flags.j->description = _("Print measures in JSON"); + flags.j->description = _("Print in JSON"); flags.j->guisection = _("Output settings"); if (G_parser(argc, argv)) diff --git a/raster/r.kappa/r.kappa.html b/raster/r.kappa/r.kappa.html index 85b765493ee..aea9d1c1e4b 100644 --- a/raster/r.kappa/r.kappa.html +++ b/raster/r.kappa/r.kappa.html @@ -33,6 +33,11 @@

DESCRIPTION

panel. There is a total at the bottom of each column representing the sum of all the rows in that column. +

+It is possible to obtain all data in a machine readable format +by enabling JSON output flag. JSON keys directly match variable +names in the C code for convenience. +

NOTES

It is recommended to reclassify categories of classified @@ -42,8 +47,11 @@

NOTES

information for each and every category.

-NA's in output file mean non-applicable in case -MASK exists. +NA's in output mean it was not possible to calculate the value +(e.g. calculation would involve division by zero). +In JSON output NA's are represented with value -999. +If there is no overlap between both maps, a warning is printed and +output values are set to 0 or -999 respectively.

The Estimated kappa value in r.kappa is the value @@ -93,6 +101,7 @@

SEE ALSO

r.stats -

AUTHOR

+

AUTHORS

-Tao Wen, University of Illinois at Urbana-Champaign, Illinois +Tao Wen, University of Illinois at Urbana-Champaign, Illinois
+Maris Nartiss, University of Latvia (JSON output) From 8018d317cb10118c1823e3244a4d92229ca89fdf Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?M=C4=81ris=20Narti=C5=A1s?= Date: Thu, 1 Dec 2022 16:35:25 +0200 Subject: [PATCH 07/20] r.kappa: remove unused variables --- raster/r.kappa/prt_mat.c | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/raster/r.kappa/prt_mat.c b/raster/r.kappa/prt_mat.c index 3c22abdc247..e945db38154 100644 --- a/raster/r.kappa/prt_mat.c +++ b/raster/r.kappa/prt_mat.c @@ -7,15 +7,12 @@ void prn_error_mat(int out_cols, int hdr) { - int k; - long x; - int num_panels, at_panel; int cndx, rndx; int first_col = 0, last_col = 0; int addflag = 0; int thisone; - long t_row, t_col; + long t_row; long t_rowcount, grand_count; const char *mapone; FILE *fd; @@ -40,7 +37,6 @@ void prn_error_mat(int out_cols, int hdr) num_panels = ncat / out_cols; if (ncat % out_cols) num_panels++; - t_rowcount = 0; fprintf(fd, "\nError Matrix (MAP1: reference, MAP2: classification)\n"); From 81f86c09b6616c2641a01bdb9b3f5963eb85f161 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?M=C4=81ris=20Narti=C5=A1s?= Date: Fri, 2 Dec 2022 08:38:12 +0200 Subject: [PATCH 08/20] r.kappa: code after G_fatal_error is unreachable --- raster/r.kappa/prt2csv_mat.c | 4 +--- raster/r.kappa/prt_hdr.c | 4 +--- raster/r.kappa/prt_json.c | 4 +--- raster/r.kappa/prt_kappa.c | 4 +--- raster/r.kappa/prt_label.c | 4 +--- raster/r.kappa/prt_mat.c | 4 +--- 6 files changed, 6 insertions(+), 18 deletions(-) diff --git a/raster/r.kappa/prt2csv_mat.c b/raster/r.kappa/prt2csv_mat.c index 6fc91a2d6c8..6a5c5770dc2 100644 --- a/raster/r.kappa/prt2csv_mat.c +++ b/raster/r.kappa/prt2csv_mat.c @@ -26,11 +26,9 @@ void prn2csv_error_mat(int hdr) else fd = stdout; - if (fd == NULL) { + if (fd == NULL) G_fatal_error(_("Cannot open file <%s> to write cats and counts (error matrix)"), output); - return; - } else { /* format and print out the error matrix in panels */ first_col = 0; diff --git a/raster/r.kappa/prt_hdr.c b/raster/r.kappa/prt_hdr.c index baa7378c5f3..f92972c0790 100644 --- a/raster/r.kappa/prt_hdr.c +++ b/raster/r.kappa/prt_hdr.c @@ -14,10 +14,8 @@ void prn_header(void) if (output == NULL) fd = stdout; - else if ((fd = fopen(output, "w")) == NULL) { + else if ((fd = fopen(output, "w")) == NULL) G_fatal_error(_("Cannot open file <%s> to write header"), output); - return; - } /* print header */ fprintf(fd, "\t\t\t%s\n", title); diff --git a/raster/r.kappa/prt_json.c b/raster/r.kappa/prt_json.c index 1ca78ff6ee2..eb79dce40a9 100644 --- a/raster/r.kappa/prt_json.c +++ b/raster/r.kappa/prt_json.c @@ -15,11 +15,9 @@ void prn_json() else fd = stdout; - if (fd == NULL) { + if (fd == NULL) G_fatal_error(_("Cannot open file <%s> to write JSON output"), output); - return; - } fprintf(fd, "{\n"); fprintf(fd, " \"map1\": \"%s\",\n", maps[0]); diff --git a/raster/r.kappa/prt_kappa.c b/raster/r.kappa/prt_kappa.c index 8d8d2a68728..9e6c23e17c9 100644 --- a/raster/r.kappa/prt_kappa.c +++ b/raster/r.kappa/prt_kappa.c @@ -12,11 +12,9 @@ void prt_kappa(void) if (output == NULL) fd = stdout; - else if ((fd = fopen(output, "a")) == NULL) { + else if ((fd = fopen(output, "a")) == NULL) G_fatal_error(_("Cannot open file <%s> to write kappa and relevant parameters"), output); - return; - } /* print out the comission and omission accuracy, and conditional kappa */ fprintf(fd, "\nCats\t%% Comission\t%% Omission\tEstimated Kappa\n"); diff --git a/raster/r.kappa/prt_label.c b/raster/r.kappa/prt_label.c index b55f6b4b961..06686c5bb3e 100644 --- a/raster/r.kappa/prt_label.c +++ b/raster/r.kappa/prt_label.c @@ -13,10 +13,8 @@ void prt_label(void) if (output == NULL) fd = stdout; - else if ((fd = fopen(output, "a")) == NULL) { + else if ((fd = fopen(output, "a")) == NULL) G_fatal_error(_("Can't open file <%s> to write label"), output); - return; - } /* print labels */ for (i = 0; i < nlayers; i++) { diff --git a/raster/r.kappa/prt_mat.c b/raster/r.kappa/prt_mat.c index e945db38154..6b451fada9f 100644 --- a/raster/r.kappa/prt_mat.c +++ b/raster/r.kappa/prt_mat.c @@ -26,11 +26,9 @@ void prn_error_mat(int out_cols, int hdr) else fd = stdout; - if (fd == NULL) { + if (fd == NULL) G_fatal_error(_("Cannot open file <%s> to write cats and counts (error matrix)"), output); - return; - } else { /* format and print out the error matrix in panels */ out_cols = (out_cols == 132) ? 9 : 5; From 12e148127f7854448ad3579359c101d74149d2fa Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?M=C4=81ris=20Narti=C5=A1s?= Date: Fri, 2 Dec 2022 08:52:26 +0200 Subject: [PATCH 09/20] r.kappa: replace no data value with a constant variable --- raster/r.kappa/calc_metrics.c | 16 ++++++++-------- raster/r.kappa/kappa.h | 2 ++ raster/r.kappa/prt_kappa.c | 10 +++++----- 3 files changed, 15 insertions(+), 13 deletions(-) diff --git a/raster/r.kappa/calc_metrics.c b/raster/r.kappa/calc_metrics.c index 305d9cebf74..9d26a545c69 100644 --- a/raster/r.kappa/calc_metrics.c +++ b/raster/r.kappa/calc_metrics.c @@ -25,8 +25,8 @@ void calc_metrics(void) metrics->obs = 0; metrics->correct = 0; metrics->total_acc = 0; - metrics->kappa = -999; - metrics->kappa_var = -999; + metrics->kappa = na_value; + metrics->kappa_var = na_value; return; } @@ -90,8 +90,8 @@ void calc_metrics(void) } if (metrics->obs == 0) { metrics->total_acc = 0; - metrics->kappa = -999; - metrics->kappa_var = -999; + metrics->kappa = na_value; + metrics->kappa_var = na_value; return; } @@ -129,11 +129,11 @@ void calc_metrics(void) pj[i] = pj[i] / metrics->obs; pii[i] = pii[i] / metrics->obs; if (pi[i] == 0) - metrics->user_acc[i] = -999; + metrics->user_acc[i] = na_value; else metrics->user_acc[i] = 100 * (pii[i] / pi[i]); if (pj[i] == 0) - metrics->prod_acc[i] = -999; + metrics->prod_acc[i] = na_value; else metrics->prod_acc[i] = 100 * (pii[i] / pj[i]); /* theta 1 */ @@ -144,12 +144,12 @@ void calc_metrics(void) if (pC != 1) metrics->kappa = (p0 - pC) / (1 - pC); else - metrics->kappa = -999; + metrics->kappa = na_value; /* conditional kappa */ for (i = 0; i < ncat; i++) { if (pi[i] == 0 || (pi[i] == 1 && pj[i] == 1)) - metrics->cond_kappa[i] = -999; + metrics->cond_kappa[i] = na_value; else metrics->cond_kappa[i] = (pii[i] - pi[i] * pj[i]) / (pi[i] - pi[i] * pj[i]); diff --git a/raster/r.kappa/kappa.h b/raster/r.kappa/kappa.h index 9cbae8dbcf4..28fad9524f7 100644 --- a/raster/r.kappa/kappa.h +++ b/raster/r.kappa/kappa.h @@ -50,3 +50,5 @@ extern size_t nstats; #define METRICS struct _metrics_ extern METRICS *metrics; + +static const double na_value = -999.0; diff --git a/raster/r.kappa/prt_kappa.c b/raster/r.kappa/prt_kappa.c index 9e6c23e17c9..bfdb15968a3 100644 --- a/raster/r.kappa/prt_kappa.c +++ b/raster/r.kappa/prt_kappa.c @@ -20,26 +20,26 @@ void prt_kappa(void) fprintf(fd, "\nCats\t%% Comission\t%% Omission\tEstimated Kappa\n"); for (i = 0; i < ncat; i++) { fprintf(fd, "%ld\t", rlst[i]); - if (metrics->user_acc[i] == -999) + if (metrics->user_acc[i] == na_value) fprintf(fd, "NA\t\t"); else fprintf(fd, "%f\t", 100 - metrics->user_acc[i]); - if (metrics->prod_acc[i] == -999) + if (metrics->prod_acc[i] == na_value) fprintf(fd, "NA\t\t"); else fprintf(fd, "%f\t", 100 - metrics->prod_acc[i]); - if (metrics->cond_kappa[i] == -999) + if (metrics->cond_kappa[i] == na_value) fprintf(fd, "NA\n"); else fprintf(fd, "%f\n", metrics->cond_kappa[i]); } fprintf(fd, "\n"); fprintf(fd, "Kappa\t\tKappa Variance\n"); - if (metrics->kappa == -999) + if (metrics->kappa == na_value) fprintf(fd, "NA"); else fprintf(fd, "%f", metrics->kappa); - if (metrics->kappa_var == -999) + if (metrics->kappa_var == na_value) fprintf(fd, "\tNA\n"); else fprintf(fd, "\t%f\n", metrics->kappa_var); From e039cf38c00d082e7de21b4e08fe6f09419b29d3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?M=C4=81ris=20Narti=C5=A1s?= Date: Fri, 2 Dec 2022 09:26:05 +0200 Subject: [PATCH 10/20] r.kappa: remove implicit casts --- raster/r.kappa/calc_metrics.c | 51 ++++++++++++++++++----------------- 1 file changed, 26 insertions(+), 25 deletions(-) diff --git a/raster/r.kappa/calc_metrics.c b/raster/r.kappa/calc_metrics.c index 9d26a545c69..412e2438c1a 100644 --- a/raster/r.kappa/calc_metrics.c +++ b/raster/r.kappa/calc_metrics.c @@ -11,20 +11,21 @@ static int collapse(long *l, int n); void calc_metrics(void) { int i, j, k; + size_t l; long *clst; int ncat1, ncat2; int cndx; double *pi, *pj, *pii; - double p0 = 0, pC = 0; - double inter1 = 0, inter2 = 0; - int a_i, b_i; + double p0 = 0.0, pC = 0.0; + double inter1 = 0.0, inter2 = 0.0; + int a_i = 0, b_i = 0; metrics = (METRICS *) G_malloc(sizeof(METRICS)); if (nstats == 0) { G_warning(_("Both maps have nothing in common. Check the computational region.")); metrics->obs = 0; metrics->correct = 0; - metrics->total_acc = 0; + metrics->total_acc = 0.0; metrics->kappa = na_value; metrics->kappa_var = na_value; return; @@ -33,9 +34,9 @@ void calc_metrics(void) /* get the cat lists */ rlst = (long *)G_calloc(nstats * 2, sizeof(long)); clst = (long *)G_calloc(nstats, sizeof(long)); - for (i = 0; i < nstats; i++) { - rlst[i] = Gstats[i].cats[0]; - clst[i] = Gstats[i].cats[1]; + for (l = 0; l < nstats; l++) { + rlst[l] = Gstats[l].cats[0]; + clst[l] = Gstats[l].cats[1]; } /* sort the cat lists */ @@ -58,15 +59,15 @@ void calc_metrics(void) metrics->matrix = (long *)G_malloc((size_t)ncat * ncat * sizeof(long)); for (i = 0; i < ncat * ncat; i++) metrics->matrix[i] = 0; - for (i = 0; i < nstats; i++) { + for (l = 0; l < nstats; l++) { for (j = 0; j < ncat; j++) - if (rlst[j] == Gstats[i].cats[0]) + if (rlst[j] == Gstats[l].cats[0]) break; for (k = 0; k < ncat; k++) - if (rlst[k] == Gstats[i].cats[1]) + if (rlst[k] == Gstats[l].cats[1]) break; /* matrix: reference in columns, classification in rows */ - metrics->matrix[j * ncat + k] = Gstats[i].count; + metrics->matrix[j * ncat + k] = Gstats[l].count; } /* Calculate marginals */ @@ -89,7 +90,7 @@ void calc_metrics(void) metrics->rowsum[cndx] = t_row; } if (metrics->obs == 0) { - metrics->total_acc = 0; + metrics->total_acc = 0.0; metrics->kappa = na_value; metrics->kappa_var = na_value; return; @@ -104,18 +105,18 @@ void calc_metrics(void) metrics->prod_acc = (double *)G_calloc(ncat, sizeof(double)); for (i = 0; i < ncat; i++) { - for (j = 0; j < nstats; j++) { - if (Gstats[j].cats[0] == rlst[i]) { - pi[i] += Gstats[j].count; + for (l = 0; l < nstats; l++) { + if (Gstats[l].cats[0] == rlst[i]) { + pi[i] += Gstats[l].count; } - if (Gstats[j].cats[1] == rlst[i]) { - pj[i] += Gstats[j].count; + if (Gstats[l].cats[1] == rlst[i]) { + pj[i] += Gstats[l].count; } - if ((Gstats[j].cats[0] == Gstats[j].cats[1]) && - (Gstats[j].cats[0] == rlst[i])) { - pii[i] += Gstats[j].count; + if ((Gstats[l].cats[0] == Gstats[l].cats[1]) && + (Gstats[l].cats[0] == rlst[i])) { + pii[i] += Gstats[l].count; } } metrics->correct += pii[i]; @@ -157,16 +158,16 @@ void calc_metrics(void) } /* kappa variance */ - for (j = 0; j < nstats; j++) { - if (Gstats[j].cats[0] != Gstats[j].cats[1]) { + for (l = 0; l < nstats; l++) { + if (Gstats[l].cats[0] != Gstats[l].cats[1]) { for (i = 0; i < ncat; i++) { - if (Gstats[j].cats[0] == rlst[i]) + if (Gstats[l].cats[0] == rlst[i]) a_i = i; - if (Gstats[j].cats[1] == rlst[i]) + if (Gstats[l].cats[1] == rlst[i]) b_i = i; } inter2 += - Gstats[j].count * pow((pi[a_i] + pj[b_i]), 2.) / metrics->obs; + Gstats[l].count * pow((pi[a_i] + pj[b_i]), 2.) / metrics->obs; } } metrics->kappa_var = (inter1 + pow((1 - p0), 2.) * inter2 - From 3bfaa9f1d0793ec7344072473ec2077f76cb64ff Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?M=C4=81ris=20Narti=C5=A1s?= Date: Fri, 2 Dec 2022 10:36:01 +0200 Subject: [PATCH 11/20] r.kappa: move from JSON output flag to output format selector --- raster/r.kappa/main.c | 26 +++++++++++++++--------- raster/r.kappa/r.kappa.html | 13 +++++------- raster/r.kappa/testsuite/test_r_kappa.py | 16 +++++++-------- 3 files changed, 29 insertions(+), 26 deletions(-) diff --git a/raster/r.kappa/main.c b/raster/r.kappa/main.c index 3fbed0491cd..e4ec43e1fa9 100644 --- a/raster/r.kappa/main.c +++ b/raster/r.kappa/main.c @@ -56,12 +56,12 @@ int main(int argc, char **argv) struct GModule *module; struct { - struct Option *map, *ref, *output, *titles; + struct Option *map, *ref, *output, *titles, *format; } parms; struct { - struct Flag *m, *w, *h, *j; + struct Flag *m, *w, *h; } flags; G_gisinit(argv[0]); @@ -99,6 +99,17 @@ int main(int argc, char **argv) parms.titles->answer = "ACCURACY ASSESSMENT"; parms.titles->guisection = _("Output settings"); + parms.format = G_define_option(); + parms.format->key = "format"; + parms.format->type = TYPE_STRING; + parms.format->required = YES; + parms.format->label = _("Output format"); + parms.format->options = "plain,json"; + parms.format->descriptions = + "plain;Plain text output;" "json;JSON (JavaScript Object Notation);"; + parms.format->answer = "plain"; + parms.format->guisection = _("Output settings"); + flags.w = G_define_flag(); flags.w->key = 'w'; flags.w->label = _("Wide report"); @@ -115,17 +126,12 @@ int main(int argc, char **argv) flags.m->description = _("Print Matrix only"); flags.m->guisection = _("Output settings"); - flags.j = G_define_flag(); - flags.j->key = 'j'; - flags.j->description = _("Print in JSON"); - flags.j->guisection = _("Output settings"); - if (G_parser(argc, argv)) exit(EXIT_FAILURE); - if (flags.j->answer && + if (strcmp(parms.format->answer, "json") == 0 && (flags.m->answer || flags.h->answer || flags.w->answer)) - G_warning(_("When JSON output flag is set, all other formatting flags are ignored")); + G_warning(_("When JSON output format is requested, all formatting flags are ignored")); G_get_window(&window); @@ -143,7 +149,7 @@ int main(int argc, char **argv) /* calculate metrics from stats */ calc_metrics(); - if (flags.j->answer) { + if (strcmp(parms.format->answer, "json") == 0) { prn_json(); } else if (flags.m->answer) { diff --git a/raster/r.kappa/r.kappa.html b/raster/r.kappa/r.kappa.html index aea9d1c1e4b..c83af190a49 100644 --- a/raster/r.kappa/r.kappa.html +++ b/raster/r.kappa/r.kappa.html @@ -16,9 +16,10 @@

DESCRIPTION

pixels are tabulated.

-The report will be write to an output file which is in +The report will be written to an output file which is in plain text format and named by user at prompt of running -the program. +the program. To obtain machine readable version, specify a +json output format.

The body of the report is arranged in panels. The @@ -33,10 +34,6 @@

DESCRIPTION

panel. There is a total at the bottom of each column representing the sum of all the rows in that column. -

-It is possible to obtain all data in a machine readable format -by enabling JSON output flag. JSON keys directly match variable -names in the C code for convenience.

NOTES

@@ -49,9 +46,9 @@

NOTES

NA's in output mean it was not possible to calculate the value (e.g. calculation would involve division by zero). -In JSON output NA's are represented with value -999. +In JSON output NA's are represented with value -999.0. If there is no overlap between both maps, a warning is printed and -output values are set to 0 or -999 respectively. +output values are set to 0 or -999.0 respectively.

The Estimated kappa value in r.kappa is the value diff --git a/raster/r.kappa/testsuite/test_r_kappa.py b/raster/r.kappa/testsuite/test_r_kappa.py index 80205da0371..0ddc98f41c8 100644 --- a/raster/r.kappa/testsuite/test_r_kappa.py +++ b/raster/r.kappa/testsuite/test_r_kappa.py @@ -344,9 +344,9 @@ def setUpClass(cls): [0, 0, 0, 0, 1, 0], [0, 2, 0, 0, 0, 2], ], - "prod_acc": [57.1429, 0.0, 100.0, -999, 100.0, 66.66666], - "user_acc": [100.0, -999, 80.0, 0.0, 100.0, 50.0], - "cond_kappa": [1.0, -999, 0.742857, 0.0, 1.0, 0.400], + "prod_acc": [57.1429, 0.0, 100.0, -999.0, 100.0, 66.66666], + "user_acc": [100.0, -999.0, 80.0, 0.0, 100.0, 50.0], + "cond_kappa": [1.0, -999.0, 0.742857, 0.0, 1.0, 0.400], } ) @@ -383,9 +383,9 @@ def setUpClass(cls): [0, 0, 0, 0, 0, 0], [8, 8, 4, 1, 4, 0], ], - "prod_acc": [0.0, 0.0, 0.0, 0.0, 0.0, -999], - "user_acc": [-999, -999, -999, -999, -999, 0.0], - "cond_kappa": [-999, -999, -999, -999, -999, 0.0], + "prod_acc": [0.0, 0.0, 0.0, 0.0, 0.0, -999.0], + "user_acc": [-999.0, -999.0, -999.0, -999.0, -999.0, 0.0], + "cond_kappa": [-999.0, -999.0, -999.0, -999.0, -999.0, 0.0], } ) @@ -464,7 +464,7 @@ def test_stdout(self): "r.kappa", reference=self.refs[i], classification=self.clas[i], - flags="j", + format="json", quiet=True, ) json_out = json.loads(decode(out)) @@ -478,7 +478,7 @@ def test_file(self): reference=self.refs[i], classification=self.clas[i], output=f.name, - flags="j", + format="json", quiet=True, overwrite=True, ) From 132966c2e52909fbe9edc552dc2fbf99b4d58400 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?M=C4=81ris=20Narti=C5=A1s?= Date: Fri, 2 Dec 2022 10:58:50 +0200 Subject: [PATCH 12/20] r.kappa: use full names of variables --- raster/r.kappa/testsuite/test_r_kappa.py | 90 +++++++++++++----------- 1 file changed, 47 insertions(+), 43 deletions(-) diff --git a/raster/r.kappa/testsuite/test_r_kappa.py b/raster/r.kappa/testsuite/test_r_kappa.py index 0ddc98f41c8..f4d8373889a 100644 --- a/raster/r.kappa/testsuite/test_r_kappa.py +++ b/raster/r.kappa/testsuite/test_r_kappa.py @@ -307,29 +307,29 @@ def setUpClass(cls): cls.runModule("g.region", n=5, s=0, e=5, w=0, res=1) cls.data_dir = os.path.join(pathlib.Path(__file__).parent.absolute(), "data") - cls.refs = [] - cls.clas = [] - cls.outp = [] + cls.references = [] + cls.classifications = [] + cls.expected_outputs = [] # Normal case - cls.refs.append(tempname(10)) + cls.references.append(tempname(10)) cls.runModule( "r.in.ascii", input=os.path.join(cls.data_dir, "ref_1.ascii"), - output=cls.refs[0], + output=cls.references[0], quiet=True, ) - cls.clas.append(tempname(10)) + cls.classifications.append(tempname(10)) cls.runModule( "r.in.ascii", input=os.path.join(cls.data_dir, "class_1.ascii"), - output=cls.clas[0], + output=cls.classifications[0], quiet=True, ) - cls.outp.append( + cls.expected_outputs.append( { - "map1": cls.refs[0], - "map2": cls.clas[0], + "map1": cls.references[0], + "map2": cls.classifications[0], "observations": 18, "correct": 11, "total_acc": 61.111111, @@ -351,24 +351,24 @@ def setUpClass(cls): ) # Bad case with no correct matches - cls.refs.append(tempname(10)) + cls.references.append(tempname(10)) cls.runModule( "r.in.ascii", input=os.path.join(cls.data_dir, "ref_2.ascii"), - output=cls.refs[1], + output=cls.references[1], quiet=True, ) - cls.clas.append(tempname(10)) + cls.classifications.append(tempname(10)) cls.runModule( "r.in.ascii", input=os.path.join(cls.data_dir, "class_2.ascii"), - output=cls.clas[1], + output=cls.classifications[1], quiet=True, ) - cls.outp.append( + cls.expected_outputs.append( { - "map1": cls.refs[1], - "map2": cls.clas[1], + "map1": cls.references[1], + "map2": cls.classifications[1], "observations": 25, "correct": 0, "total_acc": 0.0, @@ -390,22 +390,22 @@ def setUpClass(cls): ) # Degenerate case #1 - cls.refs.append(tempname(10)) - cls.clas.append(tempname(10)) + cls.references.append(tempname(10)) + cls.classifications.append(tempname(10)) cls.runModule( "r.mapcalc", - expression=f"{cls.refs[2]}=null()", + expression=f"{cls.references[2]}=null()", quiet=True, ) cls.runModule( "r.mapcalc", - expression=f"{cls.clas[2]}=null()", + expression=f"{cls.classifications[2]}=null()", quiet=True, ) - cls.outp.append( + cls.expected_outputs.append( { - "map1": cls.refs[2], - "map2": cls.clas[2], + "map1": cls.references[2], + "map2": cls.classifications[2], "observations": 0, "correct": 0, "total_acc": 0.0, @@ -420,22 +420,22 @@ def setUpClass(cls): ) # Degenerate case #2 - cls.refs.append(tempname(10)) - cls.clas.append(tempname(10)) + cls.references.append(tempname(10)) + cls.classifications.append(tempname(10)) cls.runModule( "r.mapcalc", - expression=f"{cls.refs[3]}=1", + expression=f"{cls.references[3]}=1", quiet=True, ) cls.runModule( "r.mapcalc", - expression=f"{cls.clas[3]}=null()", + expression=f"{cls.classifications[3]}=null()", quiet=True, ) - cls.outp.append( + cls.expected_outputs.append( { - "map1": cls.refs[3], - "map2": cls.clas[3], + "map1": cls.references[3], + "map2": cls.classifications[3], "observations": 0, "correct": 0, "total_acc": 0.0, @@ -453,37 +453,41 @@ def setUpClass(cls): def tearDownClass(cls): """Remove temporary data""" cls.del_temp_region() - for ref in cls.refs: - cls.runModule("g.remove", flags="f", type="raster", name=ref) - for clas in cls.clas: - cls.runModule("g.remove", flags="f", type="raster", name=clas) + for reference in cls.references: + cls.runModule("g.remove", flags="f", type="raster", name=reference) + for classification in cls.classifications: + cls.runModule("g.remove", flags="f", type="raster", name=classification) def test_stdout(self): - for i in range(len(self.refs)): + for i in range(len(self.references)): out = read_command( "r.kappa", - reference=self.refs[i], - classification=self.clas[i], + reference=self.references[i], + classification=self.classifications[i], format="json", quiet=True, ) json_out = json.loads(decode(out)) - self.assertTrue(keyvalue_equals(self.outp[i], json_out, precision=4)) + self.assertTrue( + keyvalue_equals(self.expected_outputs[i], json_out, precision=4) + ) def test_file(self): - for i in range(len(self.refs)): + for i in range(len(self.references)): f = NamedTemporaryFile() self.runModule( "r.kappa", - reference=self.refs[i], - classification=self.clas[i], + reference=self.references[i], + classification=self.classifications[i], output=f.name, format="json", quiet=True, overwrite=True, ) json_out = json.loads(f.read()) - self.assertTrue(keyvalue_equals(self.outp[i], json_out, precision=4)) + self.assertTrue( + keyvalue_equals(self.expected_outputs[i], json_out, precision=4) + ) if __name__ == "__main__": From b1cf41e35066c1a8a767c055b1b9c0d6f9e1c984 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?M=C4=81ris=20Narti=C5=A1s?= Date: Fri, 2 Dec 2022 11:04:31 +0200 Subject: [PATCH 13/20] r.kappa: no need for quiet flag in tests if output is ignored anyway --- raster/r.kappa/testsuite/test_r_kappa.py | 15 --------------- 1 file changed, 15 deletions(-) diff --git a/raster/r.kappa/testsuite/test_r_kappa.py b/raster/r.kappa/testsuite/test_r_kappa.py index f4d8373889a..e595fc817ee 100644 --- a/raster/r.kappa/testsuite/test_r_kappa.py +++ b/raster/r.kappa/testsuite/test_r_kappa.py @@ -38,14 +38,12 @@ def setUpClass(cls): "r.in.ascii", input=os.path.join(cls.data_dir, "ref_1.ascii"), output=cls.ref_1, - quiet=True, ) cls.class_1 = tempname(10) cls.runModule( "r.in.ascii", input=os.path.join(cls.data_dir, "class_1.ascii"), output=cls.class_1, - quiet=True, ) @classmethod @@ -96,14 +94,12 @@ def setUpClass(cls): "r.in.ascii", input=os.path.join(cls.data_dir, "ref_1.ascii"), output=cls.ref_1, - quiet=True, ) cls.class_1 = tempname(10) cls.runModule( "r.in.ascii", input=os.path.join(cls.data_dir, "class_1.ascii"), output=cls.class_1, - quiet=True, ) cls.per_class = { "producer": [ @@ -209,14 +205,12 @@ def setUpClass(cls): "r.in.ascii", input=os.path.join(cls.data_dir, "ref_2.ascii"), output=cls.ref_1, - quiet=True, ) cls.class_1 = tempname(10) cls.runModule( "r.in.ascii", input=os.path.join(cls.data_dir, "class_2.ascii"), output=cls.class_1, - quiet=True, ) cls.per_class = { "producer": [ @@ -316,14 +310,12 @@ def setUpClass(cls): "r.in.ascii", input=os.path.join(cls.data_dir, "ref_1.ascii"), output=cls.references[0], - quiet=True, ) cls.classifications.append(tempname(10)) cls.runModule( "r.in.ascii", input=os.path.join(cls.data_dir, "class_1.ascii"), output=cls.classifications[0], - quiet=True, ) cls.expected_outputs.append( @@ -356,14 +348,12 @@ def setUpClass(cls): "r.in.ascii", input=os.path.join(cls.data_dir, "ref_2.ascii"), output=cls.references[1], - quiet=True, ) cls.classifications.append(tempname(10)) cls.runModule( "r.in.ascii", input=os.path.join(cls.data_dir, "class_2.ascii"), output=cls.classifications[1], - quiet=True, ) cls.expected_outputs.append( { @@ -395,12 +385,10 @@ def setUpClass(cls): cls.runModule( "r.mapcalc", expression=f"{cls.references[2]}=null()", - quiet=True, ) cls.runModule( "r.mapcalc", expression=f"{cls.classifications[2]}=null()", - quiet=True, ) cls.expected_outputs.append( { @@ -425,12 +413,10 @@ def setUpClass(cls): cls.runModule( "r.mapcalc", expression=f"{cls.references[3]}=1", - quiet=True, ) cls.runModule( "r.mapcalc", expression=f"{cls.classifications[3]}=null()", - quiet=True, ) cls.expected_outputs.append( { @@ -481,7 +467,6 @@ def test_file(self): classification=self.classifications[i], output=f.name, format="json", - quiet=True, overwrite=True, ) json_out = json.loads(f.read()) From 0b21367d605dad76c26f32dc70f83272bbd6846e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?M=C4=81ris=20Narti=C5=A1s?= Date: Fri, 2 Dec 2022 13:00:27 +0200 Subject: [PATCH 14/20] r.kappa: expand variable names for better readability --- raster/r.kappa/calc_metrics.c | 67 +++++++++++++----------- raster/r.kappa/kappa.h | 16 +++--- raster/r.kappa/main.c | 3 +- raster/r.kappa/prt2csv_mat.c | 6 +-- raster/r.kappa/prt_json.c | 43 +++++++++++---- raster/r.kappa/prt_kappa.c | 20 +++---- raster/r.kappa/prt_mat.c | 6 +-- raster/r.kappa/r.kappa.html | 42 +++++++++++++++ raster/r.kappa/testsuite/test_r_kappa.py | 64 ++++++++++++---------- 9 files changed, 172 insertions(+), 95 deletions(-) diff --git a/raster/r.kappa/calc_metrics.c b/raster/r.kappa/calc_metrics.c index 412e2438c1a..f7fbc53dcbb 100644 --- a/raster/r.kappa/calc_metrics.c +++ b/raster/r.kappa/calc_metrics.c @@ -23,11 +23,11 @@ void calc_metrics(void) metrics = (METRICS *) G_malloc(sizeof(METRICS)); if (nstats == 0) { G_warning(_("Both maps have nothing in common. Check the computational region.")); - metrics->obs = 0; + metrics->observations = 0; metrics->correct = 0; - metrics->total_acc = 0.0; + metrics->overall_accuracy = 0.0; metrics->kappa = na_value; - metrics->kappa_var = na_value; + metrics->kappa_variance = na_value; return; } @@ -71,10 +71,10 @@ void calc_metrics(void) } /* Calculate marginals */ - metrics->obs = 0; + metrics->observations = 0; metrics->correct = 0; - metrics->colsum = (long *)G_malloc(ncat * sizeof(long)); - metrics->rowsum = (long *)G_malloc(ncat * sizeof(long)); + metrics->col_sum = (long *)G_malloc(ncat * sizeof(long)); + metrics->row_sum = (long *)G_malloc(ncat * sizeof(long)); for (cndx = 0; cndx < ncat; cndx++) { long t_col = 0; long t_row = 0; @@ -85,24 +85,27 @@ void calc_metrics(void) x += ncat; t_row += metrics->matrix[cndx * ncat + k]; } - metrics->obs += t_row; - metrics->colsum[cndx] = t_col; - metrics->rowsum[cndx] = t_row; + metrics->observations += t_row; + metrics->col_sum[cndx] = t_col; + metrics->row_sum[cndx] = t_row; } - if (metrics->obs == 0) { - metrics->total_acc = 0.0; + if (metrics->observations == 0) { + metrics->overall_accuracy = 0.0; metrics->kappa = na_value; - metrics->kappa_var = na_value; + metrics->kappa_variance = na_value; return; } /* Calculate kappa values */ + /* Row sum */ pi = (double *)G_calloc(ncat, sizeof(double)); + /* Col sum */ pj = (double *)G_calloc(ncat, sizeof(double)); + /* Correct */ pii = (double *)G_calloc(ncat, sizeof(double)); - metrics->cond_kappa = (double *)G_calloc(ncat, sizeof(double)); - metrics->user_acc = (double *)G_calloc(ncat, sizeof(double)); - metrics->prod_acc = (double *)G_calloc(ncat, sizeof(double)); + metrics->conditional_kappa = (double *)G_calloc(ncat, sizeof(double)); + metrics->users_accuracy = (double *)G_calloc(ncat, sizeof(double)); + metrics->producers_accuracy = (double *)G_calloc(ncat, sizeof(double)); for (i = 0; i < ncat; i++) { for (l = 0; l < nstats; l++) { @@ -122,21 +125,22 @@ void calc_metrics(void) metrics->correct += pii[i]; } - metrics->total_acc = 100. * metrics->correct / metrics->obs; + metrics->overall_accuracy = + 100. * metrics->correct / metrics->observations; /* turn observations into probabilities */ for (i = 0; i < ncat; i++) { - pi[i] = pi[i] / metrics->obs; - pj[i] = pj[i] / metrics->obs; - pii[i] = pii[i] / metrics->obs; + pi[i] = pi[i] / metrics->observations; + pj[i] = pj[i] / metrics->observations; + pii[i] = pii[i] / metrics->observations; if (pi[i] == 0) - metrics->user_acc[i] = na_value; + metrics->users_accuracy[i] = na_value; else - metrics->user_acc[i] = 100 * (pii[i] / pi[i]); + metrics->users_accuracy[i] = 100 * (pii[i] / pi[i]); if (pj[i] == 0) - metrics->prod_acc[i] = na_value; + metrics->producers_accuracy[i] = na_value; else - metrics->prod_acc[i] = 100 * (pii[i] / pj[i]); + metrics->producers_accuracy[i] = 100 * (pii[i] / pj[i]); /* theta 1 */ p0 += pii[i]; /* theta 2 */ @@ -147,12 +151,12 @@ void calc_metrics(void) else metrics->kappa = na_value; - /* conditional kappa */ + /* conditional user's kappa */ for (i = 0; i < ncat; i++) { if (pi[i] == 0 || (pi[i] == 1 && pj[i] == 1)) - metrics->cond_kappa[i] = na_value; + metrics->conditional_kappa[i] = na_value; else - metrics->cond_kappa[i] = + metrics->conditional_kappa[i] = (pii[i] - pi[i] * pj[i]) / (pi[i] - pi[i] * pj[i]); inter1 += pii[i] * pow(((1 - pC) - (1 - p0) * (pi[i] + pj[i])), 2.); } @@ -167,13 +171,14 @@ void calc_metrics(void) b_i = i; } inter2 += - Gstats[l].count * pow((pi[a_i] + pj[b_i]), 2.) / metrics->obs; + Gstats[l].count * pow((pi[a_i] + pj[b_i]), + 2.) / metrics->observations; } } - metrics->kappa_var = (inter1 + pow((1 - p0), 2.) * inter2 - - pow((p0 * pC - 2 * pC + p0), 2.)) / pow((1 - pC), - 4.) / - metrics->obs; + metrics->kappa_variance = (inter1 + pow((1 - p0), 2.) * inter2 - + pow((p0 * pC - 2 * pC + p0), + 2.)) / pow((1 - pC), + 4.) / metrics->observations; G_free(pi); G_free(pj); diff --git a/raster/r.kappa/kappa.h b/raster/r.kappa/kappa.h index 28fad9524f7..7e20092f52b 100644 --- a/raster/r.kappa/kappa.h +++ b/raster/r.kappa/kappa.h @@ -17,17 +17,17 @@ struct _layer_ struct _metrics_ { - long obs; + long observations; long correct; long *matrix; - long *rowsum; - long *colsum; - double total_acc; - double *prod_acc; - double *user_acc; + long *row_sum; + long *col_sum; + double overall_accuracy; + double *producers_accuracy; + double *users_accuracy; double kappa; - double kappa_var; - double *cond_kappa; + double kappa_variance; + double *conditional_kappa; }; extern struct Cell_head window; diff --git a/raster/r.kappa/main.c b/raster/r.kappa/main.c index e4ec43e1fa9..9342f8e968d 100644 --- a/raster/r.kappa/main.c +++ b/raster/r.kappa/main.c @@ -10,10 +10,11 @@ * Glynn Clements , * Jachym Cepicky , * Jan-Oliver Wagner + * Maris Nartiss * PURPOSE: tabulates the error matrix of classification result by * crossing classified map layer with respect to reference map * layer - * COPYRIGHT: (C) 1999-2006 by the GRASS Development Team + * COPYRIGHT: (C) 1999-2022 by the GRASS Development Team * * This program is free software under the GNU General Public * License (>=v2). Read the file COPYING that comes with GRASS diff --git a/raster/r.kappa/prt2csv_mat.c b/raster/r.kappa/prt2csv_mat.c index 6a5c5770dc2..58079c0d8d7 100644 --- a/raster/r.kappa/prt2csv_mat.c +++ b/raster/r.kappa/prt2csv_mat.c @@ -68,16 +68,16 @@ void prn2csv_error_mat(int hdr) fprintf(fd, "%ld\t", metrics->matrix[thisone]); } /* row marginal summation */ - fprintf(fd, "%ld", metrics->rowsum[rndx]); + fprintf(fd, "%ld", metrics->row_sum[rndx]); fprintf(fd, "\n"); } /* column marginal summation */ fprintf(fd, "ColSum\t"); for (cndx = first_col; cndx < last_col; cndx++) { - fprintf(fd, "%ld\t", metrics->colsum[cndx]); + fprintf(fd, "%ld\t", metrics->col_sum[cndx]); } /* grand total */ - fprintf(fd, "%ld", metrics->obs); + fprintf(fd, "%ld", metrics->observations); fprintf(fd, "\n\n"); if (output != NULL) fclose(fd); diff --git a/raster/r.kappa/prt_json.c b/raster/r.kappa/prt_json.c index eb79dce40a9..0f9c4dc9e2d 100644 --- a/raster/r.kappa/prt_json.c +++ b/raster/r.kappa/prt_json.c @@ -20,13 +20,14 @@ void prn_json() output); fprintf(fd, "{\n"); - fprintf(fd, " \"map1\": \"%s\",\n", maps[0]); - fprintf(fd, " \"map2\": \"%s\",\n", maps[1]); - fprintf(fd, " \"observations\": %ld,\n", metrics->obs); + fprintf(fd, " \"reference\": \"%s\",\n", maps[0]); + fprintf(fd, " \"classification\": \"%s\",\n", maps[1]); + fprintf(fd, " \"observations\": %ld,\n", metrics->observations); fprintf(fd, " \"correct\": %ld,\n", metrics->correct); - fprintf(fd, " \"total_acc\": %.5f,\n", metrics->total_acc); + fprintf(fd, " \"overall_accuracy\": %.5f,\n", + metrics->overall_accuracy); fprintf(fd, " \"kappa\": %.5f,\n", metrics->kappa); - fprintf(fd, " \"kappa_var\": %.5f,\n", metrics->kappa_var); + fprintf(fd, " \"kappa_variance\": %.5f,\n", metrics->kappa_variance); fprintf(fd, " \"cats\": ["); first = 1; for (int i = 0; i < ncat; i++) { @@ -55,34 +56,54 @@ void prn_json() } } fprintf(fd, "]\n ],\n"); - fprintf(fd, " \"prod_acc\": ["); + fprintf(fd, " \"row_sum\": ["); first = 1; for (int i = 0; i < ncat; i++) { if (first) first = 0; else fprintf(fd, ", "); - fprintf(fd, "%.5f", metrics->prod_acc[i]); + fprintf(fd, "%ld", metrics->row_sum[i]); } fprintf(fd, "],\n"); - fprintf(fd, " \"user_acc\": ["); + fprintf(fd, " \"col_sum\": ["); first = 1; for (int i = 0; i < ncat; i++) { if (first) first = 0; else fprintf(fd, ", "); - fprintf(fd, "%.5f", metrics->user_acc[i]); + fprintf(fd, "%ld", metrics->col_sum[i]); } fprintf(fd, "],\n"); - fprintf(fd, " \"cond_kappa\": ["); + fprintf(fd, " \"producers_accuracy\": ["); first = 1; for (int i = 0; i < ncat; i++) { if (first) first = 0; else fprintf(fd, ", "); - fprintf(fd, "%.5f", metrics->cond_kappa[i]); + fprintf(fd, "%.5f", metrics->producers_accuracy[i]); + } + fprintf(fd, "],\n"); + fprintf(fd, " \"users_accuracy\": ["); + first = 1; + for (int i = 0; i < ncat; i++) { + if (first) + first = 0; + else + fprintf(fd, ", "); + fprintf(fd, "%.5f", metrics->users_accuracy[i]); + } + fprintf(fd, "],\n"); + fprintf(fd, " \"conditional_kappa\": ["); + first = 1; + for (int i = 0; i < ncat; i++) { + if (first) + first = 0; + else + fprintf(fd, ", "); + fprintf(fd, "%.5f", metrics->conditional_kappa[i]); } fprintf(fd, "]\n"); diff --git a/raster/r.kappa/prt_kappa.c b/raster/r.kappa/prt_kappa.c index bfdb15968a3..8e9351f4b08 100644 --- a/raster/r.kappa/prt_kappa.c +++ b/raster/r.kappa/prt_kappa.c @@ -20,18 +20,18 @@ void prt_kappa(void) fprintf(fd, "\nCats\t%% Comission\t%% Omission\tEstimated Kappa\n"); for (i = 0; i < ncat; i++) { fprintf(fd, "%ld\t", rlst[i]); - if (metrics->user_acc[i] == na_value) + if (metrics->users_accuracy[i] == na_value) fprintf(fd, "NA\t\t"); else - fprintf(fd, "%f\t", 100 - metrics->user_acc[i]); - if (metrics->prod_acc[i] == na_value) + fprintf(fd, "%f\t", 100 - metrics->users_accuracy[i]); + if (metrics->producers_accuracy[i] == na_value) fprintf(fd, "NA\t\t"); else - fprintf(fd, "%f\t", 100 - metrics->prod_acc[i]); - if (metrics->cond_kappa[i] == na_value) + fprintf(fd, "%f\t", 100 - metrics->producers_accuracy[i]); + if (metrics->conditional_kappa[i] == na_value) fprintf(fd, "NA\n"); else - fprintf(fd, "%f\n", metrics->cond_kappa[i]); + fprintf(fd, "%f\n", metrics->conditional_kappa[i]); } fprintf(fd, "\n"); fprintf(fd, "Kappa\t\tKappa Variance\n"); @@ -39,14 +39,14 @@ void prt_kappa(void) fprintf(fd, "NA"); else fprintf(fd, "%f", metrics->kappa); - if (metrics->kappa_var == na_value) + if (metrics->kappa_variance == na_value) fprintf(fd, "\tNA\n"); else - fprintf(fd, "\t%f\n", metrics->kappa_var); + fprintf(fd, "\t%f\n", metrics->kappa_variance); fprintf(fd, "\nObs Correct\tTotal Obs\t%% Observed Correct\n"); - fprintf(fd, "%ld\t\t%ld\t\t%f\n", metrics->correct, metrics->obs, - metrics->total_acc); + fprintf(fd, "%ld\t\t%ld\t\t%f\n", metrics->correct, metrics->observations, + metrics->overall_accuracy); if (output != NULL) fclose(fd); diff --git a/raster/r.kappa/prt_mat.c b/raster/r.kappa/prt_mat.c index 6b451fada9f..18f1c694c25 100644 --- a/raster/r.kappa/prt_mat.c +++ b/raster/r.kappa/prt_mat.c @@ -75,18 +75,18 @@ void prn_error_mat(int out_cols, int hdr) } /* row marginal summation */ if (addflag) { - fprintf(fd, "%ld", metrics->rowsum[rndx]); + fprintf(fd, "%ld", metrics->row_sum[rndx]); } fprintf(fd, "\n"); } /* column marginal summation */ fprintf(fd, "Col Sum\t\t"); for (cndx = first_col; cndx < last_col; cndx++) { - fprintf(fd, "%ld\t", metrics->colsum[cndx]); + fprintf(fd, "%ld\t", metrics->col_sum[cndx]); } /* grand total */ if (addflag) - fprintf(fd, "%ld", metrics->obs); + fprintf(fd, "%ld", metrics->observations); fprintf(fd, "\n\n"); } diff --git a/raster/r.kappa/r.kappa.html b/raster/r.kappa/r.kappa.html index c83af190a49..42c453b3368 100644 --- a/raster/r.kappa/r.kappa.html +++ b/raster/r.kappa/r.kappa.html @@ -34,9 +34,46 @@

DESCRIPTION

panel. There is a total at the bottom of each column representing the sum of all the rows in that column. +

OUTPUT VARIABLES

+

+All output variables (except kappa variance) have been +validated to produce correct values in accordance +to formulas given by Rossiter, D.G., 2004. "Technical Note: +Statistical methods for accuracy assessment of classified +thematic maps". +

+
Observations
+
Overall count of observed cells (sum of both correct + and incorrect ones).
+
Correct
+
Overall count of correct cells (cells with equal value + in reference and classification maps).
+
Overall accuracy
+
Number of correct cells divided by overall cell count + (expressed in percent).
+
User's accuracy
+
Share of correctly classified cells out of all cells + classified as belonging to specified class (expressed in percent). + Inverse of commission error.
+
Commission
+
Commission error = 100 - user's accuracy.
+
Producer's accuracy
+
Share of correctly classified cells out of all cells + known to belong to specified class (expressed in percent). + Inverse of omission error.
+
Omission
+
Omission error = 100 - producer's accuracy.
+
Kappa
+
Choen's kappa index value.
+
Kappa variance
+
Variance of kappa index. Correctness needs to be validated.
+
Conditional kappa
+
Conditional user's kappa for specified class.
+

NOTES

+

It is recommended to reclassify categories of classified result map layer into a more manageable number before running r.kappa on the classified raster map @@ -68,6 +105,11 @@

NOTES

  • Pj[i] is the probability of classification j having classified the point as i.
  • +

    +Some of reported values (Choen's kappa, overall accuracy) can be +misleading if cell count among classes is not balanced. See e.g. +Powers, D.M.W., 2012. The Problem with Kappa. +

    EXAMPLE

    Example for North Carolina sample dataset: diff --git a/raster/r.kappa/testsuite/test_r_kappa.py b/raster/r.kappa/testsuite/test_r_kappa.py index e595fc817ee..ad6ba6e22ed 100644 --- a/raster/r.kappa/testsuite/test_r_kappa.py +++ b/raster/r.kappa/testsuite/test_r_kappa.py @@ -320,13 +320,13 @@ def setUpClass(cls): cls.expected_outputs.append( { - "map1": cls.references[0], - "map2": cls.classifications[0], + "reference": cls.references[0], + "classification": cls.classifications[0], "observations": 18, "correct": 11, - "total_acc": 61.111111, + "overall_accuracy": 61.111111, "kappa": 0.52091, - "kappa_var": 0.016871, + "kappa_variance": 0.016871, "cats": [1, 2, 3, 4, 5, 6], "matrix": [ [4, 0, 0, 0, 0, 0], @@ -336,9 +336,11 @@ def setUpClass(cls): [0, 0, 0, 0, 1, 0], [0, 2, 0, 0, 0, 2], ], - "prod_acc": [57.1429, 0.0, 100.0, -999.0, 100.0, 66.66666], - "user_acc": [100.0, -999.0, 80.0, 0.0, 100.0, 50.0], - "cond_kappa": [1.0, -999.0, 0.742857, 0.0, 1.0, 0.400], + "row_sum": [4, 0, 5, 4, 1, 4], + "col_sum": [7, 3, 4, 0, 1, 3], + "producers_accuracy": [57.1429, 0.0, 100.0, -999.0, 100.0, 66.66666], + "users_accuracy": [100.0, -999.0, 80.0, 0.0, 100.0, 50.0], + "conditional_kappa": [1.0, -999.0, 0.742857, 0.0, 1.0, 0.400], } ) @@ -357,13 +359,13 @@ def setUpClass(cls): ) cls.expected_outputs.append( { - "map1": cls.references[1], - "map2": cls.classifications[1], + "reference": cls.references[1], + "classification": cls.classifications[1], "observations": 25, "correct": 0, - "total_acc": 0.0, + "overall_accuracy": 0.0, "kappa": 0.0, - "kappa_var": 0.0, + "kappa_variance": 0.0, "cats": [0, 1, 2, 3, 4, 9], "matrix": [ [0, 0, 0, 0, 0, 0], @@ -373,9 +375,11 @@ def setUpClass(cls): [0, 0, 0, 0, 0, 0], [8, 8, 4, 1, 4, 0], ], - "prod_acc": [0.0, 0.0, 0.0, 0.0, 0.0, -999.0], - "user_acc": [-999.0, -999.0, -999.0, -999.0, -999.0, 0.0], - "cond_kappa": [-999.0, -999.0, -999.0, -999.0, -999.0, 0.0], + "row_sum": [0, 0, 0, 0, 0, 25], + "col_sum": [8, 8, 4, 1, 4, 0], + "producers_accuracy": [0.0, 0.0, 0.0, 0.0, 0.0, -999.0], + "users_accuracy": [-999.0, -999.0, -999.0, -999.0, -999.0, 0.0], + "conditional_kappa": [-999.0, -999.0, -999.0, -999.0, -999.0, 0.0], } ) @@ -392,18 +396,20 @@ def setUpClass(cls): ) cls.expected_outputs.append( { - "map1": cls.references[2], - "map2": cls.classifications[2], + "reference": cls.references[2], + "classification": cls.classifications[2], "observations": 0, "correct": 0, - "total_acc": 0.0, + "overall_accuracy": 0.0, "kappa": -999.0, - "kappa_var": -999.0, + "kappa_variance": -999.0, "cats": [], "matrix": [[]], - "prod_acc": [], - "user_acc": [], - "cond_kappa": [], + "row_sum": [], + "col_sum": [], + "producers_accuracy": [], + "users_accuracy": [], + "conditional_kappa": [], } ) @@ -420,18 +426,20 @@ def setUpClass(cls): ) cls.expected_outputs.append( { - "map1": cls.references[3], - "map2": cls.classifications[3], + "reference": cls.references[3], + "classification": cls.classifications[3], "observations": 0, "correct": 0, - "total_acc": 0.0, + "overall_accuracy": 0.0, "kappa": -999.0, - "kappa_var": -999.0, + "kappa_variance": -999.0, "cats": [], "matrix": [[]], - "prod_acc": [], - "user_acc": [], - "cond_kappa": [], + "row_sum": [], + "col_sum": [], + "producers_accuracy": [], + "users_accuracy": [], + "conditional_kappa": [], } ) From 21f13dddaa9ba183685439663ac36795e7a9bba1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?M=C4=81ris=20Narti=C5=A1s?= Date: Fri, 2 Dec 2022 13:08:02 +0200 Subject: [PATCH 15/20] r.kappa: reindent code with clang-format --- raster/r.kappa/calc_metrics.c | 24 ++++++++++-------------- raster/r.kappa/main.c | 22 +++++++++++----------- raster/r.kappa/mask.c | 1 - raster/r.kappa/prt2csv_mat.c | 10 +++++----- raster/r.kappa/prt_hdr.c | 5 ++--- raster/r.kappa/prt_json.c | 7 ++----- raster/r.kappa/prt_kappa.c | 6 +++--- raster/r.kappa/prt_label.c | 3 +-- raster/r.kappa/prt_mat.c | 12 ++++++------ raster/r.kappa/stats.c | 7 ++----- 10 files changed, 42 insertions(+), 55 deletions(-) diff --git a/raster/r.kappa/calc_metrics.c b/raster/r.kappa/calc_metrics.c index f7fbc53dcbb..7b772a06629 100644 --- a/raster/r.kappa/calc_metrics.c +++ b/raster/r.kappa/calc_metrics.c @@ -7,7 +7,6 @@ static int longcomp(const void *aa, const void *bb); static int collapse(long *l, int n); - void calc_metrics(void) { int i, j, k; @@ -20,9 +19,10 @@ void calc_metrics(void) double inter1 = 0.0, inter2 = 0.0; int a_i = 0, b_i = 0; - metrics = (METRICS *) G_malloc(sizeof(METRICS)); + metrics = (METRICS *)G_malloc(sizeof(METRICS)); if (nstats == 0) { - G_warning(_("Both maps have nothing in common. Check the computational region.")); + G_warning(_("Both maps have nothing in common. Check the computational " + "region.")); metrics->observations = 0; metrics->correct = 0; metrics->overall_accuracy = 0.0; @@ -47,7 +47,8 @@ void calc_metrics(void) ncat1 = collapse(rlst, nstats); ncat2 = collapse(clst, nstats); - /* copy clst to the end of rlst, remove repeated cats, and free unused memory */ + /* copy clst to the end of rlst, remove repeated cats, and free unused + * memory */ for (i = 0; i < ncat2; i++) rlst[ncat1 + i] = clst[i]; qsort(rlst, ncat1 + ncat2, sizeof(long), longcomp); @@ -125,8 +126,7 @@ void calc_metrics(void) metrics->correct += pii[i]; } - metrics->overall_accuracy = - 100. * metrics->correct / metrics->observations; + metrics->overall_accuracy = 100. * metrics->correct / metrics->observations; /* turn observations into probabilities */ for (i = 0; i < ncat; i++) { @@ -170,22 +170,19 @@ void calc_metrics(void) if (Gstats[l].cats[1] == rlst[i]) b_i = i; } - inter2 += - Gstats[l].count * pow((pi[a_i] + pj[b_i]), - 2.) / metrics->observations; + inter2 += Gstats[l].count * pow((pi[a_i] + pj[b_i]), 2.) / + metrics->observations; } } metrics->kappa_variance = (inter1 + pow((1 - p0), 2.) * inter2 - - pow((p0 * pC - 2 * pC + p0), - 2.)) / pow((1 - pC), - 4.) / metrics->observations; + pow((p0 * pC - 2 * pC + p0), 2.)) / + pow((1 - pC), 4.) / metrics->observations; G_free(pi); G_free(pj); G_free(pii); }; - /* remove repeated values */ static int collapse(long *l, int n) { @@ -206,7 +203,6 @@ static int collapse(long *l, int n) return m; } - static int longcomp(const void *aa, const void *bb) { const long *a = aa; diff --git a/raster/r.kappa/main.c b/raster/r.kappa/main.c index 9342f8e968d..05e3e860214 100644 --- a/raster/r.kappa/main.c +++ b/raster/r.kappa/main.c @@ -4,15 +4,15 @@ * MODULE: r.kappa * AUTHOR(S): Tao Wen, UIUC (original contributor) * Markus Neteler , - * Roberto Flor , - * Bernhard Reiter , - * Brad Douglas , - * Glynn Clements , - * Jachym Cepicky , + * Roberto Flor , + * Bernhard Reiter , + * Brad Douglas , + * Glynn Clements , + * Jachym Cepicky , * Jan-Oliver Wagner * Maris Nartiss * PURPOSE: tabulates the error matrix of classification result by - * crossing classified map layer with respect to reference map + * crossing classified map layer with respect to reference map * layer * COPYRIGHT: (C) 1999-2022 by the GRASS Development Team * @@ -106,8 +106,8 @@ int main(int argc, char **argv) parms.format->required = YES; parms.format->label = _("Output format"); parms.format->options = "plain,json"; - parms.format->descriptions = - "plain;Plain text output;" "json;JSON (JavaScript Object Notation);"; + parms.format->descriptions = "plain;Plain text output;" + "json;JSON (JavaScript Object Notation);"; parms.format->answer = "plain"; parms.format->guisection = _("Output settings"); @@ -132,7 +132,8 @@ int main(int argc, char **argv) if (strcmp(parms.format->answer, "json") == 0 && (flags.m->answer || flags.h->answer || flags.w->answer)) - G_warning(_("When JSON output format is requested, all formatting flags are ignored")); + G_warning(_("When JSON output format is requested, all formatting " + "flags are ignored")); G_get_window(&window); @@ -170,7 +171,6 @@ int main(int argc, char **argv) return EXIT_SUCCESS; } - static void layer(const char *s) { char name[GNAME_MAX]; @@ -182,7 +182,7 @@ static void layer(const char *s) G_fatal_error(_("Raster map <%s> not found"), s); n = nlayers++; - layers = (LAYER *) G_realloc(layers, 2 * sizeof(LAYER)); + layers = (LAYER *)G_realloc(layers, 2 * sizeof(LAYER)); layers[n].name = G_store(name); layers[n].mapset = mapset; Rast_read_cats(name, mapset, &layers[n].labels); diff --git a/raster/r.kappa/mask.c b/raster/r.kappa/mask.c index 3f3e28ea9c2..729f7c5b13a 100644 --- a/raster/r.kappa/mask.c +++ b/raster/r.kappa/mask.c @@ -9,7 +9,6 @@ static char *append(char *results, char *text); static void do_text(char *text, long first, long last); static int reclass_text(char *text, struct Reclass *reclass, int next); - char *maskinfo(void) { struct Reclass reclass; diff --git a/raster/r.kappa/prt2csv_mat.c b/raster/r.kappa/prt2csv_mat.c index 58079c0d8d7..d484c43eaf6 100644 --- a/raster/r.kappa/prt2csv_mat.c +++ b/raster/r.kappa/prt2csv_mat.c @@ -4,7 +4,6 @@ #include "kappa.h" #include "local_proto.h" - void prn2csv_error_mat(int hdr) { int j; @@ -27,8 +26,9 @@ void prn2csv_error_mat(int hdr) fd = stdout; if (fd == NULL) - G_fatal_error(_("Cannot open file <%s> to write cats and counts (error matrix)"), - output); + G_fatal_error( + _("Cannot open file <%s> to write cats and counts (error matrix)"), + output); else { /* format and print out the error matrix in panels */ first_col = 0; @@ -40,7 +40,7 @@ void prn2csv_error_mat(int hdr) /* print labels MAP1 */ for (j = 0; j < ncat; j++) { cats = rlst; - cl = Rast_get_c_cat((CELL *) & (cats[j]), &(layers[0].labels)); + cl = Rast_get_c_cat((CELL *)&(cats[j]), &(layers[0].labels)); if (cl) G_strip(cl); if (cl == NULL || *cl == 0) @@ -55,7 +55,7 @@ void prn2csv_error_mat(int hdr) /* body of the matrix */ for (rndx = 0; rndx < ncat; rndx++) { cats = rlst; - cl = Rast_get_c_cat((CELL *) & (cats[rndx]), &(layers[1].labels)); + cl = Rast_get_c_cat((CELL *)&(cats[rndx]), &(layers[1].labels)); if (cl) G_strip(cl); if (cl == NULL || *cl == 0) diff --git a/raster/r.kappa/prt_hdr.c b/raster/r.kappa/prt_hdr.c index f92972c0790..c34cd05a7d3 100644 --- a/raster/r.kappa/prt_hdr.c +++ b/raster/r.kappa/prt_hdr.c @@ -4,7 +4,6 @@ #include "kappa.h" #include "local_proto.h" - void prn_header(void) { int i, len; @@ -35,8 +34,8 @@ void prn_header(void) G_strip(titles); if (titles == NULL || *titles == 0) titles = "(untitled)"; - sprintf(buf, "%*s%-*s%d = %s (%s in %s)", i * 6, "", len, label, - i + 1, titles, layers[i].name, layers[i].mapset); + sprintf(buf, "%*s%-*s%d = %s (%s in %s)", i * 6, "", len, label, i + 1, + titles, layers[i].name, layers[i].mapset); fprintf(fd, "%s\n", buf); } diff --git a/raster/r.kappa/prt_json.c b/raster/r.kappa/prt_json.c index 0f9c4dc9e2d..b62951ad496 100644 --- a/raster/r.kappa/prt_json.c +++ b/raster/r.kappa/prt_json.c @@ -4,7 +4,6 @@ #include "kappa.h" #include "local_proto.h" - void prn_json() { bool first; @@ -16,16 +15,14 @@ void prn_json() fd = stdout; if (fd == NULL) - G_fatal_error(_("Cannot open file <%s> to write JSON output"), - output); + G_fatal_error(_("Cannot open file <%s> to write JSON output"), output); fprintf(fd, "{\n"); fprintf(fd, " \"reference\": \"%s\",\n", maps[0]); fprintf(fd, " \"classification\": \"%s\",\n", maps[1]); fprintf(fd, " \"observations\": %ld,\n", metrics->observations); fprintf(fd, " \"correct\": %ld,\n", metrics->correct); - fprintf(fd, " \"overall_accuracy\": %.5f,\n", - metrics->overall_accuracy); + fprintf(fd, " \"overall_accuracy\": %.5f,\n", metrics->overall_accuracy); fprintf(fd, " \"kappa\": %.5f,\n", metrics->kappa); fprintf(fd, " \"kappa_variance\": %.5f,\n", metrics->kappa_variance); fprintf(fd, " \"cats\": ["); diff --git a/raster/r.kappa/prt_kappa.c b/raster/r.kappa/prt_kappa.c index 8e9351f4b08..a420ad6947c 100644 --- a/raster/r.kappa/prt_kappa.c +++ b/raster/r.kappa/prt_kappa.c @@ -4,7 +4,6 @@ #include "kappa.h" #include "local_proto.h" - void prt_kappa(void) { int i; @@ -13,8 +12,9 @@ void prt_kappa(void) if (output == NULL) fd = stdout; else if ((fd = fopen(output, "a")) == NULL) - G_fatal_error(_("Cannot open file <%s> to write kappa and relevant parameters"), - output); + G_fatal_error( + _("Cannot open file <%s> to write kappa and relevant parameters"), + output); /* print out the comission and omission accuracy, and conditional kappa */ fprintf(fd, "\nCats\t%% Comission\t%% Omission\tEstimated Kappa\n"); diff --git a/raster/r.kappa/prt_label.c b/raster/r.kappa/prt_label.c index 06686c5bb3e..bf8f1f0e948 100644 --- a/raster/r.kappa/prt_label.c +++ b/raster/r.kappa/prt_label.c @@ -3,7 +3,6 @@ #include "kappa.h" #include "local_proto.h" - void prt_label(void) { int i, j; @@ -22,7 +21,7 @@ void prt_label(void) fprintf(fd, "MAP%-d Category Description\n", i + 1); for (j = 0; j < ncat; j++) { cats = rlst; - cl = Rast_get_c_cat((CELL *) & (cats[j]), &(layers[i].labels)); + cl = Rast_get_c_cat((CELL *)&(cats[j]), &(layers[i].labels)); if (cl) G_strip(cl); if (cl == NULL || *cl == 0) diff --git a/raster/r.kappa/prt_mat.c b/raster/r.kappa/prt_mat.c index 18f1c694c25..3eeaa511d36 100644 --- a/raster/r.kappa/prt_mat.c +++ b/raster/r.kappa/prt_mat.c @@ -4,7 +4,6 @@ #include "kappa.h" #include "local_proto.h" - void prn_error_mat(int out_cols, int hdr) { int num_panels, at_panel; @@ -27,16 +26,16 @@ void prn_error_mat(int out_cols, int hdr) fd = stdout; if (fd == NULL) - G_fatal_error(_("Cannot open file <%s> to write cats and counts (error matrix)"), - output); + G_fatal_error( + _("Cannot open file <%s> to write cats and counts (error matrix)"), + output); else { /* format and print out the error matrix in panels */ out_cols = (out_cols == 132) ? 9 : 5; num_panels = ncat / out_cols; if (ncat % out_cols) num_panels++; - fprintf(fd, - "\nError Matrix (MAP1: reference, MAP2: classification)\n"); + fprintf(fd, "\nError Matrix (MAP1: reference, MAP2: classification)\n"); for (at_panel = 0; at_panel < num_panels; at_panel++) { first_col = at_panel * out_cols; @@ -44,7 +43,8 @@ void prn_error_mat(int out_cols, int hdr) if (last_col >= ncat) { last_col = ncat; } - /* determine whether room available for row total at the end of last panel */ + /* determine whether room available for row total at the end of last + * panel */ addflag = 0; if (at_panel == (num_panels - 1) && (last_col - first_col) < (out_cols - 1)) { diff --git a/raster/r.kappa/stats.c b/raster/r.kappa/stats.c index 55d8173d779..eb9b977d6b2 100644 --- a/raster/r.kappa/stats.c +++ b/raster/r.kappa/stats.c @@ -7,14 +7,12 @@ #include #include "local_proto.h" - static void die(void) { unlink(stats_file); G_fatal_error(_("Problem reading r.stats output")); } - int stats(void) { char buf[1024]; @@ -45,8 +43,7 @@ int stats(void) argv[argc++] = "separator=:"; - sprintf(buf, "input=%s,%s", - G_fully_qualified_name(mname, mmapset), + sprintf(buf, "input=%s,%s", G_fully_qualified_name(mname, mmapset), G_fully_qualified_name(rname, rmapset)); argv[argc++] = buf; @@ -72,7 +69,7 @@ int stats(void) tokens = G_tokenize(buf, ":"); i = 0; ns = nstats++; - Gstats = (GSTATS *) G_realloc(Gstats, nstats * sizeof(GSTATS)); + Gstats = (GSTATS *)G_realloc(Gstats, nstats * sizeof(GSTATS)); Gstats[ns].cats = (long *)G_calloc(nlayers, sizeof(long)); for (nl = 0; nl < nlayers; nl++) { if (sscanf(tokens[i++], "%ld", &Gstats[ns].cats[nl]) != 1) From f08caf142eca1d573e98115177c054485614e531 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?M=C4=81ris=20Narti=C5=A1s?= Date: Fri, 2 Dec 2022 13:50:55 +0200 Subject: [PATCH 16/20] r.kappa: expand function names --- raster/r.kappa/local_proto.h | 24 +++++++++---------- raster/r.kappa/main.c | 10 ++++---- .../{prt2csv_mat.c => print2csv_mat.c} | 2 +- raster/r.kappa/{prt_hdr.c => print_header.c} | 2 +- raster/r.kappa/{prt_json.c => print_json.c} | 2 +- raster/r.kappa/{prt_kappa.c => print_kappa.c} | 4 ++-- raster/r.kappa/{prt_label.c => print_label.c} | 2 +- raster/r.kappa/{prt_mat.c => print_mat.c} | 2 +- 8 files changed, 24 insertions(+), 24 deletions(-) rename raster/r.kappa/{prt2csv_mat.c => print2csv_mat.c} (98%) rename raster/r.kappa/{prt_hdr.c => print_header.c} (97%) rename raster/r.kappa/{prt_json.c => print_json.c} (99%) rename raster/r.kappa/{prt_kappa.c => print_kappa.c} (97%) rename raster/r.kappa/{prt_label.c => print_label.c} (97%) rename raster/r.kappa/{prt_mat.c => print_mat.c} (98%) diff --git a/raster/r.kappa/local_proto.h b/raster/r.kappa/local_proto.h index 63cf558e535..8e08f5343ff 100644 --- a/raster/r.kappa/local_proto.h +++ b/raster/r.kappa/local_proto.h @@ -1,23 +1,23 @@ -/* prt_kappa.c */ -void prt_kappa(void); +/* print_kappa.c */ +void print_kappa(void); /* mask.c */ char *maskinfo(void); -/* prt_hdr.c */ -void prn_header(void); +/* print_hdr.c */ +void print_header(void); -/* prt_label.c */ -void prt_label(void); +/* print_label.c */ +void print_label(void); -/* prt_mat.c */ -void prn_error_mat(int out_cols, int hdr); +/* print_mat.c */ +void print_error_mat(int out_cols, int hdr); -/* prt2csv_mat.c */ -void prn2csv_error_mat(int hdr); +/* print2csv_mat.c */ +void print2csv_error_mat(int hdr); -/* prt_json.c */ -void prn_json(void); +/* print_json.c */ +void print_json(void); /* stats.c */ int stats(void); diff --git a/raster/r.kappa/main.c b/raster/r.kappa/main.c index 05e3e860214..a7501262ae6 100644 --- a/raster/r.kappa/main.c +++ b/raster/r.kappa/main.c @@ -152,21 +152,21 @@ int main(int argc, char **argv) calc_metrics(); if (strcmp(parms.format->answer, "json") == 0) { - prn_json(); + print_json(); } else if (flags.m->answer) { - prn2csv_error_mat(flags.h->answer); + print2csv_error_mat(flags.h->answer); } else { /* print header of the output */ if (!flags.h->answer) - prn_header(); + print_header(); /* prepare the data for calculation */ - prn_error_mat(flags.w->answer ? 132 : 80, flags.h->answer); + print_error_mat(flags.w->answer ? 132 : 80, flags.h->answer); /* generate the error matrix, kappa and variance */ - prt_kappa(); + print_kappa(); } return EXIT_SUCCESS; } diff --git a/raster/r.kappa/prt2csv_mat.c b/raster/r.kappa/print2csv_mat.c similarity index 98% rename from raster/r.kappa/prt2csv_mat.c rename to raster/r.kappa/print2csv_mat.c index d484c43eaf6..8365d6c1cf0 100644 --- a/raster/r.kappa/prt2csv_mat.c +++ b/raster/r.kappa/print2csv_mat.c @@ -4,7 +4,7 @@ #include "kappa.h" #include "local_proto.h" -void prn2csv_error_mat(int hdr) +void print2csv_error_mat(int hdr) { int j; diff --git a/raster/r.kappa/prt_hdr.c b/raster/r.kappa/print_header.c similarity index 97% rename from raster/r.kappa/prt_hdr.c rename to raster/r.kappa/print_header.c index c34cd05a7d3..eed803ae660 100644 --- a/raster/r.kappa/prt_hdr.c +++ b/raster/r.kappa/print_header.c @@ -4,7 +4,7 @@ #include "kappa.h" #include "local_proto.h" -void prn_header(void) +void print_header(void) { int i, len; char buf[1024], *titles, *label; diff --git a/raster/r.kappa/prt_json.c b/raster/r.kappa/print_json.c similarity index 99% rename from raster/r.kappa/prt_json.c rename to raster/r.kappa/print_json.c index b62951ad496..3d1ab360e9e 100644 --- a/raster/r.kappa/prt_json.c +++ b/raster/r.kappa/print_json.c @@ -4,7 +4,7 @@ #include "kappa.h" #include "local_proto.h" -void prn_json() +void print_json() { bool first; FILE *fd; diff --git a/raster/r.kappa/prt_kappa.c b/raster/r.kappa/print_kappa.c similarity index 97% rename from raster/r.kappa/prt_kappa.c rename to raster/r.kappa/print_kappa.c index a420ad6947c..d47f0b327cd 100644 --- a/raster/r.kappa/prt_kappa.c +++ b/raster/r.kappa/print_kappa.c @@ -4,7 +4,7 @@ #include "kappa.h" #include "local_proto.h" -void prt_kappa(void) +void print_kappa(void) { int i; FILE *fd; @@ -51,5 +51,5 @@ void prt_kappa(void) fclose(fd); /* print labels for categories of maps */ - prt_label(); + print_label(); } diff --git a/raster/r.kappa/prt_label.c b/raster/r.kappa/print_label.c similarity index 97% rename from raster/r.kappa/prt_label.c rename to raster/r.kappa/print_label.c index bf8f1f0e948..d4f37303197 100644 --- a/raster/r.kappa/prt_label.c +++ b/raster/r.kappa/print_label.c @@ -3,7 +3,7 @@ #include "kappa.h" #include "local_proto.h" -void prt_label(void) +void print_label(void) { int i, j; long *cats; diff --git a/raster/r.kappa/prt_mat.c b/raster/r.kappa/print_mat.c similarity index 98% rename from raster/r.kappa/prt_mat.c rename to raster/r.kappa/print_mat.c index 3eeaa511d36..9c98e45cb2c 100644 --- a/raster/r.kappa/prt_mat.c +++ b/raster/r.kappa/print_mat.c @@ -4,7 +4,7 @@ #include "kappa.h" #include "local_proto.h" -void prn_error_mat(int out_cols, int hdr) +void print_error_mat(int out_cols, int hdr) { int num_panels, at_panel; int cndx, rndx; From 0d5b70514a7e2e1947470c135600bab426b1146f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?M=C4=81ris=20Narti=C5=A1s?= Date: Sat, 3 Dec 2022 12:59:27 +0200 Subject: [PATCH 17/20] r.kappa: use better summation approach --- raster/r.kappa/calc_metrics.c | 38 ++++++++++++++++++++++++++++------- 1 file changed, 31 insertions(+), 7 deletions(-) diff --git a/raster/r.kappa/calc_metrics.c b/raster/r.kappa/calc_metrics.c index 7b772a06629..919e00c4040 100644 --- a/raster/r.kappa/calc_metrics.c +++ b/raster/r.kappa/calc_metrics.c @@ -6,6 +6,7 @@ static int longcomp(const void *aa, const void *bb); static int collapse(long *l, int n); +static void update_sum(double *sum, double *c, double value); void calc_metrics(void) { @@ -15,8 +16,8 @@ void calc_metrics(void) int ncat1, ncat2; int cndx; double *pi, *pj, *pii; - double p0 = 0.0, pC = 0.0; - double inter1 = 0.0, inter2 = 0.0; + double p0 = 0.0, pC = 0.0, p0c = 0.0, pCc = 0.0; + double inter1 = 0.0, inter2 = 0.0, inter1c = 0.0, inter2c = 0.0; int a_i = 0, b_i = 0; metrics = (METRICS *)G_malloc(sizeof(METRICS)); @@ -142,10 +143,12 @@ void calc_metrics(void) else metrics->producers_accuracy[i] = 100 * (pii[i] / pj[i]); /* theta 1 */ - p0 += pii[i]; + update_sum(&p0, &p0c, pii[i]); /* theta 2 */ - pC += pi[i] * pj[i]; + update_sum(&pC, &pCc, pi[i] * pj[i]); } + p0 = p0 + p0c; + pC = pC + pCc; if (pC != 1) metrics->kappa = (p0 - pC) / (1 - pC); else @@ -158,7 +161,8 @@ void calc_metrics(void) else metrics->conditional_kappa[i] = (pii[i] - pi[i] * pj[i]) / (pi[i] - pi[i] * pj[i]); - inter1 += pii[i] * pow(((1 - pC) - (1 - p0) * (pi[i] + pj[i])), 2.); + update_sum(&inter1, &inter1c, + pii[i] * pow(((1 - pC) - (1 - p0) * (pi[i] + pj[i])), 2.)); } /* kappa variance */ @@ -170,10 +174,13 @@ void calc_metrics(void) if (Gstats[l].cats[1] == rlst[i]) b_i = i; } - inter2 += Gstats[l].count * pow((pi[a_i] + pj[b_i]), 2.) / - metrics->observations; + update_sum(&inter2, &inter2c, + Gstats[l].count * pow((pi[a_i] + pj[b_i]), 2.) / + metrics->observations); } } + inter1 = inter1 + inter1c; + inter2 = inter2 + inter2c; metrics->kappa_variance = (inter1 + pow((1 - p0), 2.) * inter2 - pow((p0 * pC - 2 * pC + p0), 2.)) / pow((1 - pC), 4.) / metrics->observations; @@ -213,3 +220,20 @@ static int longcomp(const void *aa, const void *bb) return (*a > *b); } + +/* Implements improved Kahan–Babuska algorithm by Neumaier, A. 1974 */ +void update_sum(double *sum, double *c, double value) +{ + double tmp; + + tmp = *sum + value; + + if (fabs(*sum) >= fabs(value)) + *c = *c + (*sum - tmp) + value; + else + *c = *c + (value - tmp) + *sum; + + *sum = tmp; + + return; +} From cb558c904c79573f9442403e6916b51f58c592df Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?M=C4=81ris=20Narti=C5=A1s?= Date: Sat, 3 Dec 2022 15:13:19 +0200 Subject: [PATCH 18/20] r.kappa: implement reporting of MCC --- raster/r.kappa/calc_metrics.c | 24 +++++++++++++++++++++++- raster/r.kappa/kappa.h | 1 + raster/r.kappa/print_json.c | 3 ++- raster/r.kappa/print_kappa.c | 8 ++++++-- raster/r.kappa/r.kappa.html | 12 +++++++++--- raster/r.kappa/testsuite/test_r_kappa.py | 6 ++++++ 6 files changed, 47 insertions(+), 7 deletions(-) diff --git a/raster/r.kappa/calc_metrics.c b/raster/r.kappa/calc_metrics.c index 919e00c4040..123ffb087fd 100644 --- a/raster/r.kappa/calc_metrics.c +++ b/raster/r.kappa/calc_metrics.c @@ -1,4 +1,5 @@ #include +#include #include #include #include "kappa.h" @@ -19,6 +20,9 @@ void calc_metrics(void) double p0 = 0.0, pC = 0.0, p0c = 0.0, pCc = 0.0; double inter1 = 0.0, inter2 = 0.0, inter1c = 0.0, inter2c = 0.0; int a_i = 0, b_i = 0; + double spktk = 0.0, spktkc = 0.0; + double spk2 = 0.0, spk2c = 0.0, stk2 = 0.0, stk2c = 0.0; + double unrooted; metrics = (METRICS *)G_malloc(sizeof(METRICS)); if (nstats == 0) { @@ -29,6 +33,7 @@ void calc_metrics(void) metrics->overall_accuracy = 0.0; metrics->kappa = na_value; metrics->kappa_variance = na_value; + metrics->mcc = na_value; return; } @@ -184,10 +189,27 @@ void calc_metrics(void) metrics->kappa_variance = (inter1 + pow((1 - p0), 2.) * inter2 - pow((p0 * pC - 2 * pC + p0), 2.)) / pow((1 - pC), 4.) / metrics->observations; - G_free(pi); G_free(pj); G_free(pii); + + /* MCC */ + for (i = 0; i < ncat; i++) { + update_sum(&spktk, &spktkc, metrics->row_sum[i] * metrics->col_sum[i]); + update_sum(&spk2, &spk2c, metrics->row_sum[i] * metrics->row_sum[i]); + update_sum(&stk2, &stk2c, metrics->col_sum[i] * metrics->col_sum[i]); + } + spktk = spktk + spktkc; + spk2 = spk2 + spk2c; + stk2 = stk2 + stk2c; + unrooted = (metrics->observations * metrics->observations - spk2) * + (metrics->observations * metrics->observations - stk2); + if (unrooted <= 0.0) { + metrics->mcc = na_value; + return; + } + metrics->mcc = + (metrics->correct * metrics->observations - spktk) / sqrt(unrooted); }; /* remove repeated values */ diff --git a/raster/r.kappa/kappa.h b/raster/r.kappa/kappa.h index 7e20092f52b..d5f1cf38d41 100644 --- a/raster/r.kappa/kappa.h +++ b/raster/r.kappa/kappa.h @@ -28,6 +28,7 @@ struct _metrics_ double kappa; double kappa_variance; double *conditional_kappa; + double mcc; }; extern struct Cell_head window; diff --git a/raster/r.kappa/print_json.c b/raster/r.kappa/print_json.c index 3d1ab360e9e..cb36d1efb9a 100644 --- a/raster/r.kappa/print_json.c +++ b/raster/r.kappa/print_json.c @@ -102,7 +102,8 @@ void print_json() fprintf(fd, ", "); fprintf(fd, "%.5f", metrics->conditional_kappa[i]); } - fprintf(fd, "]\n"); + fprintf(fd, "],\n"); + fprintf(fd, " \"mcc\": %.5f\n", metrics->mcc); fprintf(fd, "}\n"); if (output != NULL) diff --git a/raster/r.kappa/print_kappa.c b/raster/r.kappa/print_kappa.c index d47f0b327cd..85965eeb824 100644 --- a/raster/r.kappa/print_kappa.c +++ b/raster/r.kappa/print_kappa.c @@ -34,15 +34,19 @@ void print_kappa(void) fprintf(fd, "%f\n", metrics->conditional_kappa[i]); } fprintf(fd, "\n"); - fprintf(fd, "Kappa\t\tKappa Variance\n"); + fprintf(fd, "Kappa\t\tKappa Variance\tMCC\n"); if (metrics->kappa == na_value) fprintf(fd, "NA"); else fprintf(fd, "%f", metrics->kappa); if (metrics->kappa_variance == na_value) + fprintf(fd, "\tNA"); + else + fprintf(fd, "\t%f", metrics->kappa_variance); + if (metrics->mcc == na_value) fprintf(fd, "\tNA\n"); else - fprintf(fd, "\t%f\n", metrics->kappa_variance); + fprintf(fd, "\t%f\n", metrics->mcc); fprintf(fd, "\nObs Correct\tTotal Obs\t%% Observed Correct\n"); fprintf(fd, "%ld\t\t%ld\t\t%f\n", metrics->correct, metrics->observations, diff --git a/raster/r.kappa/r.kappa.html b/raster/r.kappa/r.kappa.html index 42c453b3368..c073e81dedc 100644 --- a/raster/r.kappa/r.kappa.html +++ b/raster/r.kappa/r.kappa.html @@ -69,6 +69,10 @@

    OUTPUT VARIABLES

    Variance of kappa index. Correctness needs to be validated.
    Conditional kappa
    Conditional user's kappa for specified class.
    +
    MCC
    +
    Matthews (Mattheus) Correlation Coefficient is implemented + according to Grandini, M., Bagli, E., Visani, G. 2020. + "Metrics for multi-class classification: An overview."

    NOTES

    @@ -106,9 +110,11 @@

    NOTES

    -Some of reported values (Choen's kappa, overall accuracy) can be +Some of reported values (overall accuracy, Choen's kappa, MCC) can be misleading if cell count among classes is not balanced. See e.g. -Powers, D.M.W., 2012. The Problem with Kappa. +Powers, D.M.W., 2012. "The Problem with Kappa"; Zhu, Q., 2020. +"On the performance of Matthews correlation coefficient (MCC) for +imbalanced dataset".

    EXAMPLE

    @@ -143,4 +149,4 @@

    SEE ALSO

    AUTHORS

    Tao Wen, University of Illinois at Urbana-Champaign, Illinois
    -Maris Nartiss, University of Latvia (JSON output) +Maris Nartiss, University of Latvia (JSON output, MCC) diff --git a/raster/r.kappa/testsuite/test_r_kappa.py b/raster/r.kappa/testsuite/test_r_kappa.py index ad6ba6e22ed..a763f9aeee1 100644 --- a/raster/r.kappa/testsuite/test_r_kappa.py +++ b/raster/r.kappa/testsuite/test_r_kappa.py @@ -181,6 +181,7 @@ def test_standard_output(self): # Kappa value vals = rows[28].split() self.assertTrue(self.match(vals[0], 0.52091)) + self.assertTrue(self.match(vals[2], 0.55930)) # Overall characteristics vals = rows[31].split() @@ -283,6 +284,7 @@ def test_standard_output(self): # Kappa value vals = rows[28].split() self.assertTrue(self.match(vals[0], 0.0)) + self.assertTrue(self.match(vals[2], "NA")) # Overall characteristics vals = rows[31].split() @@ -341,6 +343,7 @@ def setUpClass(cls): "producers_accuracy": [57.1429, 0.0, 100.0, -999.0, 100.0, 66.66666], "users_accuracy": [100.0, -999.0, 80.0, 0.0, 100.0, 50.0], "conditional_kappa": [1.0, -999.0, 0.742857, 0.0, 1.0, 0.400], + "mcc": 0.55930, } ) @@ -380,6 +383,7 @@ def setUpClass(cls): "producers_accuracy": [0.0, 0.0, 0.0, 0.0, 0.0, -999.0], "users_accuracy": [-999.0, -999.0, -999.0, -999.0, -999.0, 0.0], "conditional_kappa": [-999.0, -999.0, -999.0, -999.0, -999.0, 0.0], + "mcc": -999.0, } ) @@ -410,6 +414,7 @@ def setUpClass(cls): "producers_accuracy": [], "users_accuracy": [], "conditional_kappa": [], + "mcc": -999.0, } ) @@ -440,6 +445,7 @@ def setUpClass(cls): "producers_accuracy": [], "users_accuracy": [], "conditional_kappa": [], + "mcc": -999.0, } ) From f40d3902074db62233756cf237a272851130dc8a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?M=C4=81ris=20Narti=C5=A1s?= Date: Fri, 23 Dec 2022 13:06:43 +0200 Subject: [PATCH 19/20] r.kappa: beautify code --- raster/r.kappa/calc_metrics.c | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) diff --git a/raster/r.kappa/calc_metrics.c b/raster/r.kappa/calc_metrics.c index 123ffb087fd..9a9bc12e44c 100644 --- a/raster/r.kappa/calc_metrics.c +++ b/raster/r.kappa/calc_metrics.c @@ -246,16 +246,12 @@ static int longcomp(const void *aa, const void *bb) /* Implements improved Kahan–Babuska algorithm by Neumaier, A. 1974 */ void update_sum(double *sum, double *c, double value) { - double tmp; - - tmp = *sum + value; + double tmp = *sum + value; if (fabs(*sum) >= fabs(value)) - *c = *c + (*sum - tmp) + value; + *c += (*sum - tmp) + value; else - *c = *c + (value - tmp) + *sum; + *c += (value - tmp) + *sum; *sum = tmp; - - return; } From fad907914e59d69a94e2efb83e810c40af996afd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?M=C4=81ris=20Narti=C5=A1s?= Date: Sat, 24 Dec 2022 10:37:44 +0200 Subject: [PATCH 20/20] Style change according to wishes of @wenzeslaus --- raster/r.kappa/calc_metrics.c | 14 +++++++------- raster/r.kappa/testsuite/test_r_kappa.py | 2 ++ 2 files changed, 9 insertions(+), 7 deletions(-) diff --git a/raster/r.kappa/calc_metrics.c b/raster/r.kappa/calc_metrics.c index 9a9bc12e44c..63365624c9a 100644 --- a/raster/r.kappa/calc_metrics.c +++ b/raster/r.kappa/calc_metrics.c @@ -152,8 +152,8 @@ void calc_metrics(void) /* theta 2 */ update_sum(&pC, &pCc, pi[i] * pj[i]); } - p0 = p0 + p0c; - pC = pC + pCc; + p0 += p0c; + pC += pCc; if (pC != 1) metrics->kappa = (p0 - pC) / (1 - pC); else @@ -184,8 +184,8 @@ void calc_metrics(void) metrics->observations); } } - inter1 = inter1 + inter1c; - inter2 = inter2 + inter2c; + inter1 += inter1c; + inter2 += inter2c; metrics->kappa_variance = (inter1 + pow((1 - p0), 2.) * inter2 - pow((p0 * pC - 2 * pC + p0), 2.)) / pow((1 - pC), 4.) / metrics->observations; @@ -199,9 +199,9 @@ void calc_metrics(void) update_sum(&spk2, &spk2c, metrics->row_sum[i] * metrics->row_sum[i]); update_sum(&stk2, &stk2c, metrics->col_sum[i] * metrics->col_sum[i]); } - spktk = spktk + spktkc; - spk2 = spk2 + spk2c; - stk2 = stk2 + stk2c; + spktk += spktkc; + spk2 += spk2c; + stk2 += stk2c; unrooted = (metrics->observations * metrics->observations - spk2) * (metrics->observations * metrics->observations - stk2); if (unrooted <= 0.0) { diff --git a/raster/r.kappa/testsuite/test_r_kappa.py b/raster/r.kappa/testsuite/test_r_kappa.py index 5668372c558..56e356ca20b 100644 --- a/raster/r.kappa/testsuite/test_r_kappa.py +++ b/raster/r.kappa/testsuite/test_r_kappa.py @@ -149,6 +149,8 @@ def match(self, pat, ref): # 4 0 14 4 0 - 0.000 0.000 0.222 0.000 0.000 # 5 1 17 0 0 1.000 1.000 0.056 0.056 0.056 1.000 # 6 2 13 2 1 0.667 0.500 0.111 0.222 0.167 0.400 + # Correct MCC value was calculated manually and validated with + # mcc function of R package mltools. def test_standard_output(self): out = read_command(