From f31809a8cddd7444bafc593482845aa4226b3e87 Mon Sep 17 00:00:00 2001
From: Nick Hamblet
Date: Mon, 6 Oct 2014 11:14:31 -0400
Subject: [PATCH] Added a GaussianComparator, with unit tests

This seems like a good replacement for NumericComparator, which has
issues with negative values (issue #181), and also doesn't take scale
into account (e.g., 0.01 may be far from 0.0 in some instances, but
effectively the same in others).
---
v2: reject a missing or non-positive sigma, never return NaN from
compare(), drop the unused fields, and use org.junit.Assert in the tests.
The blob ids on the index lines are carried over from v1.

 .../duke/comparators/GaussianComparator.java  | 79 +++++++++++++++++++
 .../duke/test/GaussianComparatorTest.java     | 78 ++++++++++++++++++
 2 files changed, 157 insertions(+)
 create mode 100644 src/main/java/no/priv/garshol/duke/comparators/GaussianComparator.java
 create mode 100644 src/test/java/no/priv/garshol/duke/test/GaussianComparatorTest.java

diff --git a/src/main/java/no/priv/garshol/duke/comparators/GaussianComparator.java b/src/main/java/no/priv/garshol/duke/comparators/GaussianComparator.java
new file mode 100644
index 00000000..a39392fe
--- /dev/null
+++ b/src/main/java/no/priv/garshol/duke/comparators/GaussianComparator.java
@@ -0,0 +1,79 @@
+package no.priv.garshol.duke.comparators;
+
+import no.priv.garshol.duke.Comparator;
+
+/**
+ * Scores two numeric values with an unnormalized Gaussian: the result is
+ * exp(-(v1 - v2)^2 / (2 * sigma^2)), which is 1.0 for equal values and
+ * falls towards 0.0 as the difference grows relative to sigma.
+ *
+ * <p>The density's normalizing factor 1 / (sigma * sqrt(2 * pi)) is left
+ * out on purpose, so the best possible score is 1.0 whatever sigma is and
+ * nobody has to know the peak height of the curve for their sigma.
+ */
+public class GaussianComparator implements Comparator {
+
+  // Score for values that cannot be compared as numbers.
+  private static final double UNKNOWN = 0.5;
+
+  // -1 / (2 * sigma^2), cached by setSigma; the default is sigma = 1.0.
+  private double powerCoef = -0.5;
+
+  public boolean isTokenized() {
+    return false;
+  }
+
+  /**
+   * Sets the standard deviation, i.e. the scale on which differences
+   * count: values one sigma apart score about 0.61.
+   *
+   * @param sigma a positive, finite number
+   * @throws IllegalArgumentException if sigma is null, NaN, infinite,
+   *         zero or negative
+   */
+  public void setSigma(Double sigma) {
+    if (sigma == null || sigma.isNaN() || sigma.isInfinite()
+        || sigma <= 0.0) {
+      throw new IllegalArgumentException(
+          "sigma must be a positive, finite number, was: " + sigma);
+    }
+    double s = sigma;
+    powerCoef = -1.0 / (2.0 * s * s);
+  }
+
+  /**
+   * Returns the similarity of v1 and v2: 1.0 when they are numerically
+   * equal, otherwise exp(-(v1 - v2)^2 / (2 * sigma^2)).
+   *
+   * @return 0.5 if either value is null, is not parseable as a double,
+   *         or parses to NaN
+   */
+  public double compare(String v1, String v2) {
+    // Double.parseDouble(null) throws NullPointerException, not
+    // NumberFormatException, so null needs its own check
+    if (v1 == null || v2 == null) {
+      return UNKNOWN;
+    }
+
+    double d1;
+    double d2;
+    try {
+      d1 = Double.parseDouble(v1);
+      d2 = Double.parseDouble(v2);
+    } catch (NumberFormatException e) {
+      return UNKNOWN; // not numbers, so we cannot tell either way
+    }
+
+    // "NaN" parses without error but would make the score NaN
+    if (Double.isNaN(d1) || Double.isNaN(d2)) {
+      return UNKNOWN;
+    }
+    // also covers two equal infinities, whose difference is NaN
+    if (d1 == d2) {
+      return 1.0;
+    }
+
+    double diff = d1 - d2;
+    return Math.exp(powerCoef * diff * diff);
+  }
+}
diff --git a/src/test/java/no/priv/garshol/duke/test/GaussianComparatorTest.java b/src/test/java/no/priv/garshol/duke/test/GaussianComparatorTest.java
new file mode 100644
index 00000000..636529a3
--- /dev/null
+++ b/src/test/java/no/priv/garshol/duke/test/GaussianComparatorTest.java
@@ -0,0 +1,78 @@
+package no.priv.garshol.duke.test;
+
+import org.junit.Test;
+import org.junit.Before;
+import static org.junit.Assert.assertEquals;
+
+import no.priv.garshol.duke.comparators.GaussianComparator;
+
+public class GaussianComparatorTest {
+
+  private static final double EXACT = 0.0;
+  private static final double DELTA = 0.0001;
+
+  private GaussianComparator defaultComparator;
+  private GaussianComparator sigma10Comparator;
+
+  @Before
+  public void setUp() {
+    defaultComparator = new GaussianComparator();
+    sigma10Comparator = new GaussianComparator();
+    sigma10Comparator.setSigma(10.0);
+  }
+
+  @Test
+  public void testEqual() {
+    assertEquals(1.0, defaultComparator.compare("42", "42"), EXACT);
+    assertEquals(1.0, sigma10Comparator.compare("42", "42"), EXACT);
+  }
+
+  @Test
+  public void testEqual2() {
+    assertEquals(1.0, defaultComparator.compare("42.0", "42.0"), EXACT);
+    assertEquals(1.0, sigma10Comparator.compare("42.0", "42.0"), EXACT);
+  }
+
+  @Test
+  public void testClose() {
+    assertEquals(0.1353, defaultComparator.compare("40", "42"), DELTA);
+    assertEquals(0.9802, sigma10Comparator.compare("40", "42"), DELTA);
+  }
+
+  @Test
+  public void testFar() {
+    assertEquals(0.0, defaultComparator.compare("25", "42"), DELTA);
+    assertEquals(0.2357, sigma10Comparator.compare("25", "42"), DELTA);
+  }
+
+  @Test
+  public void testNegative() {
+    assertEquals(0.1353, defaultComparator.compare("-1", "1"), DELTA);
+    assertEquals(0.1353, defaultComparator.compare("1", "-1"), DELTA);
+    assertEquals(1.0, defaultComparator.compare("-42", "-42"), EXACT);
+  }
+
+  @Test
+  public void testNotNumbers() {
+    assertEquals(0.5, defaultComparator.compare("abc", "42"), EXACT);
+    assertEquals(0.5, defaultComparator.compare("42", ""), EXACT);
+    assertEquals(0.5, defaultComparator.compare(null, "42"), EXACT);
+    assertEquals(0.5, defaultComparator.compare("NaN", "NaN"), EXACT);
+  }
+
+  @Test
+  public void testInfinity() {
+    assertEquals(1.0, defaultComparator.compare("Infinity", "Infinity"), EXACT);
+    assertEquals(0.0, defaultComparator.compare("Infinity", "42"), EXACT);
+  }
+
+  @Test(expected = IllegalArgumentException.class)
+  public void testZeroSigma() {
+    defaultComparator.setSigma(0.0);
+  }
+
+  @Test(expected = IllegalArgumentException.class)
+  public void testNullSigma() {
+    defaultComparator.setSigma(null);
+  }
+}