diff --git a/README.md b/README.md index c809894..88e6916 100644 --- a/README.md +++ b/README.md @@ -2,7 +2,13 @@ mathex ====== [![Build Status](https://api.travis-ci.org/nisbus/mathex.png?branch=master)](https://api.travis-ci.org/nisbus/mathex.png?branch=master) -Extra math functions for Erlang +Extra math and statistics functions for Erlang. +The library provides calculations such as moving averages, +standard deviation, skewness, kurtosis and correlation. + +The API can be used standalone from any Erlang application. +All functions operate on lists of numbers and return either a single +value or a list of values. This library contains the following functions: @@ -43,11 +49,16 @@ The float is the correlation of the two. It will give you all possible combinations and their correlation. -# To Do -Add a sort function to return the correlation_matrix sorted desc/asc. -Add more functions. -Add unit tests. -Add more documentation. + +## Running tests + +EUnit tests are included and can be executed with: + +```bash +rebar3 eunit +``` + +The tests cover the public API and a few edge cases. ## Documentation diff --git a/src/mathex.erl b/src/mathex.erl index 46e4641..138c500 100644 --- a/src/mathex.erl +++ b/src/mathex.erl @@ -12,13 +12,14 @@ %% API -export([moving_average/1,average/1,sum/1, - stdev_sample/1,stdev_population/1, - skew/1,kurtosis/1,variance/1, - covariance/2,correlation/2,correlation_matrix/1]). + stdev_sample/1,stdev_population/1, + skew/1,kurtosis/1,variance/1, + covariance/2,correlation/2,correlation_matrix/1]). %%%=================================================================== %%% API %%%=================================================================== -spec moving_average(Data :: [] | [float()|integer]) -> []|[float()]. +%% @doc Calculate the cumulative moving average for `Data`. moving_average([]) -> []; moving_average([H|_] = Data) when is_list(Data) -> @@ -34,17 +35,23 @@ moving_average([H|_] = Data) when is_list(Data) -> lists:reverse(Res). -spec stdev_sample(Data :: [] | [float()|integer()]) -> float(). +%% @doc Sample standard deviation using n-1 in the denominator. stdev_sample([])-> 0.0; stdev_sample(Data) when is_list(Data) -> - C = length(Data), - A = average(Data), - math:sqrt(lists:foldl(fun(X,Acc) -> - Acc+math:pow(X-A,2)/ (C-1) - end, 0,Data)). + case length(Data) of + N when N < 2 -> + 0.0; + C -> + A = average(Data), + math:sqrt(lists:foldl(fun(X,Acc) -> + Acc+math:pow(X-A,2)/(C-1) + end, 0, Data)) + end. -spec stdev_population(Data :: [] | [float()|integer()]) -> float(). -stdev_population([]) -> +%% @doc Population standard deviation. +stdev_population([]) -> 0.0; stdev_population(Data) when is_list(Data) -> A = average(Data), @@ -53,31 +60,42 @@ stdev_population(Data) when is_list(Data) -> end,Data))). -spec skew(Data :: [] | [float()|integer()]) -> float(). +%% @doc Unbiased estimator of skewness. skew([])-> 0.0; skew(Data) when is_list(Data) -> - A = average(Data), C = length(Data), - Std = stdev_sample(Data), - Mult = (C)/((C-1)*(C-2)), - sum(lists:map(fun(X) -> - math:pow(((X-A)/Std),3) - end,Data))*Mult. + case C < 3 of + true -> 0.0; + false -> + A = average(Data), + Std = stdev_sample(Data), + Mult = C / ((C-1)*(C-2)), + sum(lists:map(fun(X) -> + math:pow(((X-A)/Std),3) + end, Data)) * Mult + end. -spec kurtosis(Data :: [] | [float()|integer()]) -> float(). +%% @doc Unbiased estimator of kurtosis. kurtosis([]) -> 0.0; kurtosis(Data) when is_list(Data) -> - A = average(Data), C = length(Data), - Std = stdev_sample(Data), - Mult = ((C) * (C+1)) / ((C-1) * (C -2) * (C - 3)), - Sub = 3 * (math:pow(C-1,2)) / ((C-2) * (C -3)), - Mult * sum(lists:map(fun(X) -> - math:pow( ((X-A)/Std),4) - end,Data))-Sub. + case C < 4 of + true -> 0.0; + false -> + A = average(Data), + Std = stdev_sample(Data), + Mult = (C * (C+1)) / ((C-1) * (C-2) * (C-3)), + Sub = 3 * math:pow(C-1,2) / ((C-2) * (C-3)), + Mult * sum(lists:map(fun(X) -> + math:pow(((X-A)/Std),4) + end, Data)) - Sub + end. -spec variance(Data :: [] | [float()|integer()]) -> float(). +%% @doc Population variance. variance([]) -> 0.0; variance(Data) when is_list(Data) -> @@ -88,26 +106,48 @@ variance(Data) when is_list(Data) -> end,Data))/C. -spec covariance(Xs :: [] | [float()|integer()],Ys :: [] | [float()|integer()]) -> float(). +%% @doc Sample covariance of two data sets. Lists of unequal length are truncated to the shortest. covariance([],_) -> 0.0; covariance(_,[]) -> 0.0; covariance(Xs,Ys) when is_list(Xs) and is_list(Ys) -> - AveX = average(Xs), - AveY = average(Ys), - sum(lists:zipwith(fun(X,Y)-> - (X-AveX)*(Y-AveY) - end,Xs,Ys))/(length(Xs)-1). + Pairs = lists:zip(Xs,Ys), + case length(Pairs) of + N when N < 2 -> 0.0; + Len -> + X1 = [X || {X,_} <- Pairs], + Y1 = [Y || {_,Y} <- Pairs], + AveX = average(X1), + AveY = average(Y1), + sum([ (X-AveX)*(Y-AveY) || {X,Y} <- Pairs ])/(Len-1) + end. -spec correlation(Xs :: [] | [float()|integer()],Ys :: [] | [float()|integer()]) -> float(). +%% @doc Pearson correlation coefficient for two data sets. correlation([],_) -> 0.0; correlation(_,[]) -> 0.0; -correlation(Xs,Ys) -> - covariance(Xs,Ys) /(stdev_sample(Xs)*stdev_sample(Ys)). +correlation(Xs,Ys) when is_list(Xs) and is_list(Ys) -> + Pairs = lists:zip(Xs,Ys), + case length(Pairs) of + N when N < 2 -> 0.0; + _ -> + X1 = [X || {X,_} <- Pairs], + Y1 = [Y || {_,Y} <- Pairs], + StdX = stdev_sample(X1), + StdY = stdev_sample(Y1), + case StdX =:= 0.0 orelse StdY =:= 0.0 of + true -> 0.0; + false -> + Cov = covariance(X1,Y1), + Cov/(StdX*StdY) + end + end. -spec correlation_matrix(ListOfLists :: [] | [[float()|integer()]]) -> [{integer(),integer(),float()}]. +%% @doc Correlate each list in `ListOfLists` with the others. correlation_matrix([]) -> []; correlation_matrix(ListOfLists) -> @@ -122,12 +162,14 @@ correlation_matrix(ListOfLists) -> end,R))). -spec average(Data :: [] | [float()|integer()]) -> float(). +%% @doc Arithmetic mean of the list. average([]) -> 0.0; average(Data) -> sum(Data)/length(Data). -spec sum(Data :: [] | [float()|integer()]) -> float()|integer(). +%% @doc Sum of all elements in `Data`. sum([]) -> 0.0; sum(Data) -> diff --git a/test/mathex_tests.erl b/test/mathex_tests.erl new file mode 100644 index 0000000..e1deb96 --- /dev/null +++ b/test/mathex_tests.erl @@ -0,0 +1,41 @@ +-module(mathex_tests). +-include_lib("eunit/include/eunit.hrl"). + +moving_average_test() -> + ?assertEqual([1,1.5,2.0], mathex:moving_average([1,2,3])). + +average_test() -> + ?assertEqual(2.0, mathex:average([1,2,3])). + +sum_test() -> + ?assertEqual(6, mathex:sum([1,2,3])). + +stdev_sample_test() -> + R = mathex:stdev_sample([2,4,4,4,5,5,7,9]), + ?assert(abs(R - 2.138089935299395) < 1.0e-6). + +stdev_population_test() -> + R = mathex:stdev_population([2,4,4,4,5,5,7,9]), + ?assert(abs(R - 2.0) < 1.0e-6). + +variance_test() -> + R = mathex:variance([1,2,3,4]), + ?assert(abs(R - 1.25) < 1.0e-6). + +covariance_test() -> + R = mathex:covariance([2.1,2.5,4.0,3.6],[8,12,14,10]), + ?assert(abs(R - 1.5333333333333332) < 1.0e-6). + +correlation_test() -> + R = mathex:correlation([2.1,2.5,4.0,3.6],[8,12,14,10]), + ?assert(abs(R - 0.6625738822030289) < 1.0e-6). + +correlation_matrix_test() -> + R = mathex:correlation_matrix([[1,2,3],[2,3,4]]), + ?assertEqual([{1,2,1.0},{2,1,1.0}], R). + +%% edge case tests +single_element_safe_test() -> + ?assertEqual(0.0, mathex:stdev_sample([1])), + ?assertEqual(0.0, mathex:covariance([1],[1])), + ?assertEqual(0.0, mathex:correlation([1],[1])).