Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
30 changes: 30 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -76,6 +76,36 @@ API documentation can be found [here](http://antoniogarrote.github.com/clj-ml/ind
12,34,good
24,53,bad>

REPL>; Get summary statistics for the values of each attribute in a dataset
REPL> (dataset-attributes-stats ds)
REPL>({:stdDev 8.48528137423857,
:totalCount 2,
:mean 18.0,
:name "length",
:intCount 2,
:sumSq 720.0,
:uniqueCount 2,
:max 24.0,
:min 12.0,
:sum 36.0,
:distinctCount 2}
{:stdDev 13.435028842544403,
:totalCount 2,
:mean 43.5,
:name "width",
:intCount 2,
:sumSq 3965.0,
:uniqueCount 2,
:max 53.0,
:min 34.0,
:sum 87.0,
:distinctCount 2}
{:name "kind",
:intCount 2,
:distinctCount 2,
:uniqueCount 2,
:totalCount 2})

REPL>; Using datasets like sequences
REPL>(dataset-seq ds)

Expand Down
41 changes: 40 additions & 1 deletion src/clj_ml/data.clj
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@
that can be transformed using usual Clojure functions like map, reduce, etc."
(:use [clj-ml utils])
(:require [clj-ml.filters :as filters])
(:import (weka.core Instance Instances FastVector Attribute)
(:import (weka.core Instance Instances FastVector Attribute AttributeStats)
(cljml ClojureInstances)))

(declare dataset-seq)
Expand Down Expand Up @@ -336,6 +336,45 @@ If the class is nominal then the string value (not keyword) is returned."
(.stringValue instance pos)
val))))

(defn- extract-stats
  "Builds a statistics map from an [attribute attribute-stats] pair.
  The map always carries :name, :intCount, :distinctCount, :uniqueCount and
  :totalCount. When the attribute is numeric and numeric stats are present,
  :stdDev, :min, :max, :mean, :sum and :sumSq are added as well."
  [[^Attribute att ^AttributeStats stat]]
  (let [counts  {:name          (attr-name att)
                 :intCount      (.intCount stat)
                 :distinctCount (.distinctCount stat)
                 :uniqueCount   (.uniqueCount stat)
                 :totalCount    (.totalCount stat)}
        ;; Only numeric attributes are asked for their numeric stats.
        numeric (when (.isNumeric att)
                  (.numericStats stat))]
    (if numeric
      (assoc counts
             :stdDev (.stdDev numeric)
             :min    (.min numeric)
             :max    (.max numeric)
             :mean   (.mean numeric)
             :sum    (.sum numeric)
             :sumSq  (.sumSq numeric))
      counts)))

(defn dataset-attribute-stats-at
  "Returns the statistics map for the attribute of the dataset identified by
  index-or-name. The position is resolved through dataset-index-attr."
  [^Instances dataset index-or-name]
  (let [pos       (int (dataset-index-attr dataset index-or-name))
        attribute (.attribute dataset pos)
        stats     (.attributeStats dataset pos)]
    (extract-stats [attribute stats])))

(defn instance-attribute-stats-at
  "Returns the statistics map for the attribute identified by index-or-name,
  computed over the dataset the instance belongs to."
  [^Instance instance index-or-name]
  (-> instance
      .dataset
      (dataset-attribute-stats-at index-or-name)))

(defn attribute-stats-at
  "Returns the statistics of the attribute found at the given position or
  with the given name. Accepts either a dataset or an instance."
  [dataset-or-instance index-or-name]
  (let [lookup (if (is-instance? dataset-or-instance)
                 instance-attribute-stats-at
                 dataset-attribute-stats-at)]
    (lookup dataset-or-instance index-or-name)))

(defn dataset-attributes-stats
  "Returns a lazy sequence with one statistics map per attribute of the
  dataset, in attribute order."
  [^Instances dataset]
  (let [pair-at (fn [i]
                  (let [pos (int i)]
                    [(.attribute dataset pos)
                     (.attributeStats dataset pos)]))]
    (->> (range (.numAttributes dataset))
         (map pair-at)
         (map extract-stats))))

(defn instance-to-list
"Builds a list with the values of the instance"
[^Instance instance]
Expand Down
19 changes: 19 additions & 0 deletions test/clj_ml/data_test.clj
Original file line number Diff line number Diff line change
Expand Up @@ -166,6 +166,25 @@
(is (= [(.attribute ds 0) (.attribute ds 1)] (numeric-attributes ds)))
(is (= '(:a :b :c) (attribute-names ds)))))

(deftest attributes-stats-tests
  ;; One numeric attribute (:num-a) and one nominal attribute (:categoric):
  ;; the numeric one must report the extra numeric fields, the nominal one
  ;; only the counts.
  (let [rows     [[10 :a]
                  [8 :b]
                  [5 :b]
                  [3 :c]
                  [2 :c]
                  [1 :c]]
        ds       (make-dataset "test-statistics"
                               [:num-a {:categoric [:a :b :c]}]
                               rows)
        expected (list {:name          "num-a"
                        :intCount      6
                        :distinctCount 6
                        :uniqueCount   6
                        :totalCount    6
                        :stdDev        3.5449494589721118
                        :min           1.0
                        :max           10.0
                        :mean          4.833333333333333
                        :sum           29.0
                        :sumSq         203.0}
                       {:name          "categoric"
                        :intCount      6
                        :distinctCount 3
                        :uniqueCount   1
                        :totalCount    6})]
    (is (= (dataset-attributes-stats ds) expected))))

(deftest replacing-attributes
(let [ds (make-dataset "test" [:a {:b [:foo :bar]}] [[1 :foo] [2 :bar]])
_ (dataset-replace-attribute! ds :b (nominal-attribute :b [:baz :shaz]))]
Expand Down