Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion src/mbi/dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -190,7 +190,7 @@ def datavector(self, flatten: bool = True) -> NDArray:
result = self.weights.sum()
return np.array([result]) if flatten else result
multi_index = tuple(self.df[a].values for a in self.domain.attrs)
linear_indices = np.ravel_multi_index(multi_index, dims, order='F')
linear_indices = np.ravel_multi_index(multi_index, dims, order='C')
counts = np.bincount(linear_indices, minlength=math.prod(dims), weights=self.weights)
return counts if flatten else counts.reshape(dims)

Expand Down
78 changes: 78 additions & 0 deletions test/test_dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,5 +24,83 @@ def test_datavector(self):
self.assertTrue(vec.size, 3 * 4 * 5 * 6)


class TestDatasetDeterministic(unittest.TestCase):
    """Exact-value checks for ``Dataset.datavector`` and ``Dataset.project``.

    Fixture: attributes A (2 values) and B (3 values) with the four records
    (A, B) = (0, 0), (0, 1), (1, 2), (0, 0), stored once without weights and
    once with the per-record weights [1.0, 2.0, 0.5, 1.0].
    """

    def setUp(self):
        """Build the domain plus an unweighted and a weighted dataset."""
        import numpy as np

        self.domain = Domain(['A', 'B'], [2, 3])

        # Column-wise encoding of the records (0, 0), (0, 1), (1, 2), (0, 0).
        column_a = np.array([0, 0, 1, 0])
        column_b = np.array([0, 1, 2, 0])
        self.data_dict = {'A': column_a, 'B': column_b}
        self.dataset = Dataset(self.data_dict, self.domain)

        # One weight per record, listed in record order.
        self.weights = np.array([1.0, 2.0, 0.5, 1.0])
        self.weighted_dataset = Dataset(
            self.data_dict, self.domain, self.weights
        )

    def test_datavector_unweighted(self):
        """Unweighted counts, both flattened and in domain shape."""
        import numpy as np

        # Flat layout is row-major: cell (a, b) sits at index a * 3 + b, so
        # (0, 0) twice -> index 0, (0, 1) -> index 1, (1, 2) -> index 5.
        flat_counts = self.dataset.datavector(flatten=True)
        np.testing.assert_array_equal(
            flat_counts, np.array([2, 1, 0, 0, 0, 1])
        )

        # Same counts as a 2 x 3 table (rows indexed by A, columns by B).
        table_counts = self.dataset.datavector(flatten=False)
        np.testing.assert_array_equal(
            table_counts, np.array([[2, 1, 0], [0, 0, 1]])
        )

    def test_project_unweighted(self):
        """Single-attribute projections give the expected marginal counts."""
        import numpy as np

        # A takes value 0 three times and 1 once; B takes 0 twice, 1 and 2
        # once each.
        cases = (
            ('A', np.array([3, 1])),
            ('B', np.array([2, 1, 1])),
        )
        for attr, marginal in cases:
            projected = self.dataset.project(attr)
            np.testing.assert_array_equal(projected.datavector(), marginal)
            self.assertEqual(projected.domain.attrs, (attr,))

    def test_datavector_weighted(self):
        """Weighted counts accumulate record weights per cell."""
        import numpy as np

        # Records land in flat cells [0, 1, 5, 0] with weights
        # [1.0, 2.0, 0.5, 1.0], so cell 0 holds 1.0 + 1.0 = 2.0.
        weighted_counts = self.weighted_dataset.datavector(flatten=True)
        np.testing.assert_array_equal(
            weighted_counts, np.array([2.0, 2.0, 0, 0, 0, 0.5])
        )

    def test_project_weighted(self):
        """Single-attribute projections of the weighted dataset sum weights."""
        import numpy as np

        cases = (
            # A = 0: 1.0 + 2.0 + 1.0 = 4.0; A = 1: 0.5
            ('A', np.array([4.0, 0.5])),
            # B = 0: 1.0 + 1.0 = 2.0; B = 1: 2.0; B = 2: 0.5
            ('B', np.array([2.0, 2.0, 0.5])),
        )
        for attr, marginal in cases:
            projected = self.weighted_dataset.project(attr)
            np.testing.assert_array_equal(projected.datavector(), marginal)

    def test_project_multiple_columns(self):
        """Projecting onto every attribute reproduces the full datavector."""
        import numpy as np

        both_attrs = self.dataset.project(['A', 'B'])
        np.testing.assert_array_equal(
            both_attrs.datavector(), np.array([2, 1, 0, 0, 0, 1])
        )


# Run every test case in this module when the file is executed as a script.
if __name__ == "__main__":
    unittest.main()