diff --git a/examples/quickstart.ipynb b/examples/quickstart.ipynb index 2184465..084dba4 100644 --- a/examples/quickstart.ipynb +++ b/examples/quickstart.ipynb @@ -73,7 +73,7 @@ }, { "cell_type": "code", - "execution_count": 5, + "execution_count": 2, "metadata": {}, "outputs": [], "source": [ @@ -93,14 +93,14 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 7, "metadata": {}, "outputs": [], "source": [ "# Actual data\n", "df_actual = pd.DataFrame(\n", " {\n", - " \"target\": [0, 1, 1, 2, 2, 3, 3, 4],\n", + " \"lr_target\": [0, 1, 1, 2, 2, 3, 3, 4],\n", " \"covariate_1\": [3, 2, 4, 5, 2, 6, 8, 5],\n", " },\n", " index=index,\n", @@ -109,21 +109,21 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 8, "metadata": {}, "outputs": [], "source": [ "# Point predictions\n", - "df1_point = pd.DataFrame({\"pred_target\": [1, 3, 5, 7]}, index=index_0)\n", - "df2_point = pd.DataFrame({\"pred_target\": [2, 4, 6, 8]}, index=index_1)\n", + "df1_point = pd.DataFrame({\"pred_lr_target\": [1, 3, 5, 7]}, index=index_0)\n", + "df2_point = pd.DataFrame({\"pred_lr_target\": [2, 4, 6, 8]}, index=index_1)\n", "dfs_point = [df1_point, df2_point]\n", "\n", "# Uncertainty\n", "df1_uncertainty = pd.DataFrame(\n", - " {\"pred_target\": [[1, 2, 3], [2, 3, 4], [3, 4, 5], [4, 5, 6]]}, index=index_0\n", + " {\"pred_lr_target\": [[1, 2, 3], [2, 3, 4], [3, 4, 5], [4, 5, 6]]}, index=index_0\n", ")\n", "df2_uncertainty = pd.DataFrame(\n", - " {\"pred_target\": [[4, 6, 8], [5, 7, 9], [6, 8, 10], [7, 9, 11]]}, index=index_1\n", + " {\"pred_lr_target\": [[4, 6, 8], [5, 7, 9], [6, 8, 10], [7, 9, 11]]}, index=index_1\n", ")\n", "dfs_uncertainty = [df1_uncertainty, df2_uncertainty]" ] @@ -149,7 +149,7 @@ "metadata": {}, "outputs": [], "source": [ - "metrics_list = ['RMSLE', 'CRPS'] # Add other metrics as needed\n", + "metrics_list = ['RMSLE', 'CRPS', 'MIS'] # Add other metrics as needed\n", "evaluation_manager = EvaluationManager(metrics_list)" ] }, @@ -162,17 +162,27 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 19, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Metric MIS is not a default metric, skipping...\n", + "Metric MIS is not a default metric, skipping...\n", + "Metric MIS is not a default metric, skipping...\n" + ] + } + ], "source": [ "steps = [1, 2]\n", - "point_evaluation_results = evaluation_manager.evaluate(df_actual, dfs_point, target='target', steps=steps)" + "point_evaluation_results = evaluation_manager.evaluate(df_actual, dfs_point, target='lr_target', steps=steps)" ] }, { "cell_type": "code", - "execution_count": 36, + "execution_count": 20, "metadata": {}, "outputs": [ { @@ -190,7 +200,7 @@ " ts01 0.420849 2.0)" ] }, - "execution_count": 36, + "execution_count": 20, "metadata": {}, "output_type": "execute_result" } @@ -208,21 +218,56 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 21, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ - "Metric RMSLE is not a default metric, skipping...\n", + "Metric RMSLE is not a default metric, skipping...\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ "Metric RMSLE is not a default metric, skipping...\n", "Metric RMSLE is not a default metric, skipping...\n" ] } ], "source": [ - "uncertainty_evaluation_results = evaluation_manager.evaluate(df_actual, dfs_uncertainty, target='target', steps=steps)" + "uncertainty_evaluation_results = 
evaluation_manager.evaluate(df_actual, dfs_uncertainty, target='lr_target', steps=steps)" + ] + }, + { + "cell_type": "code", + "execution_count": 24, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "( CRPS MIS\n", + " month100 0.555556 3.90\n", + " month101 2.333333 65.85\n", + " month102 4.111111 127.80,\n", + " CRPS MIS\n", + " step01 1.833333 45.85\n", + " step02 2.833333 85.85,\n", + " CRPS MIS\n", + " ts00 1.055556 23.9\n", + " ts01 3.611111 107.8)" + ] + }, + "execution_count": 24, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "uncertainty_evaluation_results['month'][1], uncertainty_evaluation_results['step'][1], uncertainty_evaluation_results['time_series'][1]" ] }, { @@ -234,18 +279,27 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 16, "metadata": {}, "outputs": [], "source": [ "# Get the evaluation type, i.e., uncertainty or point\n", - "is_uncertainty = EvaluationManager.get_evaluation_type(dfs_point)\n", - "month_point_evaluation_results = evaluation_manager.month_wise_evaluation(df_actual, dfs_point, target='target', is_uncertainty=is_uncertainty)" + "actual = EvaluationManager.transform_data(\n", + " EvaluationManager.convert_to_arrays(df_actual), 'lr_target'\n", + " )\n", + "predictions = [\n", + " EvaluationManager.transform_data(\n", + " EvaluationManager.convert_to_arrays(pred), f\"pred_lr_target\"\n", + " )\n", + " for pred in dfs_point\n", + "]\n", + "is_uncertainty = EvaluationManager.get_evaluation_type(predictions)\n", + "month_point_evaluation_results = evaluation_manager.month_wise_evaluation(actual, predictions, target='lr_target', is_uncertainty=is_uncertainty)" ] }, { "cell_type": "code", - "execution_count": 39, + "execution_count": 17, "metadata": {}, "outputs": [ { @@ -262,6 +316,13 @@ "source": [ "print(month_point_evaluation_results[1])" ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] } ], "metadata": { diff --git a/pyproject.toml b/pyproject.toml index bbb847d..5b66657 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "views_evaluation" -version = "0.2.0" +version = "0.4.0" description = "" authors = [ "Xiaolong Sun ", diff --git a/tests/test_evaluation_manager.py b/tests/test_evaluation_manager.py index 7498b75..46aec9c 100644 --- a/tests/test_evaluation_manager.py +++ b/tests/test_evaluation_manager.py @@ -55,19 +55,19 @@ def mock_actual(): ) df = pd.DataFrame( { - "target": [0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6], - "covariate_1": [3, 2, 4, 5, 2, 6, 8, 5, 3, 2, 9, 4], + "target": [0.0, 1.0, 1.0, 2.0, 2.0, 3.0, 3.0, 4.0, 4.0, 5.0, 5.0, 6.0], + "covariate_1": [3.0, 2.0, 4.0, 5.0, 2.0, 6.0, 8.0, 5.0, 3.0, 2.0, 9.0, 4.0], }, index=index, ) - return df + return EvaluationManager.convert_to_arrays(df) @pytest.fixture def mock_point_predictions(mock_index): - df1 = pd.DataFrame({"pred_target": [1, 3, 5, 7, 9, 7]}, index=mock_index[0]) - df2 = pd.DataFrame({"pred_target": [2, 4, 6, 8, 10, 8]}, index=mock_index[1]) - return [df1, df2] + df1 = pd.DataFrame({"pred_target": [1.0, 3.0, 5.0, 7.0, 9.0, 7.0]}, index=mock_index[0]) + df2 = pd.DataFrame({"pred_target": [2.0, 4.0, 6.0, 8.0, 10.0, 8.0]}, index=mock_index[1]) + return [EvaluationManager.convert_to_arrays(df1), EvaluationManager.convert_to_arrays(df2)] @pytest.fixture @@ -75,12 +75,12 @@ def mock_uncertainty_predictions(mock_index): df1 = pd.DataFrame( { "pred_target": [ - [1, 2, 3], - [2, 3, 4], - [3, 4, 5], - [4, 5, 6], - [5, 6, 
7], - [6, 7, 8], + [1.0, 2.0, 3.0], + [2.0, 3.0, 4.0], + [3.0, 4.0, 5.0], + [4.0, 5.0, 6.0], + [5.0, 6.0, 7.0], + [6.0, 7.0, 8.0], ] }, index=mock_index[0], @@ -88,74 +88,59 @@ def mock_uncertainty_predictions(mock_index): df2 = pd.DataFrame( { "pred_target": [ - [4, 6, 8], - [5, 7, 9], - [6, 8, 10], - [7, 9, 11], - [8, 10, 12], - [9, 11, 13], + [4.0, 6.0, 8.0], + [5.0, 7.0, 9.0], + [6.0, 8.0, 10.0], + [7.0, 9.0, 11.0], + [8.0, 10.0, 12.0], + [9.0, 11.0, 13.0], ] }, index=mock_index[1], ) - return [df1, df2] + return [EvaluationManager.convert_to_arrays(df1), EvaluationManager.convert_to_arrays(df2)] def test_validate_dataframes_valid_type(mock_point_predictions): with pytest.raises(TypeError): EvaluationManager.validate_predictions( - mock_point_predictions[0], "target", is_uncertainty=False + mock_point_predictions[0], "target" ) def test_validate_dataframes_valid_columns(mock_point_predictions): with pytest.raises(ValueError): EvaluationManager.validate_predictions( - mock_point_predictions, "y", is_uncertainty=False + mock_point_predictions, "y" ) - -def test_validate_dataframes_valid_point(mock_uncertainty_predictions): - with pytest.raises(ValueError): - EvaluationManager.validate_predictions( - mock_uncertainty_predictions, "target", is_uncertainty=False - ) - - -def test_validate_dataframes_valid_uncertainty(mock_point_predictions): - with pytest.raises(ValueError): - EvaluationManager.validate_predictions( - mock_point_predictions, "devpar", is_uncertainty=True - ) - - def test_get_evaluation_type(): # Test case 1: All DataFrames for uncertainty evaluation predictions_uncertainty = [ - pd.DataFrame({'pred_target': [[1, 2], [3, 4]]}), - pd.DataFrame({'pred_target': [[5, 6], [7, 8]]}), + pd.DataFrame({'pred_target': [[1.0, 2.0], [3.0, 4.0]]}), + pd.DataFrame({'pred_target': [[5.0, 6.0], [7.0, 8.0]]}), ] assert EvaluationManager.get_evaluation_type(predictions_uncertainty) == True # Test case 2: All DataFrames for point evaluation predictions_point = [ - pd.DataFrame({'pred_target': [1.0, 2.0]}), - pd.DataFrame({'pred_target': [3.0, 4.0]}), + pd.DataFrame({'pred_target': [[1.0], [2.0]]}), + pd.DataFrame({'pred_target': [[3.0], [4.0]]}), ] assert EvaluationManager.get_evaluation_type(predictions_point) == False # Test case 3: Mixed evaluation types predictions_mixed = [ - pd.DataFrame({'pred_target': [[1, 2], [3, 4]]}), - pd.DataFrame({'pred_target': [5.0, 6.0]}), + pd.DataFrame({'pred_target': [[1.0, 2.0], [3.0, 4.0]]}), + pd.DataFrame({'pred_target': [[5.0], [6.0]]}), ] with pytest.raises(ValueError): EvaluationManager.get_evaluation_type(predictions_mixed) # Test case 4: Single element lists predictions_single_element = [ - pd.DataFrame({'pred_target': [[1], [2]]}), - pd.DataFrame({'pred_target': [[3], [4]]}), + pd.DataFrame({'pred_target': [[1.0], [2.0]]}), + pd.DataFrame({'pred_target': [[3.0], [4.0]]}), ] assert EvaluationManager.get_evaluation_type(predictions_single_element) == False @@ -164,8 +149,8 @@ def test_match_actual_pred_point( mock_actual, mock_point_predictions, mock_uncertainty_predictions, mock_index ): df_matched = [ - pd.DataFrame({"target": [1, 2, 2, 3, 3, 4]}, index=mock_index[0]), - pd.DataFrame({"target": [2, 3, 3, 4, 4, 5]}, index=mock_index[1]), + pd.DataFrame({"target": [[1.0], [2.0], [2.0], [3.0], [3.0], [4.0]]}, index=mock_index[0]), + pd.DataFrame({"target": [[2.0], [3.0], [3.0], [4.0], [4.0], [5.0]]}, index=mock_index[1]), ] for i in range(len(df_matched)): df_matched_actual_point, df_matched_point = ( @@ -186,44 +171,44 @@ def 
test_match_actual_pred_point( def test_split_dfs_by_step(mock_point_predictions, mock_uncertainty_predictions): df_splitted_point = [ - pd.DataFrame( - {"pred_target": [1, 3, 2, 4]}, + EvaluationManager.convert_to_arrays(pd.DataFrame( + {"pred_target": [[1.0], [3.0], [2.0], [4.0]]}, index=pd.MultiIndex.from_tuples( [(100, 1), (100, 2), (101, 1), (101, 2)], names=["month", "country"] ), - ), - pd.DataFrame( - {"pred_target": [5, 7, 6, 8]}, + )), + EvaluationManager.convert_to_arrays(pd.DataFrame( + {"pred_target": [[5.0], [7.0], [6.0], [8.0]]}, index=pd.MultiIndex.from_tuples( [(101, 1), (101, 2), (102, 1), (102, 2)], names=["month", "country"] ), - ), - pd.DataFrame( - {"pred_target": [9, 7, 10, 8]}, + )), + EvaluationManager.convert_to_arrays(pd.DataFrame( + {"pred_target": [[9.0], [7.0], [10.0], [8.0]]}, index=pd.MultiIndex.from_tuples( [(102, 1), (102, 2), (103, 1), (103, 2)], names=["month", "country"] ), - ), + )), ] df_splitted_uncertainty = [ - pd.DataFrame( - {"pred_target": [[1, 2, 3], [2, 3, 4], [4, 6, 8], [5, 7, 9]]}, + EvaluationManager.convert_to_arrays(pd.DataFrame( + {"pred_target": [[1.0, 2.0, 3.0], [2.0, 3.0, 4.0], [4.0, 6.0, 8.0], [5.0, 7.0, 9.0]]}, index=pd.MultiIndex.from_tuples( [(100, 1), (100, 2), (101, 1), (101, 2)], names=["month", "country"] ), - ), - pd.DataFrame( - {"pred_target": [[3, 4, 5], [4, 5, 6], [6, 8, 10], [7, 9, 11]]}, + )), + EvaluationManager.convert_to_arrays(pd.DataFrame( + {"pred_target": [[3.0, 4.0, 5.0], [4.0, 5.0, 6.0], [6.0, 8.0, 10.0], [7.0, 9.0, 11.0]]}, index=pd.MultiIndex.from_tuples( [(101, 1), (101, 2), (102, 1), (102, 2)], names=["month", "country"] ), - ), - pd.DataFrame( - {"pred_target": [[5, 6, 7], [6, 7, 8], [8, 10, 12], [9, 11, 13]]}, + )), + EvaluationManager.convert_to_arrays(pd.DataFrame( + {"pred_target": [[5.0, 6.0, 7.0], [6.0, 7.0, 8.0], [8.0, 10.0, 12.0], [9.0, 11.0, 13.0]]}, index=pd.MultiIndex.from_tuples( [(102, 1), (102, 2), (103, 1), (103, 2)], names=["month", "country"] ), - ), + )), ] df_splitted_point_test = EvaluationManager._split_dfs_by_step( mock_point_predictions @@ -393,3 +378,54 @@ def test_month_wise_evaluation_uncertainty(mock_actual, mock_uncertainty_predict evaluation_dict.keys() ) assert np.allclose(df_evaluation, df_evaluation_test, atol=0.000001) + + +def test_calculate_ap_point_predictions(): + actual_data = {'target': [[40], [20], [35], [25]]} + pred_data = {'pred_target': [[35], [30], [20], [15]]} + threshold=30 + + matched_actual = pd.DataFrame(actual_data) + matched_pred = pd.DataFrame(pred_data) + + from views_evaluation.evaluation.metric_calculators import calculate_ap + ap_score = calculate_ap(matched_actual, matched_pred, 'target', threshold) + + actual_binary = [1, 0, 1, 0] # 40>30, 20<30, 35>30, 25<30 + pred_binary = [1, 1, 0, 0] # 35>30, 30=30, 20<30, 15<30 + from sklearn.metrics import average_precision_score + expected_ap = average_precision_score(actual_binary, pred_binary) + + assert abs(ap_score - expected_ap) < 0.01 + + +def test_calculate_ap_uncertainty_predictions(): + actual_data = {'target': [[40], [20], [35], [25]]} + pred_data = { + 'pred_target': [ + [35, 40, 45], + [30, 35, 40], + [20, 25, 30], + [15, 20, 25] + ] + } + threshold=30 + matched_actual = pd.DataFrame(actual_data) + matched_pred = pd.DataFrame(pred_data) + + from views_evaluation.evaluation.metric_calculators import calculate_ap + ap_score = calculate_ap(matched_actual, matched_pred, 'target', threshold) + + pred_values = [35, 40, 45, 30, 35, 40, 20, 25, 30, 15, 20, 25] + actual_values = [40, 40, 40, 20, 20, 20, 
35, 35, 35, 25, 25, 25] + actual_binary = [1 if x > threshold else 0 for x in actual_values] + pred_binary = [1 if x >= threshold else 0 for x in pred_values] + + from sklearn.metrics import average_precision_score + expected_ap = average_precision_score(actual_binary, pred_binary) + + assert abs(ap_score - expected_ap) < 0.01 + + + + diff --git a/tests/test_metric_calculators.py b/tests/test_metric_calculators.py new file mode 100644 index 0000000..1ee54f1 --- /dev/null +++ b/tests/test_metric_calculators.py @@ -0,0 +1,149 @@ +import pytest +import pandas as pd +import numpy as np +from views_evaluation.evaluation.metric_calculators import ( + calculate_rmsle, + calculate_crps, + calculate_ap, + calculate_emd, + calculate_pearson, + calculate_coverage, + calculate_ignorance_score, + calculate_mean_interval_score, + POINT_METRIC_FUNCTIONS, + UNCERTAINTY_METRIC_FUNCTIONS, +) + + +@pytest.fixture +def sample_data(): + """Create sample data for testing.""" + actual = pd.DataFrame({ + 'target': [[1.0], [2.0], [3.0], [4.0]] + }) + pred = pd.DataFrame({ + 'pred_target': [[1.1], [1.9], [3.1], [3.9]] + }) + return actual, pred + + +@pytest.fixture +def sample_uncertainty_data(): + """Create sample uncertainty data for testing.""" + actual = pd.DataFrame({ + 'target': [[1.0], [2.0], [3.0], [4.0]] + }) + pred = pd.DataFrame({ + 'pred_target': [[1.0, 1.1, 1.2], [1.8, 2.0, 2.2], [2.9, 3.0, 3.1], [3.8, 4.0, 4.2]] + }) + return actual, pred + + +def test_calculate_rmsle(sample_data): + """Test RMSLE calculation.""" + actual, pred = sample_data + result = calculate_rmsle(actual, pred, 'target') + assert isinstance(result, float) + assert result >= 0 + + +def test_calculate_crps(sample_uncertainty_data): + """Test CRPS calculation.""" + actual, pred = sample_uncertainty_data + result = calculate_crps(actual, pred, 'target') + assert isinstance(result, float) + assert result >= 0 + + +def test_calculate_ap(sample_data): + """Test Average Precision calculation.""" + actual, pred = sample_data + result = calculate_ap(actual, pred, 'target', threshold=2.5) + assert isinstance(result, float) + assert 0 <= result <= 1 + + +def test_calculate_emd(sample_data): + """Test Earth Mover's Distance calculation.""" + actual, pred = sample_data + result = calculate_emd(actual, pred, 'target') + assert isinstance(result, float) + assert result >= 0 + + +def test_calculate_pearson(sample_data): + """Test Pearson correlation calculation.""" + actual, pred = sample_data + result = calculate_pearson(actual, pred, 'target') + assert isinstance(result, float) + assert -1 <= result <= 1 + + +def test_calculate_coverage(sample_uncertainty_data): + """Test Coverage calculation.""" + actual, pred = sample_uncertainty_data + result = calculate_coverage(actual, pred, 'target') + assert isinstance(result, float) + assert 0 <= result <= 1 + + +def test_calculate_ignorance_score(sample_uncertainty_data): + """Test Ignorance Score calculation.""" + actual, pred = sample_uncertainty_data + result = calculate_ignorance_score(actual, pred, 'target') + assert isinstance(result, float) + assert result >= 0 + + +def test_calculate_mis(sample_uncertainty_data): + """Test Mean Interval Score calculation.""" + actual, pred = sample_uncertainty_data + result = calculate_mean_interval_score(actual, pred, 'target') + assert isinstance(result, float) + assert result >= 0 + + +def test_point_metric_functions(): + """Test that all point metric functions are available.""" + expected_metrics = [ + "RMSLE", "CRPS", "AP", "EMD", "SD", "pEMDiv", "Pearson", 
"Variogram" + ] + + for metric in expected_metrics: + assert metric in POINT_METRIC_FUNCTIONS + assert callable(POINT_METRIC_FUNCTIONS[metric]) + + +def test_uncertainty_metric_functions(): + """Test that all uncertainty metric functions are available.""" + expected_metrics = ["CRPS", "MIS", "Ignorance", "Brier", "Jeffreys", "Coverage"] + + for metric in expected_metrics: + assert metric in UNCERTAINTY_METRIC_FUNCTIONS + assert callable(UNCERTAINTY_METRIC_FUNCTIONS[metric]) + + +def test_not_implemented_metrics(): + """Test that unimplemented metrics raise NotImplementedError.""" + actual = pd.DataFrame({'target': [[1.0]]}) + pred = pd.DataFrame({'pred_target': [[1.0]]}) + + from views_evaluation.evaluation.metric_calculators import ( + calculate_brier, + calculate_jeffreys, + calculate_sd, + calculate_pEMDiv, + calculate_variogram, + ) + + unimplemented_functions = [ + calculate_brier, + calculate_jeffreys, + calculate_sd, + calculate_pEMDiv, + calculate_variogram, + ] + + for func in unimplemented_functions: + with pytest.raises(NotImplementedError): + func(actual, pred, 'target') \ No newline at end of file diff --git a/views_evaluation/evaluation/evaluation_manager.py b/views_evaluation/evaluation/evaluation_manager.py index 0f5074a..9b0f859 100644 --- a/views_evaluation/evaluation/evaluation_manager.py +++ b/views_evaluation/evaluation/evaluation_manager.py @@ -1,17 +1,15 @@ from typing import List, Dict, Tuple, Optional +import logging import pandas as pd import numpy as np -import properscoring as ps -from sklearn.metrics import ( - root_mean_squared_error, - root_mean_squared_log_error, - average_precision_score, -) from views_evaluation.evaluation.metrics import ( PointEvaluationMetrics, UncertaintyEvaluationMetrics, ) -import logging +from views_evaluation.evaluation.metric_calculators import ( + POINT_METRIC_FUNCTIONS, + UNCERTAINTY_METRIC_FUNCTIONS, +) logger = logging.getLogger(__name__) @@ -31,107 +29,55 @@ def __init__(self, metrics_list: list): """ self.metrics_list = metrics_list - self.point_metric_functions = { - "RMSLE": self._calculate_rmsle, - "CRPS": self._calculate_crps, - "AP": self._calculate_ap, - "Brier": self._calculate_brier, - "Jeffreys": self._calculate_jeffreys, - "Coverage": self._calculate_coverage, - "EMD": self._calculate_emd, - "SD": self._calculate_sd, - "pEMDiv": self._calculate_pEMDiv, - "Pearson": self._calculate_pearson, - "Variogram": self._calculate_variogram, - } - self.uncertainty_metric_functions = { - "CRPS": self._calculate_crps, - } - - @staticmethod - def _calculate_rmsle( - matched_actual: pd.DataFrame, matched_pred: pd.DataFrame, target: str - ) -> float: - return ( - root_mean_squared_error(matched_actual, matched_pred) - if target.startswith("ln") - else root_mean_squared_log_error(matched_actual, matched_pred) - ) - - @staticmethod - def _calculate_crps( - matched_actual: pd.DataFrame, matched_pred: pd.DataFrame, target: str - ) -> float: - return np.mean( - [ - ps.crps_ensemble(actual, np.array(pred)) - for actual, pred in zip( - matched_actual[target], matched_pred[f"pred_{target}"] - ) - ] - ) + self.point_metric_functions = POINT_METRIC_FUNCTIONS + self.uncertainty_metric_functions = UNCERTAINTY_METRIC_FUNCTIONS @staticmethod - def _calculate_ap( - matched_actual: pd.DataFrame, - matched_pred: pd.DataFrame, - target: str, - threshold=0.01, - ) -> float: + def transform_data(df: pd.DataFrame, target: str) -> pd.DataFrame: """ - Calculate Average Precision (AP) for binary predictions with a threshold. 
+ Transform log-transformed targets (ln/lx prefixes) back to the original scale; lr targets are returned unchanged. """ - matched_pred_binary = (matched_pred >= threshold).astype(int) - matched_actual_binary = (matched_actual > 0).astype(int) - return average_precision_score(matched_actual_binary, matched_pred_binary) - - @staticmethod - def _calculate_brier( - matched_actual: pd.DataFrame, matched_pred: pd.DataFrame, target: str - ) -> float: - pass - - @staticmethod - def _calculate_jeffreys( - matched_actual: pd.DataFrame, matched_pred: pd.DataFrame, target: str - ) -> float: - pass - - @staticmethod - def _calculate_coverage( - matched_actual: pd.DataFrame, matched_pred: pd.DataFrame, target: str - ) -> float: - pass - - @staticmethod - def _calculate_emd( - matched_actual: pd.DataFrame, matched_pred: pd.DataFrame, target: str - ) -> float: - pass - - @staticmethod - def _calculate_sd( - matched_actual: pd.DataFrame, matched_pred: pd.DataFrame, target: str - ) -> float: - pass + if target.startswith("ln") or target.startswith("pred_ln"): + df[[target]] = df[[target]].applymap(lambda x: np.exp(x) - 1) + elif target.startswith("lx") or target.startswith("pred_lx"): + df[[target]] = df[[target]].applymap(lambda x: np.exp(x) - np.exp(100)) + elif target.startswith("lr") or target.startswith("pred_lr"): + pass + else: + raise ValueError(f"Target {target} is not a valid target") + return df @staticmethod - def _calculate_pEMDiv( - matched_actual: pd.DataFrame, matched_pred: pd.DataFrame, target: str - ) -> float: - pass + def convert_to_arrays(df: pd.DataFrame) -> pd.DataFrame: + """ + Convert columns in a DataFrame to numpy arrays. - @staticmethod - def _calculate_pearson( - matched_actual: pd.DataFrame, matched_pred: pd.DataFrame, target: str - ) -> float: - pass + Args: + df (pd.DataFrame): The input DataFrame with columns that may contain lists. - @staticmethod - def _calculate_variogram( - matched_actual: pd.DataFrame, matched_pred: pd.DataFrame, target: str - ) -> float: - pass + Returns: + pd.DataFrame: A new DataFrame with columns converted to numpy arrays. + """ + converted = df.copy() + for col in converted.columns: + converted[col] = converted[col].apply( + lambda x: np.array(x) if isinstance(x, list) else np.array([x]) + ) + return converted @staticmethod def get_evaluation_type(predictions: List[pd.DataFrame]) -> bool: @@ -144,47 +90,55 @@ def get_evaluation_type(predictions: List[pd.DataFrame]) -> bool: Returns: bool: True if all DataFrames are for uncertainty evaluation, - False if any DataFrame is suitable for point evaluation. + False if all DataFrames are for point evaluation. Raises: - ValueError: If there is a mix of results (some DataFrames for uncertainty and others for point evaluation). + ValueError: If there is a mix of single and multiple values in the lists, + or if uncertainty lists have different lengths. 
""" - all_uncertainty = True - all_point = True + is_uncertainty = False + is_point = False + uncertainty_length = None for df in predictions: - if all( - isinstance(value, list) and len(value) >= 2 - for value in df.values.flatten() - ): - all_point = False - else: - all_uncertainty = False - - if all_uncertainty and not all_point: - return True - elif all_point and not all_uncertainty: - return False - else: + for value in df.values.flatten(): + if not (isinstance(value, np.ndarray) or isinstance(value, list)): + raise ValueError( + "All values must be lists or numpy arrays. Convert the data." + ) + + if len(value) > 1: + is_uncertainty = True + # For uncertainty evaluation, check that all lists have the same length + if uncertainty_length is None: + uncertainty_length = len(value) + elif len(value) != uncertainty_length: + raise ValueError( + f"Inconsistent list lengths in uncertainty evaluation. " + f"Found lengths {uncertainty_length} and {len(value)}" + ) + elif len(value) == 1: + is_point = True + else: + raise ValueError("Empty lists are not allowed") + + if is_uncertainty and is_point: raise ValueError( - "Mix of evaluation types detected: some DataFrames are for uncertainty, others for point evaluation." - "Please ensure all DataFrames are consistent in their evaluation type" + "Mix of evaluation types detected: some rows contain single values, others contain multiple values. " + "Please ensure all rows are consistent in their evaluation type" ) + return is_uncertainty + @staticmethod - def validate_predictions( - predictions: List[pd.DataFrame], target: str, is_uncertainty: bool - ): + def validate_predictions(predictions: List[pd.DataFrame], target: str): """ Checks if the predictions are valid DataFrames. - Each DataFrame must have exactly one column named `pred_column_name`. - - If is_uncertainty is True, all elements in the column must be lists. - - If is_uncertainty is False, all elements in the column must be floats. Args: predictions (List[pd.DataFrame]): A list of DataFrames containing the predictions. target (str): The target column in the actual DataFrame. - is_uncertainty (bool): Flag to indicate if the evaluation is for uncertainty. """ pred_column_name = f"pred_{target}" if not isinstance(predictions, list): @@ -195,20 +149,10 @@ def validate_predictions( raise TypeError(f"Predictions[{i}] must be a DataFrame.") if df.empty: raise ValueError(f"Predictions[{i}] must not be empty.") - if df.columns.tolist() != [pred_column_name]: + if pred_column_name not in df.columns: raise ValueError( - f"Predictions[{i}] must contain only one column named '{pred_column_name}'." + f"Predictions[{i}] must contain the column named '{pred_column_name}'." ) - if ( - is_uncertainty - and not df.applymap(lambda x: isinstance(x, list)).all().all() - ): - raise ValueError("Each row in the predictions must be a list.") - if ( - not is_uncertainty - and not df.applymap(lambda x: isinstance(x, (int, float))).all().all() - ): - raise ValueError("Each row in the predictions must be a float.") @staticmethod def _match_actual_pred( @@ -271,6 +215,7 @@ def step_wise_evaluation( target: str, steps: List[int], is_uncertainty: bool, + **kwargs, ): """ Evaluates the predictions step-wise and calculates the specified metrics. 
@@ -298,7 +243,6 @@ def step_wise_evaluation( ) metric_functions = self.point_metric_functions - step_metrics = {} result_dfs = EvaluationManager._split_dfs_by_step(predictions) for metric in self.metrics_list: @@ -310,7 +254,7 @@ def step_wise_evaluation( ) evaluation_dict[f"step{str(step).zfill(2)}"].__setattr__( metric, - metric_functions[metric](matched_actual, matched_pred, target), + metric_functions[metric](matched_actual, matched_pred, target, **kwargs), ) else: logger.warning(f"Metric {metric} is not a default metric, skipping...") @@ -326,6 +270,7 @@ def time_series_wise_evaluation( predictions: List[pd.DataFrame], target: str, is_uncertainty: bool, + **kwargs, ): """ Evaluates the predictions time series-wise and calculates the specified metrics. @@ -362,7 +307,7 @@ def time_series_wise_evaluation( ) evaluation_dict[f"ts{str(i).zfill(2)}"].__setattr__( metric, - metric_functions[metric](matched_actual, matched_pred, target), + metric_functions[metric](matched_actual, matched_pred, target, **kwargs), ) else: logger.warning(f"Metric {metric} is not a default metric, skipping...") @@ -378,6 +323,7 @@ def month_wise_evaluation( predictions: List[pd.DataFrame], target: str, is_uncertainty: bool, + **kwargs, ): """ Evaluates the predictions month-wise and calculates the specified metrics. @@ -395,7 +341,7 @@ def month_wise_evaluation( month_range = pred_concat.index.get_level_values(0).unique() month_start = month_range.min() month_end = month_range.max() - + if is_uncertainty: evaluation_dict = ( UncertaintyEvaluationMetrics.make_month_wise_evaluation_dict( @@ -423,6 +369,7 @@ def month_wise_evaluation( matched_actual.loc[df.index, [target]], matched_pred.loc[df.index, [f"pred_{target}"]], target, + **kwargs, ) ) @@ -444,6 +391,7 @@ def evaluate( predictions: List[pd.DataFrame], target: str, steps: List[int], + **kwargs, ): """ Evaluates the predictions and calculates the specified point metrics. @@ -455,18 +403,33 @@ def evaluate( steps (List[int]): The steps to evaluate. 
""" + + EvaluationManager.validate_predictions(predictions, target) + actual = EvaluationManager.transform_data( + EvaluationManager.convert_to_arrays(actual), target + ) + predictions = [ + EvaluationManager.transform_data( + EvaluationManager.convert_to_arrays(pred), f"pred_{target}" + ) + for pred in predictions + ] is_uncertainty = EvaluationManager.get_evaluation_type(predictions) - EvaluationManager.validate_predictions(predictions, target, is_uncertainty) evaluation_results = {} evaluation_results["month"] = self.month_wise_evaluation( - actual, predictions, target, is_uncertainty + actual, predictions, target, is_uncertainty, **kwargs ) evaluation_results["time_series"] = self.time_series_wise_evaluation( - actual, predictions, target, is_uncertainty + actual, predictions, target, is_uncertainty, **kwargs ) evaluation_results["step"] = self.step_wise_evaluation( - actual, predictions, target, steps, is_uncertainty, + actual, + predictions, + target, + steps, + is_uncertainty, + **kwargs, ) return evaluation_results diff --git a/views_evaluation/evaluation/metric_calculators.py b/views_evaluation/evaluation/metric_calculators.py new file mode 100644 index 0000000..02d775f --- /dev/null +++ b/views_evaluation/evaluation/metric_calculators.py @@ -0,0 +1,380 @@ +from typing import List, Dict, Tuple, Optional +from collections import Counter +import pandas as pd +import numpy as np +import properscoring as ps +from sklearn.metrics import ( + root_mean_squared_log_error, + average_precision_score, +) +from scipy.stats import wasserstein_distance, pearsonr + + +def calculate_rmsle( + matched_actual: pd.DataFrame, matched_pred: pd.DataFrame, target: str +) -> float: + """ + Calculate Root Mean Squared Logarithmic Error (RMSLE) for each prediction. + + Args: + matched_actual (pd.DataFrame): DataFrame containing actual values + matched_pred (pd.DataFrame): DataFrame containing predictions + target (str): The target column name + + Returns: + float: Average RMSLE score + """ + actual_values = np.concatenate(matched_actual[target].values) + pred_values = np.concatenate(matched_pred[f"pred_{target}"].values) + + actual_expanded = np.repeat( + actual_values, [len(x) for x in matched_pred[f"pred_{target}"]] + ) + + return root_mean_squared_log_error(actual_expanded, pred_values) + + +def calculate_crps( + matched_actual: pd.DataFrame, matched_pred: pd.DataFrame, target: str +) -> float: + """ + Calculate Continuous Ranked Probability Score (CRPS) for each prediction. + + Args: + matched_actual (pd.DataFrame): DataFrame containing actual values + matched_pred (pd.DataFrame): DataFrame containing predictions + target (str): The target column name + + Returns: + float: Average CRPS score + """ + return np.mean( + [ + ps.crps_ensemble(actual[0], np.array(pred)) + for actual, pred in zip( + matched_actual[target], matched_pred[f"pred_{target}"] + ) + ] + ) + + +def calculate_ap( + matched_actual: pd.DataFrame, + matched_pred: pd.DataFrame, + target: str, + threshold=25, +) -> float: + """ + Calculate Average Precision (AP) for binary predictions with a threshold. 
+ + Args: + matched_actual (pd.DataFrame): DataFrame containing actual values + matched_pred (pd.DataFrame): DataFrame containing predictions + target (str): The target column name + threshold (float): Threshold to convert predictions to binary values + + Returns: + float: Average Precision score + """ + actual_values = np.concatenate(matched_actual[target].values) + pred_values = np.concatenate(matched_pred[f"pred_{target}"].values) + + actual_expanded = np.repeat( + actual_values, [len(x) for x in matched_pred[f"pred_{target}"]] + ) + + actual_binary = (actual_expanded > threshold).astype(int) + pred_binary = (pred_values >= threshold).astype(int) + + return average_precision_score(actual_binary, pred_binary) + + +def calculate_emd( + matched_actual: pd.DataFrame, matched_pred: pd.DataFrame, target: str +) -> float: + """ + Calculate Earth Mover's Distance (EMD) between predicted and actual distributions. + EMD measures the minimum amount of work needed to transform one distribution into another. + + Args: + matched_actual (pd.DataFrame): DataFrame containing actual values + matched_pred (pd.DataFrame): DataFrame containing predictions + target (str): The target column name + + Returns: + float: Average EMD score + """ + emd_list = [] + for actual, preds in zip(matched_actual[target], matched_pred[f"pred_{target}"]): + actual_val = np.asarray(actual) + preds_arr = np.asarray(preds) + emd_list.append(wasserstein_distance(preds_arr, actual_val)) + return np.mean(emd_list) + + +def calculate_sd( + matched_actual: pd.DataFrame, matched_pred: pd.DataFrame, target: str +) -> float: + """ + Calculate Sinkhorn Distance between predicted and actual distributions. + + Sinkhorn Distance is a regularized version of the Earth Mover's Distance + that is computationally more efficient. + + Args: + matched_actual (pd.DataFrame): DataFrame containing actual values + matched_pred (pd.DataFrame): DataFrame containing predictions + target (str): The target column name + + Returns: + float: Sinkhorn Distance score + """ + raise NotImplementedError("Sinkhorn Distance calculation not yet implemented") + + +def calculate_pEMDiv( + matched_actual: pd.DataFrame, matched_pred: pd.DataFrame, target: str +) -> float: + """ + Calculate pseudo-Earth Mover Divergence between predicted and actual distributions. + + pEMDiv is a computationally efficient approximation of the Earth Mover's Distance. + + Args: + matched_actual (pd.DataFrame): DataFrame containing actual values + matched_pred (pd.DataFrame): DataFrame containing predictions + target (str): The target column name + + Returns: + float: pEMDiv score + """ + raise NotImplementedError("pEMDiv calculation not yet implemented") + + +def calculate_pearson( + matched_actual: pd.DataFrame, matched_pred: pd.DataFrame, target: str +) -> float: + """ + Calculate Pearson correlation coefficient between actual and predicted values. + This measures the linear correlation between predictions and actual values. 
+ + Args: + matched_actual (pd.DataFrame): DataFrame containing actual values + matched_pred (pd.DataFrame): DataFrame containing predictions + target (str): The target column name + + Returns: + float: Pearson correlation coefficient + """ + actual_values = np.concatenate(matched_actual[target].values) + pred_values = np.concatenate(matched_pred[f"pred_{target}"].values) + + actual_expanded = np.repeat( + actual_values, [len(x) for x in matched_pred[f"pred_{target}"]] + ) + + correlation, _ = pearsonr(actual_expanded, pred_values) + return correlation + + +def calculate_variogram( + matched_actual: pd.DataFrame, matched_pred: pd.DataFrame, target: str +) -> float: + """ + !! How to account for time and location? + Calculate the variogram score between actual and predicted values. + This measures the spatial/temporal correlation structure. + + Args: + matched_actual (pd.DataFrame): DataFrame containing actual values + matched_pred (pd.DataFrame): DataFrame containing predictions + target (str): The target column name + + Returns: + float: Variogram score + """ + raise NotImplementedError("Variogram calculation not yet implemented") + + +def calculate_brier( + matched_actual: pd.DataFrame, matched_pred: pd.DataFrame, target: str +) -> float: + """ + Calculate Brier Score for probabilistic predictions. + + The Brier Score measures the accuracy of probabilistic predictions. + Lower values indicate better predictions. + + Args: + matched_actual (pd.DataFrame): DataFrame containing actual values + matched_pred (pd.DataFrame): DataFrame containing predictions + target (str): The target column name + + Returns: + float: Brier Score + """ + raise NotImplementedError("Brier Score calculation not yet implemented") + + +def calculate_jeffreys( + matched_actual: pd.DataFrame, matched_pred: pd.DataFrame, target: str +) -> float: + """ + Calculate Jeffreys Divergence between predicted and actual distributions. + + Jeffreys Divergence is a symmetric measure of the difference between + two probability distributions. + + Args: + matched_actual (pd.DataFrame): DataFrame containing actual values + matched_pred (pd.DataFrame): DataFrame containing predictions + target (str): The target column name + + Returns: + float: Jeffreys Divergence score + """ + raise NotImplementedError("Jeffreys Divergence calculation not yet implemented") + + +def calculate_coverage( + matched_actual: pd.DataFrame, matched_pred: pd.DataFrame, target: str, alpha=0.1 +) -> float: + """ + Calculate Coverage (Histograms) for probabilistic predictions. + + Coverage measures how well the predicted distribution covers the actual values. + + Args: + matched_actual (pd.DataFrame): DataFrame containing actual values + matched_pred (pd.DataFrame): DataFrame containing predictions + target (str): The target column name + alpha (float): Significance level for the interval (default: 0.1) + Returns: + float: Coverage score + """ + y_true = matched_actual[target].values + y_pred_samples = matched_pred[f"pred_{target}"].values + + lower_q = alpha / 2 + upper_q = 1 - alpha / 2 + + covered = [] + for yt, pred_list in zip(y_true, y_pred_samples): + lower = np.quantile(pred_list, lower_q) + upper = np.quantile(pred_list, upper_q) + covered.append(lower <= yt <= upper) + + return np.mean(covered) + + +def calculate_mean_interval_score( + matched_actual: pd.DataFrame, matched_pred: pd.DataFrame, target: str, alpha=0.05 +): + """ + Calculate the Mean Interval Score (MIS) for probabilistic predictions. 
+ + The Mean Interval Score measures the average width of prediction intervals + and the coverage of the actual values. + + Args: + matched_actual (pd.DataFrame): DataFrame containing actual values + matched_pred (pd.DataFrame): DataFrame containing predictions + target (str): The target column name + alpha (float): Significance level for the interval (default: 0.05) + + Returns: + float: Mean Interval Score + """ + lower = np.array( + [np.quantile(row, q=alpha / 2) for row in matched_pred[f"pred_{target}"]] + ) + upper = np.array( + [np.quantile(row, q=1 - (alpha / 2)) for row in matched_pred[f"pred_{target}"]] + ) + actuals = np.array( + [ + row[0] if isinstance(row, (np.ndarray, list)) else row + for row in matched_actual[target] + ] + ) + + interval_width = upper - lower + lower_coverage = (2 / alpha) * (lower - actuals) * (actuals < lower) + upper_coverage = (2 / alpha) * (actuals - upper) * (actuals > upper) + interval_score = interval_width + lower_coverage + upper_coverage + + return np.mean(interval_score) + + +def calculate_ignorance_score( + matched_actual: pd.DataFrame, + matched_pred: pd.DataFrame, + target: str, + bins=[0, 0.5, 2.5, 5.5, 10.5, 25.5, 50.5, 100.5, 250.5, 500.5, 1000.5], + low_bin=0, + high_bin=10000, +): + """ + !! Note: unfinished. Bins need to be fixed because in the competition we evaluate over log values, but not here. + Adapted from https://github.com/prio-data/prediction_competition_2023/tree/main + Compute Binned Ignorance Score for predictions and observations. + + Args: + matched_actual (pd.DataFrame): DataFrame containing actual values + matched_pred (pd.DataFrame): DataFrame containing predictions + target (str): The target column name + bins (list): List of bins for the histogram + low_bin (float): The lower bound of the bins + high_bin (float): The upper bound of the bins + + Returns: + float: Mean ignorance score. 
+ """ + + def digitize_minus_one(x, edges): + return np.digitize(x, edges, right=False) - 1 + + def _calculate_ignorance_score(predictions, observed, n): + c = Counter(predictions) + prob = c[observed] / n + return -np.log2(prob) + + scores = [] + for row_p, row_o in zip(matched_pred[f"pred_{target}"], matched_actual[target]): + preds = np.asarray(row_p) + truth = float(np.asarray(row_o).squeeze()) + + edges = np.histogram_bin_edges(preds, bins=bins, range=(low_bin, high_bin)) + + binned_preds = digitize_minus_one(preds, edges) + binned_obs = digitize_minus_one([truth], edges)[0] + + synthetic = np.arange(len(edges) - 1) + binned_preds = np.concatenate([binned_preds, synthetic]) + + n = len(binned_preds) + score = _calculate_ignorance_score(binned_preds, binned_obs, n) + scores.append(score) + + return np.mean(scores) + + +POINT_METRIC_FUNCTIONS = { + "RMSLE": calculate_rmsle, + "CRPS": calculate_crps, + "AP": calculate_ap, + "EMD": calculate_emd, + "SD": calculate_sd, + "pEMDiv": calculate_pEMDiv, + "Pearson": calculate_pearson, + "Variogram": calculate_variogram, +} + +UNCERTAINTY_METRIC_FUNCTIONS = { + "CRPS": calculate_crps, + "MIS": calculate_mean_interval_score, + "Ignorance": calculate_ignorance_score, + "Brier": calculate_brier, + "Jeffreys": calculate_jeffreys, + "Coverage": calculate_coverage, +} diff --git a/views_evaluation/evaluation/metrics.py b/views_evaluation/evaluation/metrics.py index 70f158e..36b2cb5 100644 --- a/views_evaluation/evaluation/metrics.py +++ b/views_evaluation/evaluation/metrics.py @@ -121,9 +121,6 @@ class PointEvaluationMetrics(BaseEvaluationMetrics): RMSLE: Optional[float] = None CRPS: Optional[float] = None AP: Optional[float] = None - Brier: Optional[float] = None - Jeffreys: Optional[float] = None - Coverage: Optional[float] = None EMD: Optional[float] = None SD: Optional[float] = None pEMDiv: Optional[float] = None @@ -140,4 +137,10 @@ class UncertaintyEvaluationMetrics(BaseEvaluationMetrics): CRPS (Optional[float]): Continuous Ranked Probability Score. """ - CRPS: Optional[float] = None \ No newline at end of file + CRPS: Optional[float] = None + MIS: Optional[float] = None + Ignorance: Optional[float] = None + Brier: Optional[float] = None + Jeffreys: Optional[float] = None + Coverage: Optional[float] = None + \ No newline at end of file