From 4271c2d4be0e392bf99b340b5bcf82b6c2f78006 Mon Sep 17 00:00:00 2001 From: Yuji Ota Date: Mon, 26 Aug 2024 12:14:23 +0000 Subject: [PATCH] Add Prior knowledge for LiM #145 Add prior knowledge option for LiM class. Each element of the prior knowledge matrix should be 0 (unconnected) or -1 (unknown). Modifications to the algorithm are: set the L-BFGS bounds of variables on unconnected paths to 0 in the global search, and omit paths specified as unconnected in the local search. --- examples/Use_prior_knowledge_in_LiM.ipynb | 917 ++++++++++++++++++++++ lingam/lim.py | 24 +- tests/test_lim.py | 29 + 3 files changed, 969 insertions(+), 1 deletion(-) create mode 100644 examples/Use_prior_knowledge_in_LiM.ipynb diff --git a/examples/Use_prior_knowledge_in_LiM.ipynb b/examples/Use_prior_knowledge_in_LiM.ipynb new file mode 100644 index 0000000..f78d183 --- /dev/null +++ b/examples/Use_prior_knowledge_in_LiM.ipynb @@ -0,0 +1,917 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# How to use prior knowledge in LiM" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Import and settings\n", + "In this example, we need to import `numpy`, `pandas`, and `graphviz` in addition to `lingam`." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": { + "ExecuteTime": { + "end_time": "2019-12-12T06:27:49.386559Z", + "start_time": "2019-12-12T06:27:47.059613Z" + } + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "['1.26.4', '2.2.2', '0.20.3', '1.9.0']\n" + ] + } + ], + "source": [ + "import os\n", + "import numpy as np\n", + "import pandas as pd\n", + "import graphviz\n", + "import lingam\n", + "from lingam.utils import make_prior_knowledge, make_dot\n", + "\n", + "print([np.__version__, pd.__version__, graphviz.__version__, lingam.__version__])\n", + "\n", + "np.set_printoptions(precision=3, suppress=True)\n", + "np.random.seed(0)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Utility function\n", + "We define a utility function to draw the directed acyclic graph." + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": { + "ExecuteTime": { + "end_time": "2019-12-12T06:27:49.416479Z", + "start_time": "2019-12-12T06:27:49.404510Z" + } + }, + "outputs": [], + "source": [ + "def make_prior_knowledge_graph(prior_knowledge_matrix):\n", + " d = graphviz.Digraph(engine='dot')\n", + " \n", + " labels = [f'x{i}' for i in range(prior_knowledge_matrix.shape[0])]\n", + " for label in labels:\n", + " d.node(label, label)\n", + "\n", + " dirs = np.where(prior_knowledge_matrix > 0)\n", + " for to, from_ in zip(dirs[0], dirs[1]):\n", + " d.edge(labels[from_], labels[to])\n", + "\n", + " dirs = np.where(prior_knowledge_matrix < 0)\n", + " for to, from_ in zip(dirs[0], dirs[1]):\n", + " if to != from_:\n", + " d.edge(labels[from_], labels[to], style='dashed')\n", + " return d" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Test data\n", + "We create test data consisting of 6 variables." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": { + "ExecuteTime": { + "end_time": "2019-12-12T06:27:49.484297Z", + "start_time": "2019-12-12T06:27:49.420469Z" + } + }, + "outputs": [ + { + "data": { + "text/plain": [ + "array([[ 1. , 1. , 1. , 0.194, 0.388],\n", + " [ 1. , 1. , 0. , 0.142, -1.475],\n", + " [ 1. , 1. , 1. , -0.061, -1.92 ],\n", + " [ 1. , 1. , 1. , 0.612, -0.297],\n", + " [ 1. , 1. , 1. , 0.331, -2.172]])" + ] + }, + "execution_count": 3, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "cwd = os.getcwd()\n", + "X = np.loadtxt(f\"{cwd}/../tests/test_lim_data.csv\", delimiter=\",\")\n", + "dis_con = np.array([[0.0, 0.0, 0.0, 1.0, 1.0]])\n", + "\n", + "X[:5,:]" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": { + "ExecuteTime": { + "end_time": "2019-12-12T06:27:49.947772Z", + "start_time": "2019-12-12T06:27:49.486292Z" + } + }, + "outputs": [ + { + "data": { + "image/svg+xml": [ + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "%3\n", + "\n", + "\n", + "\n", + "x0\n", + "\n", + "x0\n", + "\n", + "\n", + "\n", + "x3\n", + "\n", + "x3\n", + "\n", + "\n", + "\n", + "x0->x3\n", + "\n", + "\n", + "0.70\n", + "\n", + "\n", + "\n", + "x1\n", + "\n", + "x1\n", + "\n", + "\n", + "\n", + "x1->x0\n", + "\n", + "\n", + "1.09\n", + "\n", + "\n", + "\n", + "x2\n", + "\n", + "x2\n", + "\n", + "\n", + "\n", + "x2->x0\n", + "\n", + "\n", + "-1.29\n", + "\n", + "\n", + "\n", + "x2->x1\n", + "\n", + "\n", + "0.80\n", + "\n", + "\n", + "\n", + "x2->x3\n", + "\n", + "\n", + "1.91\n", + "\n", + "\n", + "\n", + "x4\n", + "\n", + "x4\n", + "\n", + "\n", + "\n", + "x2->x4\n", + "\n", + "\n", + "-0.63\n", + "\n", + "\n", + "\n", + "x4->x0\n", + "\n", + "\n", + "-0.84\n", + "\n", + "\n", + "\n", + "x4->x3\n", + "\n", + "\n", + "1.94\n", + "\n", + "\n", + "\n" + ], + "text/plain": [ + "" + ] + }, + "execution_count": 4, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + 
"W_true = np.array(\n", + " [ \n", + " [0.0, 1.09482609, -1.29270764, 0.0, -0.84424137],\n", + " [0.0, 0.0, 0.80393307, 0.0, 0.0],\n", + " [0.0, 0.0, 0.0, 0.0, 0.0],\n", + " [0.70346053, 0.0, 1.90912441, 0.0, 1.94441713],\n", + " [0.0, 0.0, -0.63152585, 0.0, 0.0],\n", + " ]\n", + ")\n", + "make_dot(W_true)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Make Prior Knowledge Matrix\n", + "We create prior knowledge so that x0, x1 and x4 are sink variables.\n", + "\n", + "The elements of prior knowledge matrix are defined as follows:\n", + "* ``0`` : :math:`x_i` does not have a directed path to :math:`x_j`\n", + "* ``1`` : :math:`x_i` has a directed path to :math:`x_j`\n", + "* ``-1`` : No prior knowledge is available to know if either of the two cases above (0 or 1) is true." + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": { + "ExecuteTime": { + "end_time": "2019-12-12T06:27:50.380209Z", + "start_time": "2019-12-12T06:27:49.950763Z" + } + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[[-1 -1 -1 0 -1]\n", + " [ 0 -1 -1 0 -1]\n", + " [ 0 -1 -1 0 -1]\n", + " [ 0 -1 -1 -1 -1]\n", + " [ 0 -1 -1 0 -1]]\n" + ] + } + ], + "source": [ + "prior_knowledge = make_prior_knowledge(\n", + " n_variables=5,\n", + " sink_variables=[0, 3],\n", + ")\n", + "print(prior_knowledge)" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [ + { + "data": { + "image/svg+xml": [ + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "%3\n", + "\n", + "\n", + "\n", + "x0\n", + "\n", + "x0\n", + "\n", + "\n", + "\n", + "x1\n", + "\n", + "x1\n", + "\n", + "\n", + "\n", + "x1->x0\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "x2\n", + "\n", + "x2\n", + "\n", + "\n", + "\n", + "x1->x2\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "x3\n", + "\n", + "x3\n", + "\n", + "\n", + "\n", + "x1->x3\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "x4\n", + "\n", + "x4\n", + "\n", + "\n", 
+ "\n", + "x1->x4\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "x2->x0\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "x2->x1\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "x2->x3\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "x2->x4\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "x4->x0\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "x4->x1\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "x4->x2\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "x4->x3\n", + "\n", + "\n", + "\n", + "\n", + "\n" + ], + "text/plain": [ + "" + ] + }, + "execution_count": 6, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Draw a graph of prior knowledge\n", + "make_prior_knowledge_graph(prior_knowledge)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Causal Discovery\n", + "First, we create a `LiM` object without the prior knowledge matrix." + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[[ 0. 0.495 -0.211 0. 0. ]\n", + " [ 0. 0. 0.388 0. 0. ]\n", + " [ 0. 0. 0. 0. 0. ]\n", + " [-0.119 -0.408 1. 0. 0.995]\n", + " [-0.365 -0.422 -0.254 0. 0. 
]]\n" + ] + }, + { + "data": { + "image/svg+xml": [ + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "%3\n", + "\n", + "\n", + "\n", + "x0\n", + "\n", + "x0\n", + "\n", + "\n", + "\n", + "x3\n", + "\n", + "x3\n", + "\n", + "\n", + "\n", + "x0->x3\n", + "\n", + "\n", + "-0.12\n", + "\n", + "\n", + "\n", + "x4\n", + "\n", + "x4\n", + "\n", + "\n", + "\n", + "x0->x4\n", + "\n", + "\n", + "-0.36\n", + "\n", + "\n", + "\n", + "x1\n", + "\n", + "x1\n", + "\n", + "\n", + "\n", + "x1->x0\n", + "\n", + "\n", + "0.49\n", + "\n", + "\n", + "\n", + "x1->x3\n", + "\n", + "\n", + "-0.41\n", + "\n", + "\n", + "\n", + "x1->x4\n", + "\n", + "\n", + "-0.42\n", + "\n", + "\n", + "\n", + "x2\n", + "\n", + "x2\n", + "\n", + "\n", + "\n", + "x2->x0\n", + "\n", + "\n", + "-0.21\n", + "\n", + "\n", + "\n", + "x2->x1\n", + "\n", + "\n", + "0.39\n", + "\n", + "\n", + "\n", + "x2->x3\n", + "\n", + "\n", + "1.00\n", + "\n", + "\n", + "\n", + "x2->x4\n", + "\n", + "\n", + "-0.25\n", + "\n", + "\n", + "\n", + "x4->x3\n", + "\n", + "\n", + "0.99\n", + "\n", + "\n", + "\n" + ], + "text/plain": [ + "" + ] + }, + "execution_count": 7, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "model = lingam.LiM()\n", + "model.fit(X, dis_con)\n", + "print(model.adjacency_matrix_)\n", + "make_dot(model.adjacency_matrix_)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### x0, x3 as sink variables" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": { + "ExecuteTime": { + "end_time": "2019-12-12T06:27:50.531152Z", + "start_time": "2019-12-12T06:27:50.383201Z" + } + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[[-1 -1 -1 0 -1]\n", + " [ 0 -1 -1 0 -1]\n", + " [ 0 -1 -1 0 -1]\n", + " [ 0 -1 -1 -1 -1]\n", + " [ 0 -1 -1 0 -1]]\n", + "[[ 0. 0.41 0.743 0. 1. ]\n", + " [ 0. 0. 0. 0. 1. ]\n", + " [ 0. 0. 0. 0. 0. ]\n", + " [ 0. 0. -0.361 0. 0.951]\n", + " [ 0. 0. -1.066 0. 0. 
]]\n" + ] + }, + { + "data": { + "image/svg+xml": [ + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "%3\n", + "\n", + "\n", + "\n", + "x0\n", + "\n", + "x0\n", + "\n", + "\n", + "\n", + "x1\n", + "\n", + "x1\n", + "\n", + "\n", + "\n", + "x1->x0\n", + "\n", + "\n", + "0.41\n", + "\n", + "\n", + "\n", + "x2\n", + "\n", + "x2\n", + "\n", + "\n", + "\n", + "x2->x0\n", + "\n", + "\n", + "0.74\n", + "\n", + "\n", + "\n", + "x3\n", + "\n", + "x3\n", + "\n", + "\n", + "\n", + "x2->x3\n", + "\n", + "\n", + "-0.36\n", + "\n", + "\n", + "\n", + "x4\n", + "\n", + "x4\n", + "\n", + "\n", + "\n", + "x2->x4\n", + "\n", + "\n", + "-1.07\n", + "\n", + "\n", + "\n", + "x4->x0\n", + "\n", + "\n", + "1.00\n", + "\n", + "\n", + "\n", + "x4->x1\n", + "\n", + "\n", + "1.00\n", + "\n", + "\n", + "\n", + "x4->x3\n", + "\n", + "\n", + "0.95\n", + "\n", + "\n", + "\n" + ], + "text/plain": [ + "" + ] + }, + "execution_count": 8, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "prior_knowledge = make_prior_knowledge(\n", + " n_variables=5,\n", + " sink_variables=[0, 3],\n", + ")\n", + "print(prior_knowledge)\n", + "\n", + "model = lingam.LiM(prior_knowledge=prior_knowledge, loss_type=\"mixed\")\n", + "model.fit(X, dis_con, only_global=False)\n", + "print(model.adjacency_matrix_)\n", + "make_dot(model.adjacency_matrix_)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "We can see that x0, and x3 are output as sink variables, as specified in the prior knowledge." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### x2 as exogenous variables\n", + "Next, let's specify the prior knowledge so that x0 is an exogenous variable." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [ + { + "data": { + "image/svg+xml": [ + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "%3\n", + "\n", + "\n", + "\n", + "x0\n", + "\n", + "x0\n", + "\n", + "\n", + "\n", + "x1\n", + "\n", + "x1\n", + "\n", + "\n", + "\n", + "x1->x0\n", + "\n", + "\n", + "0.15\n", + "\n", + "\n", + "\n", + "x2\n", + "\n", + "x2\n", + "\n", + "\n", + "\n", + "x2->x1\n", + "\n", + "\n", + "0.38\n", + "\n", + "\n", + "\n", + "x3\n", + "\n", + "x3\n", + "\n", + "\n", + "\n", + "x2->x3\n", + "\n", + "\n", + "1.00\n", + "\n", + "\n", + "\n", + "x4\n", + "\n", + "x4\n", + "\n", + "\n", + "\n", + "x2->x4\n", + "\n", + "\n", + "-0.33\n", + "\n", + "\n", + "\n", + "x4->x0\n", + "\n", + "\n", + "1.00\n", + "\n", + "\n", + "\n", + "x4->x3\n", + "\n", + "\n", + "0.24\n", + "\n", + "\n", + "\n" + ], + "text/plain": [ + "" + ] + }, + "execution_count": 9, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "prior_knowledge = make_prior_knowledge(\n", + " n_variables=5,\n", + " exogenous_variables=[2],\n", + ")\n", + "\n", + "model = lingam.LiM(prior_knowledge=prior_knowledge)\n", + "model.fit(X, dis_con)\n", + "\n", + "make_dot(model.adjacency_matrix_)" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.12" + }, + "toc": { + "base_numbering": 1, + "nav_menu": {}, + "number_sections": true, + "sideBar": true, + "skip_h1_title": false, + "title_cell": "Table of Contents", + "title_sidebar": "Contents", + "toc_cell": false, + "toc_position": {}, + "toc_section_display": true, + "toc_window_display": false + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} 
diff --git a/lingam/lim.py b/lingam/lim.py index e25e9db..f1f8d04 100644 --- a/lingam/lim.py +++ b/lingam/lim.py @@ -35,6 +35,7 @@ def __init__( h_tol=1e-8, rho_max=1e16, w_threshold=0.1, + prior_knowledge=None, ): """Construct a LiM model. @@ -52,6 +53,13 @@ def __init__( Maximum value of the regularization parameter rho. w_threshold : float (default=0.1) Drop edge if the weight btw. variables is less than w_threshold. + prior_knowledge : array-like, shape (n_features, n_features), optional (default=None) + Prior knowledge used for causal discovery, where ``n_features`` is the number of features. + + The elements of the prior knowledge matrix are defined as follows [1]_: + + * ``0`` : :math:`x_i` does not have a directed path to :math:`x_j` + * ``-1`` : No prior knowledge is available to know whether :math:`x_i` has a directed path to :math:`x_j`. """ self._lambda1 = lambda1 self._loss_type = loss_type @@ -60,6 +68,10 @@ def __init__( self._rho_max = rho_max self._w_threshold = w_threshold self._adjacency_matrix = None + self._Aknw = prior_knowledge + if self._Aknw is not None: + self._Aknw = ut.check_array(self._Aknw) + self._Aknw = np.where(self._Aknw < 0, np.nan, self._Aknw) def fit(self, X, dis_con, only_global=False, is_poisson=False): """Fit the model to X with mixed data. 
@@ -289,6 +301,9 @@ def _factorial(y): for i in range(d) for j in range(d) ] + if self._Aknw is not None: + pk = np.concatenate((self._Aknw.flatten(), self._Aknw.flatten())) + bnds = [b if np.isnan(p) else (0, 0) for b, p in zip(bnds, pk)] # if self._loss_type == 'l2': # X = X - np.mean(X, axis=0, keepdims=True) @@ -319,7 +334,7 @@ def _factorial(y): w_est = np.random.random(2 * d * d) W_est = _adj(w_est) W_est[np.abs(W_est) < self._w_threshold] = 0 - print("W_est (without the 2nd phase) is: \n", W_est) + # print("W_est (without the 2nd phase) is: \n", W_est) if not only_global: self._loss_type = "mixed_dag" @@ -334,6 +349,9 @@ def _factorial(y): W_tmp = np.copy(W_est) for iii in range(len(I[0])): # transform to W_tmp if candi_setting[candi_setting_i][iii] == 1: + if self._Aknw is not None: + if self._Aknw[I[1][iii], I[0][iii]] == 0: + continue W_tmp[I[0][iii], I[1][iii]] = 0 W_tmp[I[1][iii], I[0][iii]] = 1 lss, lss_G = _loss(W_tmp) @@ -368,6 +386,8 @@ def _factorial(y): if d > 2 and len(I[0]) < (d * (d - 1) / 2): W0 = np.copy(W_min_lss) W_edges = W0 + W0.T + np.eye(d) + if self._Aknw is not None: + W_edges = np.where(self._Aknw == 0, 1, W_edges) I_add = np.where(W_edges == 0) # add undirected edges' indices for add_i in range(len(I_add[0])): W_tmp = np.copy(W0) @@ -382,6 +402,8 @@ def _factorial(y): ): # if they are different W0 = np.copy(W_est) W_edges = W0 + W0.T + np.eye(d) + if self._Aknw is not None: + W_edges = np.where(self._Aknw == 0, 1, W_edges) I_add = np.where(W_edges == 0) # add undirected edges' indices for add_i in range(len(I_add[0])): W_tmp = np.copy(W0) diff --git a/tests/test_lim.py b/tests/test_lim.py index 538bc1d..89fb251 100644 --- a/tests/test_lim.py +++ b/tests/test_lim.py @@ -39,3 +39,32 @@ def test_fit_lim(): print("The estimated adjacency matrix is:\n", model.adjacency_matrix_) print("The true adjacency matrix is:\n", W_true) print("Done.") + + +def test_fit_lim_prior_knowledge(): + X = np.loadtxt(f"{DATA_DIR_PATH}/test_lim_data.csv", 
delimiter=",") + dis_con = np.array([[0.0, 0.0, 0.0, 1.0, 1.0]]) + W_true = np.array( + [ + [0.0, 1.09482609, -1.29270764, 0.0, -0.84424137], + [0.0, 0.0, 0.80393307, 0.0, 0.0], + [0.0, 0.0, 0.0, 0.0, 0.0], + [0.70346053, 0.0, 1.90912441, 0.0, 1.94441713], + [0.0, 0.0, -0.63152585, 0.0, 0.0], + ] + ) + pk = np.array( + [ + [0, -1, -1, -1, -1], + [0, 0, -1, 0, -1], + [-1, -1, 0, -1, -1], + [-1, -1, -1, 0, -1], + [-1, -1, -1, -1, -0], + ] + ) + model = lingam.LiM(prior_knowledge=pk) + model.fit(X, dis_con, only_global=False, is_poisson=False) + + print("The estimated adjacency matrix is:\n", model.adjacency_matrix_) + print("The true adjacency matrix is:\n", W_true) + print("Done.")