apache · lzjpaul · Mar 21, 2025 · Mar 20, 2025 · Mar 20, 2025 · Mar 20, 2025
diff --git a/examples/healthcare/application/Kidney_Disease/README.md b/examples/healthcare/application/Kidney_Disease/README.md
@@ -19,25 +19,28 @@
 
 # Singa for Kidney Disease Prediction
 
-## Kidney Disease Prediction Task
+## Kidney Disease Prediction Task
 
-Kidney disease prediction is an important tool that uses data science and machine learning techniques to predict the likelihood of a patient suffering from Kidney disease. The goal is to judge whether a patient suffers from kidney disease by analyzing multiple data such as a patient’s medical history, physiological indicators, diagnostic information, treatment options, and socioeconomic factors, so as to take appropriate interventions in advance to provide treatment.
+Kidney disease prediction is an important tool that uses data science and machine learning techniques to predict the likelihood of a patient suffering from kidney disease. The core goal of this technology is to judge whether a patient suffers from kidney disease by analyzing multiple kinds of data, such as the patient’s medical history, physiological indicators, diagnostic information, treatment options, and socioeconomic factors, so that appropriate interventions and treatment can be provided in advance.
+
+The dataset used in this task is a preprocessed version of MIMIC-III. The features contain 6 visit windows, with 2549 frequent diagnoses, procedures and drugs for each window. Each item in the features is the data for one patient, and these features are one-hot encoded. The labels are the corresponding flags marking whether the patient suffered from kidney disease: the label equals "1" if the patient had kidney disease and "0" if not.
 
-The dataset used in this task is MIMIC-III. The features are data containing 6 visit windows, with 2549 frequent diagnoses, procedures and drugs for each window. These features are encoded by one-hot. The labels are corresponding flags to mark whether the patient suffered from kidney disease, where the label equals "1" if the patient had kidney  disease, and the label equals "0" if not.
 
 
 ## Structure
 
-* `data` includes the load of mimic-iii data to be utilized.
+* `kidney.py` in folder `healthcare/data` contains the code that loads the pre-processed kidney data to be utilized.
+
+* `kidney_net.py` in folder `healthcare/models` contains the code that constructs the KidneyNet model used for kidney disease prediction.
+
+* `train.py` is the training script, which controls the training flow by doing backpropagation and SGD updates.
 
-* `model` includes the MLP model construction codes by creating
-  a subclass of `Module` to wrap the neural network operations 
-  of each model.
+## Instructions
+Before starting to use this model for kidney disease prediction, download the sample dataset for kidney disease prediction: https://github.com/lzjpaul/singa-healthcare/tree/main/data/kidney
 
-* `train_kidney_mlp.py` is the training script, which controls the training flow by
-  doing BackPropagation and the SGD update.
+The provided dataset is derived from MIMIC-III and has already been pre-processed. It contains 100 samples for model testing.
 
-## Command
+Please download the dataset to a folder (`pathToDataset`), and then pass that path to the training script using the following command:
 ```bash
-python train_kidney_mlp.py mlp kidney-disease -dir pathToDataset
-```
+python train.py kidneynet -dir pathToDataset
+```
diff --git a/examples/healthcare/application/Kidney_Disease/run.sh b/examples/healthcare/application/Kidney_Disease/run.sh
@@ -17,4 +17,4 @@
 #
 
 ### kidney disease dataset
-python train_kidney_mlp.py mlp kidney-disease -dir pathToDataset
+python train.py kidneynet -dir pathToDataset
diff --git a/...cation/Kidney_Disease/train_kidney_mlp.py → ...hcare/application/Kidney_Disease/train.py b/...cation/Kidney_Disease/train_kidney_mlp.py → ...hcare/application/Kidney_Disease/train.py
@@ -25,6 +25,10 @@
 import time
 import argparse
 from PIL import Image
+import sys
+sys.path.append("../../..")
+from healthcare.data import kidney
+from healthcare.models import kidney_net
 
 np_dtype = {"float16": np.float16, "float32": np.float32}
 
@@ -107,6 +111,7 @@ def run(global_rank,
         sgd,
         graph,
         verbosity,
+        dir_path,
         dist_option='plain',
         spars=None,
         precision='float32'):
@@ -115,9 +120,9 @@ def run(global_rank,
     dev.SetRandSeed(0)
     np.random.seed(0)
 
-    if data == 'kidney-disease':
-        from data import load_kidneydata
-        train_x, train_y, val_x, val_y = load_kidneydata.load()
+    if data == 'kidney':
+
+        train_x, train_y, val_x, val_y = kidney.load(dir_path)
     else:
     	print('Wrong Dataset!')
     	sys.exit(0)
@@ -130,14 +135,14 @@ def run(global_rank,
     print(num_channels,image_size)
 
 
-    if model == 'mlp':
+    if model == 'kidneynet':
         import os, sys, inspect
         current = os.path.dirname(
             os.path.abspath(inspect.getfile(inspect.currentframe())))
         parent = os.path.dirname(current)
         sys.path.insert(0, parent)
-        from mlp import model
-        model = model.create_model(data_size=data_size,
+
+        model = kidney_net.create_model(data_size=data_size,
                                     num_classes=num_classes)
     else:
     	print('Wrong model!')
@@ -256,11 +261,11 @@ def run(global_rank,
         description='Training using the autograd and graph.')
     parser.add_argument(
         'model',
-        choices=['cnn', 'resnet', 'xceptionnet', 'mlp', 'alexnet'],
-        default='cnn')
-    parser.add_argument('data',
-                        choices=['mnist', 'cifar10', 'cifar100','mimic-iii','kidney-disease'],
-                        default='kidney-disease')
+        choices=[ 'cardionet', 'diabeticnet',  'drnet', 'hematologicnet', 'kidneynet', 'malarianet', 'tedctnet'],
+        default='kidneynet')
+    parser.add_argument('-data',
+                        choices=['mnist', 'cifar10', 'cifar100','kidney'],
+                        default='kidney')
     parser.add_argument('-p',
                         choices=['float32', 'float16'],
                         default='float32',
@@ -302,7 +307,12 @@ def run(global_rank,
                         type=int,
                         help='logging verbosity',
                         dest='verbosity')
-
+    parser.add_argument('-dir',
+                        '--dir-path',
+                        default="/tmp/kidney",
+                        type=str,
+                        help='the directory to store the kidney dataset',
+                        dest='dir_path')
     args = parser.parse_args()
 
     sgd = opt.SGD(lr=args.lr, momentum=0.9, weight_decay=1e-5, dtype=singa_dtype[args.precision])
@@ -316,4 +326,5 @@ def run(global_rank,
         sgd,
         args.graph,
         args.verbosity,
+        args.dir_path,
         precision=args.precision)
diff --git a/examples/healthcare/data/kidney.py b/examples/healthcare/data/kidney.py
@@ -0,0 +1,41 @@
+import numpy  as np
+
+import pickle
+import sys
+import os
+
+def load_dataset(dir_path="/tmp/kidney"):
+    """Read the pickled kidney features and labels and split them 80/20.
+
+    The first 80% of the samples (in file order) are returned as the training
+    part and the remainder as the validation part.
+    NOTE(review): pickle.load runs on downloaded files -- use trusted data only.
+    """
+    root = check_dataset_exist(dir_path=dir_path)
+    with open(os.path.join(root, "kidney_features.pkl"), 'rb') as feature_file:
+        features = pickle.load(feature_file)
+    with open(os.path.join(root, "kidney_labels.pkl"), 'rb') as label_file:
+        labels = pickle.load(label_file)
+    cut = int(len(features) * 8 / 10)  # index where the validation part begins
+    return features[:cut], labels[:cut], features[cut:], labels[cut:]
+
+
+def check_dataset_exist(dir_path):
+    """Return dir_path if that directory exists; otherwise exit with an error."""
+    if not os.path.isdir(dir_path):
+        # A missing dataset is a failure: sys.exit(message) writes the message
+        # to stderr and exits with status 1 instead of reporting success (0).
+        sys.exit('Please download the kidney dataset first')
+    return dir_path
+
+
+def load(dir_path):
+    """Load the kidney train/validation split with features and labels cast.
+
+    Returns (train_x, train_y, val_x, val_y): features float32, labels int32.
+    """
+    x_tr, y_tr, x_va, y_va = load_dataset(dir_path)
+    x_tr, x_va = x_tr.astype(np.float32), x_va.astype(np.float32)
+    y_tr, y_va = y_tr.astype(np.int32), y_va.astype(np.int32)
+    return x_tr, y_tr, x_va, y_va
+
diff --git a/examples/healthcare/data/kidneydata.py b/examples/healthcare/data/kidneydata.py
diff --git a/examples/healthcare/models/kidney_net.py b/examples/healthcare/models/kidney_net.py
@@ -30,10 +30,10 @@
 singa_dtype = {"float16": tensor.float16, "float32": tensor.float32}
 
 
-class MLP(model.Model):
+class KidneyNet(model.Model):
 
     def __init__(self, data_size=10, perceptron_size=100, num_classes=10):
-        super(MLP, self).__init__()
+        super(KidneyNet, self).__init__()
         self.num_classes = num_classes
         self.dimension = 2
 
@@ -73,20 +73,13 @@ def set_optimizer(self, optimizer):
 
 
 def create_model(pretrained=False, **kwargs):
-    """Constructs a CNN model.
 
-    Args:
-        pretrained (bool): If True, returns a pre-trained model.
-
-    Returns:
-        The created CNN model.
-    """
-    model = MLP(**kwargs)
+    model = KidneyNet(**kwargs)
 
     return model
 
 
-__all__ = ['MLP', 'create_model']
+__all__ = ['KidneyNet', 'create_model']
 
 if __name__ == "__main__":
     np.random.seed(0)
@@ -131,7 +124,7 @@ def create_model(pretrained=False, **kwargs):
     sgd = opt.SGD(0.1, 0.9, 1e-5, dtype=singa_dtype[args.precision])
     tx = tensor.Tensor((400, 2), dev, precision)
     ty = tensor.Tensor((400,), dev, tensor.int32)
-    model = MLP(data_size=2, perceptron_size=3, num_classes=2)
+    model = KidneyNet(data_size=2, perceptron_size=3, num_classes=2)
 
     # attach model to graph
     model.set_optimizer(sgd)
@@ -145,5 +138,3 @@ def create_model(pretrained=False, **kwargs):
 
         if i % 100 == 0:
             print("training loss = ", tensor.to_numpy(loss)[0])
-
-