diff --git a/.asf.yaml b/.asf.yaml
index 1e0b37f9f..df9684076 100644
--- a/.asf.yaml
+++ b/.asf.yaml
@@ -24,5 +24,3 @@ github:
   wiki: true
   # Enable issues on github
   issues: true
-  # Enable settings on github
-  settings: true
diff --git a/CMakeLists.txt b/CMakeLists.txt
index e7dc1d2c4..ce0f0e33e 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -29,10 +29,10 @@ LIST(APPEND CMAKE_MODULE_PATH ${PROJECT_SOURCE_DIR}/cmake/Thirdparty)
 #string(REGEX REPLACE "^[0-9]+\\.[0-9]+\\.([0-9]+).*" "\\1" VERSION_PATCH "${VERSION}")
-SET(PACKAGE_VERSION 4.3.0) # ${VERSION})
-SET(VERSION 4.3.0)
-SET(SINGA_MAJOR_VERSION 4)
-SET(SINGA_MINOR_VERSION 3)
+SET(PACKAGE_VERSION 5.0.0) # ${VERSION})
+SET(VERSION 5.0.0)
+SET(SINGA_MAJOR_VERSION 5)
+SET(SINGA_MINOR_VERSION 0)
 SET(SINGA_PATCH_VERSION 0)
 #SET(SINGA_MAJOR_VERSION ${VERSION_MAJOR}) # 0 -
 #SET(SINGA_MINOR_VERSION ${VERSION_MINOR}) # 0 - 9
diff --git a/RELEASE_NOTES b/RELEASE_NOTES
index fe9384656..c8c75349b 100644
--- a/RELEASE_NOTES
+++ b/RELEASE_NOTES
@@ -1,3 +1,31 @@
+Release Notes - SINGA - Version singa-5.0.0
+
+SINGA is a distributed deep learning library.
+
+This release includes the following changes:
+
+  * Add the implementations of the healthcare model zoo.
+  * Add the implementation for cardiovascular disease.
+  * Add the implementation for diabetic disease.
+  * Add the implementation for hematologic disease.
+  * Add the implementation for kidney disease.
+  * Add the implementation for malaria detection.
+  * Add the implementation for thyroid eye disease.
+
+  * Optimize the distributed training by updating the MSOptimizer and MSSGD.
+
+  * Improve the efficiency of the transformer example.
+
+  * Add the sparsification version of the model for the model selection example.
+
+  * Update data processing for the benchmark dataset.
+
+  * Update the pom.xml file to include paths for datasets.
+
+  * Update the online documentation for the healthcare model zoo.
+
+----------------------------------------------------------------------------------------------
+
 Release Notes - SINGA - Version singa-4.3.0
 
 SINGA is a distributed deep learning library.
diff --git a/examples/healthcare/Hematologic_Disease/ClassDemo.py b/examples/healthcare/Hematologic_Disease/ClassDemo.py
deleted file mode 100644
index a6872f8cb..000000000
--- a/examples/healthcare/Hematologic_Disease/ClassDemo.py
+++ /dev/null
@@ -1,270 +0,0 @@
-#
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements. See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership. The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied. See the License for the
-# specific language governing permissions and limitations
-# under the License.
-# - -import json -import os -import time -from glob import glob - -import numpy as np -from PIL import Image -from singa import device, layer, model, opt, tensor -from tqdm import tqdm - -from transforms import Compose, Normalize, ToTensor - -np_dtype = {"float16": np.float16, "float32": np.float32} -singa_dtype = {"float16": tensor.float16, "float32": tensor.float32} - - -class ClassDataset(object): - """Fetch data from file and generate batches. - - Load data from folder as PIL.Images and convert them into batch array. - - Args: - img_folder (Str): Folder path of the training/validation images. - transforms (Transform): Preprocess transforms. - """ - def __init__(self, img_folder, transforms): - super(ClassDataset, self).__init__() - - self.img_list = list() - self.transforms = transforms - - classes = os.listdir(img_folder) - for i in classes: - images = glob(os.path.join(img_folder, i, "*")) - for img in images: - self.img_list.append((img, i)) - - def __len__(self) -> int: - return len(self.img_list) - - def __getitem__(self, index: int): - img_path, label_str = self.img_list[index] - img = Image.open(img_path) - img = self.transforms.forward(img) - label = np.array(label_str, dtype=np.int32) - - return img, label - - def batchgenerator(self, indexes, batch_size, data_size): - """Generate batch arrays from transformed image list. - - Args: - indexes (Sequence): current batch indexes list, e.g. [n, n + 1, ..., n + batch_size] - batch_size (int): - data_size (Tuple): input image size of shape (C, H, W) - - Return: - batch_x (Numpy ndarray): batch array of input images (B, C, H, W) - batch_y (Numpy ndarray): batch array of ground truth lables (B,) - """ - batch_x = np.zeros((batch_size,) + data_size) - batch_y = np.zeros((batch_size,) + (1,), dtype=np.int32) - for idx, i in enumerate(indexes): - sample_x, sample_y = self.__getitem__(i) - batch_x[idx, :, :, :] = sample_x - batch_y[idx, :] = sample_y - - return batch_x, batch_y - - -class CNNModel(model.Model): - def __init__(self, num_classes): - super(CNNModel, self).__init__() - self.input_size = 28 - self.dimension = 4 - self.num_classes = num_classes - - self.layer1 = layer.Conv2d(16, kernel_size=3, activation="RELU") - self.bn1 = layer.BatchNorm2d() - self.layer2 = layer.Conv2d(16, kernel_size=3, activation="RELU") - self.bn2 = layer.BatchNorm2d() - self.pooling2 = layer.MaxPool2d(kernel_size=2, stride=2) - self.layer3 = layer.Conv2d(64, kernel_size=3, activation="RELU") - self.bn3 = layer.BatchNorm2d() - self.layer4 = layer.Conv2d(64, kernel_size=3, activation="RELU") - self.bn4 = layer.BatchNorm2d() - self.layer5 = layer.Conv2d(64, kernel_size=3, padding=1, activation="RELU") - self.bn5 = layer.BatchNorm2d() - self.pooling5 = layer.MaxPool2d(kernel_size=2, stride=2) - - self.flatten = layer.Flatten() - - self.linear1 = layer.Linear(128) - self.linear2 = layer.Linear(128) - self.linear3 = layer.Linear(self.num_classes) - - self.relu = layer.ReLU() - - self.softmax_cross_entropy = layer.SoftMaxCrossEntropy() - self.dropout = layer.Dropout(ratio=0.3) - - def forward(self, x): - x = self.layer1(x) - x = self.bn1(x) - x = self.layer2(x) - x = self.bn2(x) - x = self.pooling2(x) - - x = self.layer3(x) - x = self.bn3(x) - x = self.layer4(x) - x = self.bn4(x) - x = self.layer5(x) - x = self.bn5(x) - x = self.pooling5(x) - x = self.flatten(x) - x = self.linear1(x) - x = self.relu(x) - x = self.linear2(x) - x = self.relu(x) - x = self.linear3(x) - return x - - def set_optimizer(self, optimizer): - self.optimizer = optimizer - - def 
train_one_batch(self, x, y, dist_option, spars): - out = self.forward(x) - loss = self.softmax_cross_entropy(out, y) - - if dist_option == 'plain': - self.optimizer(loss) - elif dist_option == 'half': - self.optimizer.backward_and_update_half(loss) - elif dist_option == 'partialUpdate': - self.optimizer.backward_and_partial_update(loss) - elif dist_option == 'sparseTopK': - self.optimizer.backward_and_sparse_update(loss, - topK=True, - spars=spars) - elif dist_option == 'sparseThreshold': - self.optimizer.backward_and_sparse_update(loss, - topK=False, - spars=spars) - return out, loss - - -def accuracy(pred, target): - """Compute recall accuracy. - - Args: - pred (Numpy ndarray): Prediction array, should be in shape (B, C) - target (Numpy ndarray): Ground truth array, should be in shape (B, ) - - Return: - correct (Float): Recall accuracy - """ - # y is network output to be compared with ground truth (int) - y = np.argmax(pred, axis=1) - a = (y[:,None]==target).sum() - correct = np.array(a, "int").sum() - return correct - - -# Define pre-processing methods (transforms) -transforms = Compose([ - ToTensor(), - Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]) -]) - -# Dataset loading -dataset_path = "./bloodmnist" -train_path = os.path.join(dataset_path, "train") -val_path = os.path.join(dataset_path, "val") -cfg_path = os.path.join(dataset_path, "param.json") - -with open(cfg_path,'r') as load_f: - num_class = json.load(load_f)["num_classes"] - -train_dataset = ClassDataset(train_path, transforms) -val_dataset = ClassDataset(val_path, transforms) - -batch_size = 256 - -# Model configuration for CNN -model = CNNModel(num_classes=num_class) -criterion = layer.SoftMaxCrossEntropy() -optimizer_ft = opt.Adam(lr=1e-3) - -# Start training -dev = device.create_cpu_device() -dev.SetRandSeed(0) -np.random.seed(0) - -tx = tensor.Tensor( - (batch_size, 3, model.input_size, model.input_size), dev, - singa_dtype['float32']) -ty = tensor.Tensor((batch_size,), dev, tensor.int32) - -num_train_batch = train_dataset.__len__() // batch_size -num_val_batch = val_dataset.__len__() // batch_size -idx = np.arange(train_dataset.__len__(), dtype=np.int32) - -model.set_optimizer(optimizer_ft) -model.compile([tx], is_train=True, use_graph=False, sequential=False) -dev.SetVerbosity(0) - -max_epoch = 100 -for epoch in range(max_epoch): - print(f'Epoch {epoch}:') - - start_time = time.time() - - train_correct = np.zeros(shape=[1], dtype=np.float32) - test_correct = np.zeros(shape=[1], dtype=np.float32) - train_loss = np.zeros(shape=[1], dtype=np.float32) - - # Training part - model.train() - for b in tqdm(range(num_train_batch)): - # Extract batch from image list - x, y = train_dataset.batchgenerator(idx[b * batch_size:(b + 1) * batch_size], - batch_size=batch_size, data_size=(3, model.input_size, model.input_size)) - x = x.astype(np_dtype['float32']) - - tx.copy_from_numpy(x) - ty.copy_from_numpy(y) - - out, loss = model(tx, ty, dist_option="plain", spars=None) - train_correct += accuracy(tensor.to_numpy(out), y) - train_loss += tensor.to_numpy(loss)[0] - print('Training loss = %f, training accuracy = %f' % - (train_loss, train_correct / - (num_train_batch * batch_size))) - - # Validation part - model.eval() - for b in tqdm(range(num_val_batch)): - x, y = train_dataset.batchgenerator(idx[b * batch_size:(b + 1) * batch_size], - batch_size=batch_size, data_size=(3, model.input_size, model.input_size)) - x = x.astype(np_dtype['float32']) - - tx.copy_from_numpy(x) - ty.copy_from_numpy(y) - - out = model(tx) - 
test_correct += accuracy(tensor.to_numpy(out), y) - - print('Evaluation accuracy = %f, Elapsed Time = %fs' % - (test_correct / (num_val_batch * batch_size), - time.time() - start_time)) diff --git a/examples/healthcare/Hematologic_Disease/Readme.md b/examples/healthcare/Hematologic_Disease/Readme.md deleted file mode 100644 index c519e9d7d..000000000 --- a/examples/healthcare/Hematologic_Disease/Readme.md +++ /dev/null @@ -1,45 +0,0 @@ - -# CNN demo model on BloodMnist dataset - -## About dataset -Download address: https://drive.google.com/drive/folders/1Ze9qri1UtAsIRoI0SJ4YRpdt5kUUMBEn?usp=sharing - -The BloodMNIST , as a sub set of [MedMNIST](https://medmnist.com/), is based on a dataset of individual normal cells, captured from individuals without infection, hematologic or oncologic disease and free of any pharmacologic treatment at the moment of blood collection. -It contains a total of 17,092 images and is organized into 8 classes. -it is split with a ratio of 7:1:2 into training, validation and test set. -The source images with resolution 3×360×363 pixels are center-cropped into 3×200×200, and then resized into 3×28×28. - -8 classes of the dataset: -```python -"0": "basophil", -"1": "eosinophil", -"2": "erythroblast", -"3": "ig (immature granulocytes)", -"4": "lymphocyte", -"5": "monocyte", -"6": "neutrophil", -"7": "platelet" -``` - -# Run the demo -Run -``` -python ClassDemo.py -``` \ No newline at end of file diff --git a/examples/healthcare/Hematologic_Disease/transforms.py b/examples/healthcare/Hematologic_Disease/transforms.py deleted file mode 100644 index 5b5111798..000000000 --- a/examples/healthcare/Hematologic_Disease/transforms.py +++ /dev/null @@ -1,166 +0,0 @@ -# -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. -# - - -import numpy as np -from PIL import Image - - -class Compose(object): - """Compose several transforms together. - - Args: - transforms: list of transforms to compose. - - Example: - >>> transforms.Compose([ - >>> transforms.ToTensor(), - >>> transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]) - >>> ]) - - """ - def __init__(self, transforms): - self.transforms = transforms - - def forward(self, img): - """ - Args: - img (PIL Image or numpy array): Image to be processed. - - Returns: - PIL Image or numpy array: Processed image. - """ - for t in self.transforms: - img = t.forward(img) - return img - - def __repr__(self): - format_string = self.__class__.__name__ + '(' - for t in self.transforms: - format_string += '\n' - format_string += ' {0}'.format(t) - format_string += '\n)' - return format_string - - -class ToTensor(object): - """Convert a ``PIL Image`` to ``numpy.ndarray``. 
- - Converts a PIL Image (H x W x C) in the range [0, 255] to a ``numpy.array`` of shape - (C x H x W) in the range [0.0, 1.0] - if the PIL Image belongs to one of the modes (L, LA, P, I, F, RGB, YCbCr, RGBA, CMYK, 1). - - In the other cases, tensors are returned without scaling. - - .. note:: - Because the input image is scaled to [0.0, 1.0], this transformation should not be used when - transforming target image masks. - """ - - def forward(self, pic): - """ - Args: - pic (PIL Image): Image to be converted to array. - - Returns: - Array: Converted image. - """ - if not isinstance(pic, Image.Image): - raise TypeError('pic should be PIL Image. Got {}'.format(type(pic))) - - # Handle PIL Image - mode_to_nptype = {'I': np.int32, 'I;16': np.int16, 'F': np.float32} - img = np.array(pic, mode_to_nptype.get(pic.mode, np.uint8), copy=True) - - if pic.mode == '1': - img = 255 * img - - # Put it from HWC to CHW format - img = np.transpose(img, (2, 0, 1)) - - if img.dtype == np.uint8: - return np.array(np.float32(img)/255.0, dtype=np.float) - else: - return np.float(img) - - def __repr__(self): - return self.__class__.__name__ + '()' - - -class Normalize(object): - """Normalize a ``numpy.array`` image with mean and standard deviation. - - This transform does not support PIL Image. - Given mean: ``(mean[1],...,mean[n])`` and std: ``(std[1],..,std[n])`` for ``n`` - channels, this transform will normalize each channel of the input - ``numpy.array`` i.e., - ``output[channel] = (input[channel] - mean[channel]) / std[channel]`` - - .. note:: - This transform acts out of place, i.e., it does not mutate the input array. - - Args: - mean (Sequence): Sequence of means for each channel. - std (Sequence): Sequence of standard deviations for each channel. - inplace(bool, optional): Bool to make this operation in-place. - - """ - - def __init__(self, mean, std, inplace=False): - super().__init__() - self.mean = mean - self.std = std - self.inplace = inplace - - def forward(self, img: np.ndarray): - """ - Args: - img (Numpy ndarray): Array image to be normalized. - - Returns: - d_res (Numpy ndarray): Normalized Tensor image. - """ - if not isinstance(img, np.ndarray): - raise TypeError('Input img should be a numpy array. Got {}.'.format(type(img))) - - if not img.dtype == np.float: - raise TypeError('Input array should be a float array. Got {}.'.format(img.dtype)) - - if img.ndim < 3: - raise ValueError('Expected array to be an array image of size (..., C, H, W). Got img.shape = ' - '{}.'.format(img.shape)) - - if not self.inplace: - img = img.copy() - - dtype = img.dtype - mean = np.array(self.mean, dtype=dtype) - std = np.array(self.std, dtype=dtype) - if (std == 0).any(): - raise ValueError('std evaluated to zero after conversion to {}, leading to division by zero.'.format(dtype)) - s_res = np.subtract(img, mean[:, None, None]) - d_res = np.divide(s_res, std[:, None, None]) - - return d_res - - - def __repr__(self): - return self.__class__.__name__ + '(mean={0}, std={1})'.format(self.mean, self.std) - - diff --git a/examples/healthcare/Malaria_Detection/README.md b/examples/healthcare/Malaria_Detection/README.md deleted file mode 100644 index b9dcbf239..000000000 --- a/examples/healthcare/Malaria_Detection/README.md +++ /dev/null @@ -1,44 +0,0 @@ - - -# Singa for Malaria Detection Task - -## Malaria - -Malaria is caused by parasites and could be transmitted through infected mosquitoes. 
There are about 200 million cases worldwide, and about 400,000 deaths per year, therefore, malaria does lots of harm to global health. - -Although Malaria is a curable disease, inadequate diagnostics make it harder to reduce mortality, as a result, a fast and reliable diagnostic test is a promising and effective way to fight malaria. - -To mitigate the problem, we use Singa to implement a machine learning model to help with Malaria diagnosis. The dataset is from Kaggle https://www.kaggle.com/datasets/miracle9to9/files1?resource=download. Please download the dataset before running the scripts. - -## Structure - -* `data` includes the scripts for preprocessing Malaria image datasets. - -* `model` includes the CNN model construction codes by creating - a subclass of `Module` to wrap the neural network operations - of each model. - -* `train_cnn.py` is the training script, which controls the training flow by - doing BackPropagation and SGD update. - -## Command -```bash -python train_cnn.py cnn malaria -dir pathToDataset -``` \ No newline at end of file diff --git a/examples/healthcare/Malaria_Detection/data/malaria.py b/examples/healthcare/Malaria_Detection/data/malaria.py deleted file mode 100644 index 46422b739..000000000 --- a/examples/healthcare/Malaria_Detection/data/malaria.py +++ /dev/null @@ -1,122 +0,0 @@ -# -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# - -try: - import pickle -except ImportError: - import cPickle as pickle - -import numpy as np -import os -import sys -from PIL import Image - - -# need to save to specific local directories -def load_train_data(dir_path="/tmp/malaria", resize_size=(128, 128)): - dir_path = check_dataset_exist(dirpath=dir_path) - path_train_label_1 = os.path.join(dir_path, "training_set/Parasitized") - path_train_label_0 = os.path.join(dir_path, "training_set/Uninfected") - train_label_1 = load_image_path(os.listdir(path_train_label_1)) - train_label_0 = load_image_path(os.listdir(path_train_label_0)) - labels = [] - Images = np.empty((len(train_label_1) + len(train_label_0), - 3, resize_size[0], resize_size[1]), dtype=np.uint8) - for i in range(len(train_label_0)): - image_path = os.path.join(path_train_label_0, train_label_0[i]) - temp_image = np.array(Image.open(image_path).resize( - resize_size).convert("RGB")).transpose(2, 0, 1) - Images[i] = temp_image - labels.append(0) - for i in range(len(train_label_1)): - image_path = os.path.join(path_train_label_1, train_label_1[i]) - temp_image = np.array(Image.open(image_path).resize( - resize_size).convert("RGB")).transpose(2, 0, 1) - Images[i + len(train_label_0)] = temp_image - labels.append(1) - - Images = np.array(Images, dtype=np.float32) - labels = np.array(labels, dtype=np.int32) - return Images, labels - - -# need to save to specific local directories -def load_test_data(dir_path='/tmp/malaria', resize_size=(128, 128)): - dir_path = check_dataset_exist(dirpath=dir_path) - path_test_label_1 = os.path.join(dir_path, "testing_set/Parasitized") - path_test_label_0 = os.path.join(dir_path, "testing_set/Uninfected") - test_label_1 = load_image_path(os.listdir(path_test_label_1)) - test_label_0 = load_image_path(os.listdir(path_test_label_0)) - labels = [] - Images = np.empty((len(test_label_1) + len(test_label_0), - 3, resize_size[0], resize_size[1]), dtype=np.uint8) - for i in range(len(test_label_0)): - image_path = os.path.join(path_test_label_0, test_label_0[i]) - temp_image = np.array(Image.open(image_path).resize( - resize_size).convert("RGB")).transpose(2, 0, 1) - Images[i] = temp_image - labels.append(0) - for i in range(len(test_label_1)): - image_path = os.path.join(path_test_label_1, test_label_1[i]) - temp_image = np.array(Image.open(image_path).resize( - resize_size).convert("RGB")).transpose(2, 0, 1) - Images[i + len(test_label_0)] = temp_image - labels.append(1) - - Images = np.array(Images, dtype=np.float32) - labels = np.array(labels, dtype=np.int32) - return Images, labels - - -def load_image_path(list): - new_list = [] - for image_path in list: - if (image_path.endswith(".png") or image_path.endswith(".jpg")): - new_list.append(image_path) - return new_list - - -def check_dataset_exist(dirpath): - if not os.path.exists(dirpath): - print( - 'Please download the malaria dataset first' - ) - sys.exit(0) - return dirpath - - -def normalize(train_x, val_x): - mean = [0.5339, 0.4180, 0.4460] # mean for malaria dataset - std = [0.3329, 0.2637, 0.2761] # std for malaria dataset - train_x /= 255 - val_x /= 255 - for ch in range(0, 2): - train_x[:, ch, :, :] -= mean[ch] - train_x[:, ch, :, :] /= std[ch] - val_x[:, ch, :, :] -= mean[ch] - val_x[:, ch, :, :] /= std[ch] - return train_x, val_x - - -def load(dir_path): - train_x, train_y = load_train_data(dir_path=dir_path) - val_x, val_y = load_test_data(dir_path=dir_path) - train_x, val_x = normalize(train_x, val_x) - train_y = train_y.flatten() - val_y = val_y.flatten() - return train_x, 
train_y, val_x, val_y diff --git a/examples/healthcare/Malaria_Detection/model/cnn.py b/examples/healthcare/Malaria_Detection/model/cnn.py deleted file mode 100644 index 856adb7e7..000000000 --- a/examples/healthcare/Malaria_Detection/model/cnn.py +++ /dev/null @@ -1,94 +0,0 @@ -# -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# - -from singa import layer -from singa import model - - -class CNN(model.Model): - - def __init__(self, num_classes=10, num_channels=1): - super(CNN, self).__init__() - self.num_classes = num_classes - self.input_size = 128 - self.dimension = 4 - self.conv1 = layer.Conv2d(num_channels, 32, 3, padding=0, activation="RELU") - self.conv2 = layer.Conv2d(32, 64, 3, padding=0, activation="RELU") - self.conv3 = layer.Conv2d(64, 64, 3, padding=0, activation="RELU") - self.linear1 = layer.Linear(128) - self.linear2 = layer.Linear(num_classes) - self.pooling1 = layer.MaxPool2d(2, 2, padding=0) - self.pooling2 = layer.MaxPool2d(2, 2, padding=0) - self.pooling3 = layer.MaxPool2d(2, 2, padding=0) - self.relu = layer.ReLU() - self.flatten = layer.Flatten() - self.softmax_cross_entropy = layer.SoftMaxCrossEntropy() - self.sigmoid = layer - - def forward(self, x): - y = self.conv1(x) - y = self.pooling1(y) - y = self.conv2(y) - y = self.pooling2(y) - y = self.conv3(y) - y = self.pooling3(y) - y = self.flatten(y) - y = self.linear1(y) - y = self.relu(y) - y = self.linear2(y) - return y - - def train_one_batch(self, x, y, dist_option, spars): - out = self.forward(x) - loss = self.softmax_cross_entropy(out, y) - - if dist_option == 'plain': - self.optimizer(loss) - elif dist_option == 'half': - self.optimizer.backward_and_update_half(loss) - elif dist_option == 'partialUpdate': - self.optimizer.backward_and_partial_update(loss) - elif dist_option == 'sparseTopK': - self.optimizer.backward_and_sparse_update(loss, - topK=True, - spars=spars) - elif dist_option == 'sparseThreshold': - self.optimizer.backward_and_sparse_update(loss, - topK=False, - spars=spars) - return out, loss - - def set_optimizer(self, optimizer): - self.optimizer = optimizer - - -def create_model(**kwargs): - """Constructs a CNN model. - - Args: - pretrained (bool): If True, returns a pre-trained model. - - Returns: - The created CNN model. 
-    """
-    model = CNN(**kwargs)
-
-    return model
-
-
-__all__ = ['CNN', 'create_model']
diff --git a/examples/healthcare/application/Cardiovascular_Disease/README.md b/examples/healthcare/application/Cardiovascular_Disease/README.md
new file mode 100644
index 000000000..a7ed42d2a
--- /dev/null
+++ b/examples/healthcare/application/Cardiovascular_Disease/README.md
@@ -0,0 +1,41 @@
+<!--
+Licensed to the Apache Software Foundation (ASF) under one
+or more contributor license agreements. See the NOTICE file
+distributed with this work for additional information
+regarding copyright ownership. The ASF licenses this file
+to you under the Apache License, Version 2.0 (the
+"License"); you may not use this file except in compliance
+with the License. You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing,
+software distributed under the License is distributed on an
+"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+KIND, either express or implied. See the License for the
+specific language governing permissions and limitations
+under the License.
+-->
+
+# Singa for Cardiovascular Disease Detection Task
+
+## Cardiovascular Disease
+
+Cardiovascular disease is primarily caused by risk factors like high blood pressure, unhealthy diet, and physical inactivity. As the leading cause of death globally, it accounts for approximately 17.9 million fatalities annually, representing 31% of all global deaths. This makes cardiovascular disease the most significant threat to human health worldwide.
+
+Although early detection can significantly improve outcomes, insufficient screening methods and delayed diagnosis often lead to preventable complications. Therefore, developing rapid and accurate diagnostic tools is crucial for effective prevention and treatment of cardiovascular conditions.
+
+To address this challenge, we utilize Singa to develop a machine learning model for cardiovascular disease risk prediction. The training dataset is sourced from Kaggle https://www.kaggle.com/datasets/sulianova/cardiovascular-disease-dataset. Please download the dataset and pass its path to the training script using the command below.
+
+## Structure
+
+* `cardiovascular.py` in the `healthcare/data` directory is the script for preprocessing the Cardiovascular Disease dataset.
+
+* `cardionet.py` in the `healthcare/models` directory includes the MLP model construction code.
+
+* `train.py` is the training script, which controls the training flow by doing BackPropagation and SGD update.
+
+## Command
+```bash
+python train.py cardionet -dir pathToDataset
+```
diff --git a/examples/healthcare/application/Cardiovascular_Disease/run.sh b/examples/healthcare/application/Cardiovascular_Disease/run.sh
new file mode 100644
index 000000000..0ecb8231a
--- /dev/null
+++ b/examples/healthcare/application/Cardiovascular_Disease/run.sh
@@ -0,0 +1,20 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+### cardiovascular dataset
+python train.py cardionet -dir pathToDataset
diff --git a/examples/healthcare/application/Cardiovascular_Disease/train.py b/examples/healthcare/application/Cardiovascular_Disease/train.py
new file mode 100644
index 000000000..2caa68998
--- /dev/null
+++ b/examples/healthcare/application/Cardiovascular_Disease/train.py
@@ -0,0 +1,328 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+from singa import singa_wrap as singa
+from singa import device
+from singa import tensor
+from singa import opt
+import numpy as np
+import time
+import argparse
+from PIL import Image
+from data import cardiovascular
+from model import cardionet
+
+np_dtype = {"float16": np.float16, "float32": np.float32}
+
+singa_dtype = {"float16": tensor.float16, "float32": tensor.float32}
+
+
+# Data augmentation
+def augmentation(x, batch_size):
+    xpad = np.pad(x, [[0, 0], [0, 0], [4, 4], [4, 4]], 'symmetric')
+    for data_num in range(0, batch_size):
+        offset = np.random.randint(8, size=2)
+        x[data_num, :, :, :] = xpad[data_num, :,
+                                    offset[0]:offset[0] + x.shape[2],
+                                    offset[1]:offset[1] + x.shape[2]]
+        if_flip = np.random.randint(2)
+        if (if_flip):
+            x[data_num, :, :, :] = x[data_num, :, :, ::-1]
+    return x
+
+
+# Calculate accuracy
+def accuracy(pred, target):
+    # y is network output to be compared with ground truth (int)
+    y = np.argmax(pred, axis=1)
+    a = y == target
+    correct = np.array(a, "int").sum()
+    return correct
+
+
+# Data partition according to the rank
+def partition(global_rank, world_size, train_x, train_y, val_x, val_y):
+    # Partition training data
+    data_per_rank = train_x.shape[0] // world_size
+    idx_start = global_rank * data_per_rank
+    idx_end = (global_rank + 1) * data_per_rank
+    train_x = train_x[idx_start:idx_end]
+    train_y = train_y[idx_start:idx_end]
+
+    # Partition evaluation data
+    data_per_rank = val_x.shape[0] // world_size
+    idx_start = global_rank * data_per_rank
+    idx_end = (global_rank + 1) * data_per_rank
+    val_x = val_x[idx_start:idx_end]
+    val_y = val_y[idx_start:idx_end]
+    return train_x, train_y, val_x, val_y
+
+
+# Function to all reduce NUMPY accuracy and loss from multiple devices
+def reduce_variable(variable, dist_opt, reducer):
+    reducer.copy_from_numpy(variable)
+    dist_opt.all_reduce(reducer.data)
+    dist_opt.wait()
+    output = tensor.to_numpy(reducer)
+    return output
+
+
+def resize_dataset(x, image_size):
+    num_data = x.shape[0]
+    dim = x.shape[1]
+    X = np.zeros(shape=(num_data, dim, image_size, image_size),
+                 dtype=np.float32)
+    for n in range(0, num_data):
+        for d in range(0, dim):
+            X[n, d, :, :] = np.array(Image.fromarray(x[n, d, :, :]).resize(
+                (image_size, image_size), Image.BILINEAR),
+                                     dtype=np.float32)
+    return X
+
+
+def run(global_rank,
+        world_size,
+        local_rank,
+        max_epoch,
+        batch_size,
+        model,
+        sgd,
+        graph,
+        verbosity,
+        path,
+        dist_option='plain',
+        spars=None,
+        precision='float32'):
+    dev = device.get_default_device()
+    #dev = device.create_cuda_gpu_on(local_rank)
+    # need to change to CPU device for CPU-only machines
+    dev.SetRandSeed(0)
+    np.random.seed(0)
+
+
+    train_x, train_y, val_x, val_y = cardiovascular.load(path)
+
+
+    num_channels = 1
+    image_size = 1
+    data_size = train_x.shape[1]
+
+    num_classes = 2
+
+    if model == 'resnet':
+        from model import resnet
+        model = resnet.resnet50(num_channels=num_channels,
+                                num_classes=num_classes)
+    elif model == 'xceptionnet':
+        from model import xceptionnet
+        model = xceptionnet.create_model(num_channels=num_channels,
+                                         num_classes=num_classes)
+    elif model == 'cnn':
+        from model import cnn
+        model = cnn.create_model(num_channels=num_channels,
+                                 num_classes=num_classes)
+    elif model == 'alexnet':
+        from model import alexnet
+        model = alexnet.create_model(num_channels=num_channels,
+                                     num_classes=num_classes)
+    elif model == 'cardionet':
+        import os, sys, inspect
+        current = os.path.dirname(
+            os.path.abspath(inspect.getfile(inspect.currentframe())))
+        parent = os.path.dirname(current)
+        sys.path.insert(0, parent)
+        model = cardionet.create_model(data_size=data_size, perceptron_size=1000, num_classes=num_classes)
+
+    # For distributed training, sequential has better performance
+    if hasattr(sgd, "communicator"):
+        DIST = True
+        sequential = True
+    else:
+        DIST = False
+        sequential = False
+
+    if DIST:
+        train_x, train_y, val_x, val_y = partition(global_rank, world_size,
+                                                   train_x, train_y, val_x,
+                                                   val_y)
+
+    if model.dimension == 4:
+        tx = tensor.Tensor(
+            (batch_size, num_channels, model.input_size, model.input_size), dev,
+            singa_dtype[precision])
+    elif model.dimension == 2:
+        tx = tensor.Tensor((batch_size, data_size), dev, singa_dtype[precision])
+        # np.reshape(train_x, (train_x.shape[0], -1))
+        # np.reshape(val_x, (val_x.shape[0], -1))
+
+    ty = tensor.Tensor((batch_size,), dev, tensor.int32)
+    num_train_batch = train_x.shape[0] // batch_size
+    num_val_batch = val_x.shape[0] // batch_size
+    idx = np.arange(train_x.shape[0], dtype=np.int32)
+
+    # Attach model to graph
+    model.set_optimizer(sgd)
+    model.compile([tx], is_train=True, use_graph=graph, sequential=sequential)
+    dev.SetVerbosity(verbosity)
+
+    # Training and evaluation loop
+    for epoch in range(max_epoch):
+        start_time = time.time()
+        np.random.shuffle(idx)
+
+        if global_rank == 0:
+            print('Starting Epoch %d:' % (epoch))
+
+
+        # Training phase
+        train_correct = np.zeros(shape=[1], dtype=np.float32)
+        test_correct = np.zeros(shape=[1], dtype=np.float32)
+        train_loss = np.zeros(shape=[1], dtype=np.float32)
+
+        model.train()
+        for b in range(num_train_batch):
+            # if b % 100 == 0:
+            #     print ("b: \n", b)
+            # Generate the patch data in this iteration
+            x = train_x[idx[b * batch_size:(b + 1) * batch_size]]
+            if model.dimension == 4:
+                x = augmentation(x, batch_size)
+                if (image_size != model.input_size):
+                    x = resize_dataset(x, model.input_size)
+            x = x.astype(np_dtype[precision])
+            y = train_y[idx[b * batch_size:(b + 1) * batch_size]]
+
+            # Copy the patch data into input tensors
+            tx.copy_from_numpy(x)
+            ty.copy_from_numpy(y)
+
+            # Train the model
+            out, loss = model(tx, ty, dist_option, spars)
+            train_correct += accuracy(tensor.to_numpy(out), y)
+            train_loss += tensor.to_numpy(loss)[0]
+
+        if DIST:
+            # Reduce the training accuracy and loss from multiple devices
+            reducer = tensor.Tensor((1,), dev, tensor.float32)
+            train_correct = reduce_variable(train_correct, sgd, reducer)
+            train_loss = reduce_variable(train_loss, sgd, reducer)
+
+        if global_rank == 0:
+            print('Training loss = %f, training accuracy = %f' %
+                  (train_loss, train_correct /
+                   (num_train_batch * batch_size * world_size)),
+                  flush=True)
+
+        # Evaluation phase
+        model.eval()
+        for b in range(num_val_batch):
+            x = val_x[b * batch_size:(b + 1) * batch_size]
+            if model.dimension == 4:
+                if (image_size != model.input_size):
+                    x = resize_dataset(x, model.input_size)
+            x = x.astype(np_dtype[precision])
+            y = val_y[b * batch_size:(b + 1) * batch_size]
+            tx.copy_from_numpy(x)
+            ty.copy_from_numpy(y)
+            out_test = model(tx)
+            test_correct += accuracy(tensor.to_numpy(out_test), y)
+
+        if DIST:
+            # Reduce the evaluation accuracy from multiple devices
+            test_correct = reduce_variable(test_correct, sgd, reducer)
+
+        # Output the evaluation accuracy
+        if global_rank == 0:
+            print('Evaluation accuracy = %f, Elapsed Time = %fs' %
+                  (test_correct / (num_val_batch * batch_size * world_size),
+                   time.time() - start_time),
+                  flush=True)
+
+    dev.PrintTimeProfiling()
+
+
+if __name__ == '__main__':
+    # Use argparse to get command config: max_epoch, model, data, etc., for single gpu training
+    parser = argparse.ArgumentParser(
+        description='Training using the autograd and graph.')
+    parser.add_argument(
+        'model',
+        choices=['cnn', 'resnet', 'xceptionnet', 'cardionet', 'alexnet'],
+        default='cardionet')
+    parser.add_argument('-p',
+                        choices=['float32', 'float16'],
+                        default='float32',
+                        dest='precision')
+    parser.add_argument('-m',
+                        '--max-epoch',
+                        default=100,
+                        type=int,
+                        help='maximum epochs',
+                        dest='max_epoch')
+    parser.add_argument('-b',
+                        '--batch-size',
+                        default=64,
+                        type=int,
+                        help='batch size',
+                        dest='batch_size')
+    parser.add_argument('-l',
+                        '--learning-rate',
+                        default=0.001,
+                        type=float,
+                        help='initial learning rate',
+                        dest='lr')
+    # Determine which gpu to use
+    parser.add_argument('-i',
+                        '--device-id',
+                        default=0,
+                        type=int,
+                        help='which GPU to use',
+                        dest='device_id')
+    parser.add_argument('-g',
+                        '--disable-graph',
+                        default='True',
+                        action='store_false',
+                        help='disable graph',
+                        dest='graph')
+    parser.add_argument('-v',
+                        '--log-verbosity',
+                        default=0,
+                        type=int,
+                        help='logging verbosity',
+                        dest='verbosity')
+    parser.add_argument('-dir',
+                        '--path-to-dataset',
+                        default=None,
+                        help='path to dataset',
+                        dest='path')
+
+
+    args = parser.parse_args()
+
+    sgd = opt.SGD(lr=args.lr, momentum=0.9, weight_decay=1e-5, dtype=singa_dtype[args.precision])
+    run(0,
+        1,
+        args.device_id,
+        args.max_epoch,
+        args.batch_size,
+        args.model,
+        sgd,
+        args.graph,
+        args.verbosity,
+        args.path,
+        precision=args.precision)
diff --git a/examples/healthcare/application/Diabetic_Readmission_Prediction/README.md b/examples/healthcare/application/Diabetic_Disease/Diabetic_Readmission_Prediction/README.md
similarity index 84%
rename from examples/healthcare/application/Diabetic_Readmission_Prediction/README.md
rename to examples/healthcare/application/Diabetic_Disease/Diabetic_Readmission_Prediction/README.md
index c58e6375a..d4378466b 100644
--- a/examples/healthcare/application/Diabetic_Readmission_Prediction/README.md
+++ b/examples/healthcare/application/Diabetic_Disease/Diabetic_Readmission_Prediction/README.md
@@ -17,7 +17,7 @@ under the License.
 -->
 
-# Singa for Diabetic Readmission Prediction task
+# Singa for Diabetic Readmission Prediction Task
 
 ## Diabetic Readmission
 
@@ -25,7 +25,7 @@ Diabetic readmission is a significant concern in healthcare, with a substantial
 
 Although diabetes is a manageable condition, early identification of patients at high risk of readmission remains a challenge. A reliable and efficient predictive model can help identify these patients, enabling healthcare providers to intervene early and prevent unnecessary readmissions.
 
-To address this issue, we use Singa to implement a machine learning model for predicting diabetic readmission. The dataset is from [BMC Medical Informatics and Decision-Making](https://bmcmedinformdecismak.biomedcentral.com/articles/10.1186/s12911-021-01423-y). Please download the dataset before running the scripts.
+To address this issue, we use Singa to implement a machine learning model for predicting diabetic readmission. The dataset is from [Diabetes 130-US Hospitals for Years 1999-2008](https://archive.ics.uci.edu/ml/datasets/diabetes+130-us+hospitals+for+years+1999-2008).
 
@@ -36,10 +36,10 @@ To address this issue, we use Singa to implement a machine learning model for pr
 a subclass of `Module` to wrap the neural network operations
 of each model.
 
-* `train_mlp.py` is the training script, which controls the training flow by
+* `train.py` is the training script, which controls the training flow by
 doing BackPropagation and SGD update.
 
 ## Command
 ```bash
-python train.py mlp diabetic
+python train.py diabeticnet
 ```
diff --git a/examples/healthcare/Malaria_Detection/run.sh b/examples/healthcare/application/Diabetic_Disease/Diabetic_Readmission_Prediction/run.sh
similarity index 91%
rename from examples/healthcare/Malaria_Detection/run.sh
rename to examples/healthcare/application/Diabetic_Disease/Diabetic_Readmission_Prediction/run.sh
index 14718208b..0edd94d6e 100644
--- a/examples/healthcare/Malaria_Detection/run.sh
+++ b/examples/healthcare/application/Diabetic_Disease/Diabetic_Readmission_Prediction/run.sh
@@ -16,5 +16,5 @@
 # limitations under the License.
 #
 
-### malaria dataset
-python train_cnn.py cnn malaria -dir pathToDataset
+### diabetic dataset
+python train.py diabeticnet
diff --git a/examples/healthcare/application/Diabetic_Disease/Diabetic_Readmission_Prediction/train.py b/examples/healthcare/application/Diabetic_Disease/Diabetic_Readmission_Prediction/train.py
new file mode 100644
index 000000000..30a6de78e
--- /dev/null
+++ b/examples/healthcare/application/Diabetic_Disease/Diabetic_Readmission_Prediction/train.py
@@ -0,0 +1,267 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+# + +from singa import device +from singa import tensor +from singa import opt +import numpy as np +import time +import argparse +import sys +sys.path.append("../../../..") +from healthcare.data import diabetic +from healthcare.models import diabetic_net + +np_dtype = {"float16": np.float16, "float32": np.float32} + +singa_dtype = {"float16": tensor.float16, "float32": tensor.float32} + + +# Calculate accuracy +def accuracy(pred, target): + # y is network output to be compared with ground truth (int) + y = np.argmax(pred, axis=1) + a = y == target + correct = np.array(a, "int").sum() + return correct + + +# Data partition according to the rank +def partition(global_rank, world_size, train_x, train_y, val_x, val_y): + # Partition training data + data_per_rank = train_x.shape[0] // world_size + idx_start = global_rank * data_per_rank + idx_end = (global_rank + 1) * data_per_rank + train_x = train_x[idx_start:idx_end] + train_y = train_y[idx_start:idx_end] + + # Partition evaluation data + data_per_rank = val_x.shape[0] // world_size + idx_start = global_rank * data_per_rank + idx_end = (global_rank + 1) * data_per_rank + val_x = val_x[idx_start:idx_end] + val_y = val_y[idx_start:idx_end] + return train_x, train_y, val_x, val_y + + +# Function to all reduce NUMPY accuracy and loss from multiple devices +def reduce_variable(variable, dist_opt, reducer): + reducer.copy_from_numpy(variable) + dist_opt.all_reduce(reducer.data) + dist_opt.wait() + output = tensor.to_numpy(reducer) + return output + + +def run(global_rank, + world_size, + local_rank, + max_epoch, + batch_size, + model, + data, + sgd, + graph, + verbosity, + dist_option='plain', + spars=None, + precision='float32'): + dev = device.create_cpu_device() # now CPU version only, could change to GPU device for GPU-support machines + dev.SetRandSeed(0) + np.random.seed(0) + + # Load data based on specified dataset + if data == 'diabetic': + train_x, train_y, val_x, val_y = diabetic.load() + elif data == 'mnist' or data == 'cifar10' or data == 'cifar100': + raise ValueError("Only 'diabetic' dataset (2D table data) is supported with MLP model.") + + # Ensure the data is already 2D (train_x.shape[1:] should have only one dimension) + data_size = train_x.shape[1] + num_classes = int(np.max(train_y) + 1) + + # Initialize MLP model + if model == 'diabeticnet': + model = diabetic_net.create_model(data_size=data_size, + num_classes=num_classes) + else: + print( + 'Wrong model!' 
+ ) + sys.exit(0) + # Setup distributed training flags + if hasattr(sgd, "communicator"): + DIST = True + sequential = True + else: + DIST = False + sequential = False + + # Partition data if distributed training is used + if DIST: + train_x, train_y, val_x, val_y = partition(global_rank, world_size, + train_x, train_y, val_x, + val_y) + + # Define tensors for inputs and labels + tx = tensor.Tensor((batch_size, data_size), dev, singa_dtype[precision]) + ty = tensor.Tensor((batch_size,), dev, tensor.int32) + + num_train_batch = train_x.shape[0] // batch_size + num_val_batch = val_x.shape[0] // batch_size + idx = np.arange(train_x.shape[0], dtype=np.int32) + + # Attach optimizer to model + model.set_optimizer(sgd) + model.compile([tx], is_train=True, use_graph=graph, sequential=sequential) + dev.SetVerbosity(verbosity) + + # Training and evaluation loop + for epoch in range(max_epoch): + start_time = time.time() + np.random.shuffle(idx) + + if global_rank == 0: + print('Starting Epoch %d:' % epoch) + + # Training phase + train_correct = np.zeros(shape=[1], dtype=np.float32) + test_correct = np.zeros(shape=[1], dtype=np.float32) + train_loss = np.zeros(shape=[1], dtype=np.float32) + + model.train() + for b in range(num_train_batch): + x = train_x[idx[b * batch_size:(b + 1) * batch_size]] + y = train_y[idx[b * batch_size:(b + 1) * batch_size]] + + x = x.astype(np_dtype[precision]) # Ensure correct precision + tx.copy_from_numpy(x) + ty.copy_from_numpy(y) + + # Train the model + out, loss = model(tx, ty, dist_option, spars) + train_correct += accuracy(tensor.to_numpy(out), y) + train_loss += tensor.to_numpy(loss)[0] + + if DIST: + # Reduce training stats across distributed devices + reducer = tensor.Tensor((1,), dev, tensor.float32) + train_correct = reduce_variable(train_correct, sgd, reducer) + train_loss = reduce_variable(train_loss, sgd, reducer) + + if global_rank == 0: + print('Training loss = %f, training accuracy = %f' % + (train_loss, train_correct / + (num_train_batch * batch_size * world_size)), + flush=True) + + # Evaluation phase + model.eval() + for b in range(num_val_batch): + x = val_x[b * batch_size:(b + 1) * batch_size] + y = val_y[b * batch_size:(b + 1) * batch_size] + + x = x.astype(np_dtype[precision]) + tx.copy_from_numpy(x) + ty.copy_from_numpy(y) + + out_test = model(tx) + test_correct += accuracy(tensor.to_numpy(out_test), y) + + if DIST: + # Reduce evaluation stats across distributed devices + test_correct = reduce_variable(test_correct, sgd, reducer) + + if global_rank == 0: + print('Evaluation accuracy = %f, Elapsed Time = %fs' % + (test_correct / (num_val_batch * batch_size * world_size), + time.time() - start_time), + flush=True) + + dev.PrintTimeProfiling() + + + +if __name__ == '__main__': + # Use argparse to get command config: max_epoch, model, data, etc., for single gpu training + parser = argparse.ArgumentParser( + description='Training using the autograd and graph.') + parser.add_argument( + 'model', + choices=['cnn', 'resnet', 'xceptionnet', 'mlp', 'alexnet', 'diabeticnet'], + default='diabeticnet') + parser.add_argument('data', + choices=['mnist', 'cifar10', 'cifar100', 'diabetic'], + default='diabetic') + parser.add_argument('-p', + choices=['float32', 'float16'], + default='float32', + dest='precision') + parser.add_argument('-m', + '--max-epoch', + default=100, + type=int, + help='maximum epochs', + dest='max_epoch') + parser.add_argument('-b', + '--batch-size', + default=64, + type=int, + help='batch size', + dest='batch_size') + 
parser.add_argument('-l', + '--learning-rate', + default=0.005, + type=float, + help='initial learning rate', + dest='lr') + # Determine which gpu to use + parser.add_argument('-i', + '--device-id', + default=0, + type=int, + help='which GPU to use', + dest='device_id') + parser.add_argument('-g', + '--disable-graph', + default='True', + action='store_false', + help='disable graph', + dest='graph') + parser.add_argument('-v', + '--log-verbosity', + default=0, + type=int, + help='logging verbosity', + dest='verbosity') + + args = parser.parse_args() + + sgd = opt.SGD(lr=args.lr, momentum=0.9, weight_decay=1e-5, dtype=singa_dtype[args.precision]) + run(0, + 1, + args.device_id, + args.max_epoch, + args.batch_size, + args.model, + args.data, + sgd, + args.graph, + args.verbosity, + precision=args.precision) diff --git a/examples/healthcare/application/Diabetic_Retinopathy_Classification/README.md b/examples/healthcare/application/Diabetic_Disease/Diabetic_Retinopathy_Classification/README.md similarity index 95% rename from examples/healthcare/application/Diabetic_Retinopathy_Classification/README.md rename to examples/healthcare/application/Diabetic_Disease/Diabetic_Retinopathy_Classification/README.md index dfa88fd50..ef0194bcb 100644 --- a/examples/healthcare/application/Diabetic_Retinopathy_Classification/README.md +++ b/examples/healthcare/application/Diabetic_Disease/Diabetic_Retinopathy_Classification/README.md @@ -42,10 +42,10 @@ To mitigate the problem, we use Singa to implement a machine learning model to h a subclass of `Module` to wrap the neural network operations of each model. -* `train_cnn.py` is the training script, which controls the training flow by +* `train.py` is the training script, which controls the training flow by doing BackPropagation and SGD update. ## Command ```bash -python train_cnn.py cnn diaret -dir pathToDataset +python train.py drnet -dir pathToDataset ``` diff --git a/examples/healthcare/application/Diabetic_Disease/Diabetic_Retinopathy_Classification/run.sh b/examples/healthcare/application/Diabetic_Disease/Diabetic_Retinopathy_Classification/run.sh new file mode 100644 index 000000000..1773b067f --- /dev/null +++ b/examples/healthcare/application/Diabetic_Disease/Diabetic_Retinopathy_Classification/run.sh @@ -0,0 +1,20 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+#
+
+### diabetic retinopathy dataset
+python train.py drnet -dir pathToDataset
diff --git a/examples/healthcare/application/Diabetic_Retinopathy_Classification/train.py b/examples/healthcare/application/Diabetic_Disease/Diabetic_Retinopathy_Classification/train.py
similarity index 91%
rename from examples/healthcare/application/Diabetic_Retinopathy_Classification/train.py
rename to examples/healthcare/application/Diabetic_Disease/Diabetic_Retinopathy_Classification/train.py
index 5ef41851a..e3f3649f2 100644
--- a/examples/healthcare/application/Diabetic_Retinopathy_Classification/train.py
+++ b/examples/healthcare/application/Diabetic_Disease/Diabetic_Retinopathy_Classification/train.py
@@ -1,3 +1,21 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
 from singa import singa_wrap as singa
 from singa import device
 from singa import tensor
@@ -6,7 +24,7 @@ import time
 import argparse
 import sys
 
-sys.path.append("../../..")
+sys.path.append("../../../..")
 
 from PIL import Image
 
@@ -111,7 +129,7 @@ def run(global_rank,
     data_size = np.prod(train_x.shape[1:train_x.ndim]).item()
     num_classes = (np.max(train_y) + 1).item()
 
-    if model == 'cnn':
+    if model == 'drnet':
         model = diabetic_retinopthy_net.create_model(num_channels=num_channels,
                                                      num_classes=num_classes)
     else:
@@ -234,8 +252,8 @@ def run(global_rank,
         description='Training using the autograd and graph.')
     parser.add_argument(
         'model',
-        choices=['cnn'],
-        default='cnn')
+        choices=['drnet'],
+        default='drnet')
     parser.add_argument('data',
                         choices=['diaret'],
                         default='diaret')
diff --git a/examples/healthcare/application/Hematologic_Disease/readme.md b/examples/healthcare/application/Hematologic_Disease/readme.md
index d0f4902b9..26564c0d2 100644
--- a/examples/healthcare/application/Hematologic_Disease/readme.md
+++ b/examples/healthcare/application/Hematologic_Disease/readme.md
@@ -16,10 +16,11 @@ specific language governing permissions and limitations
 under the License.
 -->
 
-# CNN demo model on BloodMnist dataset
+# Train a hematologic net model on the BloodMnist dataset
+
+This example trains a hematologic net model on the BloodMnist dataset.
 
 ## About dataset
-Download address: https://drive.google.com/drive/folders/1Ze9qri1UtAsIRoI0SJ4YRpdt5kUUMBEn?usp=sharing
 
 The BloodMNIST , as a sub set of [MedMNIST](https://medmnist.com/), is based on a dataset of individual normal cells, captured from individuals without infection, hematologic or oncologic disease and free of any pharmacologic treatment at the moment of blood collection.
 It contains a total of 17,092 images and is organized into 8 classes.
@@ -27,7 +28,7 @@ it is split with a ratio of 7:1:2 into training, validation and test set.
 The source images with resolution 3×360×363 pixels are center-cropped into 3×200×200, and then resized into 3×28×28.
 
 8 classes of the dataset:
-```python
+```
 "0": "basophil",
 "1": "eosinophil",
 "2": "erythroblast",
 "3": "ig (immature granulocytes)",
 "4": "lymphocyte",
 "5": "monocyte",
 "6": "neutrophil",
 "7": "platelet"
 ```
 
-## Command
+## Running instructions
+
+1. Download the pre-processed [BloodMnist dataset](https://github.com/lzjpaul/singa-healthcare/blob/main/data/bloodmnist/bloodmnist.tar.gz) to a folder (pathToDataset), which contains a few training samples and test samples. For the complete BloodMnist dataset, please download it via this [link](https://github.com/gzrp/bloodmnist/blob/master/bloodmnist.zip).
+
+2. Start the training:
+
 ```bash
-python train_cnn.py cnn bloodmnist -dir pathToDataset
+python train.py hematologicnet -dir pathToDataset
 ```
diff --git a/examples/healthcare/application/Hematologic_Disease/run.sh b/examples/healthcare/application/Hematologic_Disease/run.sh
index c4a321ede..9b8d777ec 100644
--- a/examples/healthcare/application/Hematologic_Disease/run.sh
+++ b/examples/healthcare/application/Hematologic_Disease/run.sh
@@ -17,4 +17,4 @@
 #
 
 ### bloodminist dataset
-python train_cnn.py cnn bloodminist -dir pathToDataset
+python train.py hematologicnet -dir pathToDataset
diff --git a/examples/healthcare/application/Hematologic_Disease/train_cnn.py b/examples/healthcare/application/Hematologic_Disease/train.py
similarity index 87%
rename from examples/healthcare/application/Hematologic_Disease/train_cnn.py
rename to examples/healthcare/application/Hematologic_Disease/train.py
index 0f267cd5a..a0523f0e8 100644
--- a/examples/healthcare/application/Hematologic_Disease/train_cnn.py
+++ b/examples/healthcare/application/Hematologic_Disease/train.py
@@ -23,7 +23,6 @@ from singa import tensor
 from singa import opt
 import numpy as np
-from tqdm import tqdm
 import argparse
 import sys
 sys.path.append("../../..")
@@ -55,7 +54,6 @@ def run(dir_path,
        max_epoch,
        batch_size,
        model,
-       data,
        lr,
        graph,
        verbosity,
@@ -66,15 +64,10 @@ def run(dir_path,
     dev = device.create_cpu_device()
     dev.SetRandSeed(0)
     np.random.seed(0)
-    if data == 'bloodmnist':
-        train_dataset, val_dataset, num_class = bloodmnist.load(dir_path=dir_path)
-    else:
-        print(
-            'Wrong dataset!'
-        )
-        sys.exit(0)
-    if model == 'cnn':
+    train_dataset, val_dataset, num_class = bloodmnist.load(dir_path=dir_path)
+
+    if model == 'hematologicnet':
         model = hematologic_net.create_model(num_classes=num_class)
     else:
         print(
@@ -112,7 +105,7 @@ def run(dir_path,
 
         # Training part
         model.train()
-        for b in tqdm(range(num_train_batch)):
+        for b in range(num_train_batch):
             # Extract batch from image list
             x, y = train_dataset.batchgenerator(idx[b * batch_size:(b + 1) * batch_size],
                                                 batch_size=batch_size, data_size=(3, model.input_size, model.input_size))
@@ -124,13 +117,13 @@ def run(dir_path,
             out, loss = model(tx, ty, dist_option, spars)
             train_correct += accuracy(tensor.to_numpy(out), y)
             train_loss += tensor.to_numpy(loss)[0]
-        print('Training loss = %f, training accuracy = %f' %
-              (train_loss, train_correct /
+        print('Training loss = %f, training accuracy = %.2f %%' %
+              (train_loss, 100.0 * train_correct /
               (num_train_batch * batch_size)))
 
         # Validation part
         model.eval()
-        for b in tqdm(range(num_val_batch)):
+        for b in range(num_val_batch):
             x, y = train_dataset.batchgenerator(idx[b * batch_size:(b + 1) * batch_size],
                                                 batch_size=batch_size, data_size=(3, model.input_size, model.input_size))
             x = x.astype(np_dtype[precision])
@@ -141,8 +134,8 @@ def run(dir_path,
             out = model(tx)
             test_correct += accuracy(tensor.to_numpy(out), y)
 
-        print('Evaluation accuracy = %f, Elapsed Time = %fs' %
-              (test_correct / (num_val_batch * batch_size),
+        print('Evaluation accuracy = %.2f%%, Elapsed Time = %fs' %
+              (100.0*test_correct / (num_val_batch * batch_size),
                time.time() - start_time))
 
 
@@ -152,18 +145,14 @@ def run(dir_path,
         description='Training using the autograd and graph.')
     parser.add_argument(
         'model',
-        choices=['cnn'],
-        default='cnn')
-    parser.add_argument('data',
-                        choices=['bloodmnist'],
-                        default='bloodmnist')
+        choices=['hematologicnet'],
+        default='hematologicnet')
     parser.add_argument('-p',
                         choices=['float32', 'float16'],
                         default='float32',
                         dest='precision')
     parser.add_argument('-dir',
                         '--dir-path',
-                        default="/tmp/bloodmnist",
                         type=str,
                         help='the directory to store the bloodmnist dataset',
                         dest='dir_path')
@@ -175,7 +164,7 @@ def run(dir_path,
                         dest='max_epoch')
     parser.add_argument('-b',
                         '--batch-size',
-                        default=256,
+                        default=8,
                         type=int,
                         help='batch size',
                         dest='batch_size')
@@ -204,7 +193,6 @@ def run(dir_path,
         args.max_epoch,
         args.batch_size,
         args.model,
-        args.data,
         args.lr,
         args.graph,
         args.verbosity,
diff --git a/examples/healthcare/application/Kidney_Disease/README.md b/examples/healthcare/application/Kidney_Disease/README.md
new file mode 100644
index 000000000..0a3979e79
--- /dev/null
+++ b/examples/healthcare/application/Kidney_Disease/README.md
@@ -0,0 +1,46 @@
+
+
+# Singa for Kidney Disease Prediction
+
+## Kidney Disease Prediction Task
+
+Kidney disease prediction uses data science and machine learning techniques to estimate the likelihood that a patient suffers from kidney disease. The goal is to determine whether a patient has kidney disease by analyzing data such as the patient’s medical history, physiological indicators, diagnostic information, treatment options, and socioeconomic factors, so that appropriate interventions and treatment can be provided in advance.
+
+The dataset used in this task is a preprocessed version of MIMIC-III. The features cover 6 visit windows, with 2549 frequent diagnoses, procedures and drugs for each window.
+Each item in the features is the data for one patient, and these features are one-hot encoded. The labels are the corresponding flags marking whether the patient suffered from kidney disease: the label equals "1" if the patient had kidney disease and "0" if not.
+
+
+
+## Structure
+
+* `kidney.py` in folder `healthcare/data` loads the pre-processed kidney data to be utilized.
+
+* `kidney_net.py` in folder `healthcare/models` includes the construction code of the KidneyNet model to be applied for kidney disease prediction.
+
+* `train.py` is the training script, which controls the training flow by doing backpropagation and SGD updates.
+
+## Instructions
+Before starting to use this model for kidney disease prediction, download the sample dataset for kidney disease prediction: https://github.com/lzjpaul/singa-healthcare/tree/main/data/kidney
+
+The provided dataset is a pre-processed subset of MIMIC-III and contains 100 samples for model testing.
+
+Please download the dataset to a folder (pathToDataset), and then pass the path to run the code using the following command:
+```bash
+python train.py kidneynet -dir pathToDataset
+```
diff --git a/examples/healthcare/application/Kidney_Disease/run.sh b/examples/healthcare/application/Kidney_Disease/run.sh
new file mode 100644
index 000000000..d86c83bf4
--- /dev/null
+++ b/examples/healthcare/application/Kidney_Disease/run.sh
@@ -0,0 +1,20 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+### kidney disease dataset
+python train.py kidneynet -dir pathToDataset
diff --git a/examples/healthcare/Malaria_Detection/train_cnn.py b/examples/healthcare/application/Kidney_Disease/train.py
similarity index 75%
rename from examples/healthcare/Malaria_Detection/train_cnn.py
rename to examples/healthcare/application/Kidney_Disease/train.py
index bfe810d4f..1af8c24ef 100644
--- a/examples/healthcare/Malaria_Detection/train_cnn.py
+++ b/examples/healthcare/application/Kidney_Disease/train.py
@@ -1,3 +1,22 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.
See the License for the +# specific language governing permissions and limitations +# under the License. +# + from singa import singa_wrap as singa from singa import device from singa import tensor @@ -5,8 +24,11 @@ import numpy as np import time import argparse -import sys from PIL import Image +import sys +sys.path.append("../../..") +from healthcare.data import kidney +from healthcare.models import kidney_net np_dtype = {"float16": np.float16, "float32": np.float32} @@ -19,8 +41,8 @@ def augmentation(x, batch_size): for data_num in range(0, batch_size): offset = np.random.randint(8, size=2) x[data_num, :, :, :] = xpad[data_num, :, - offset[0]:offset[0] + x.shape[2], - offset[1]:offset[1] + x.shape[2]] + offset[0]:offset[0] + x.shape[2], + offset[1]:offset[1] + x.shape[2]] if_flip = np.random.randint(2) if (if_flip): x[data_num, :, :, :] = x[data_num, :, :, ::-1] @@ -31,7 +53,10 @@ def augmentation(x, batch_size): def accuracy(pred, target): # y is network output to be compared with ground truth (int) y = np.argmax(pred, axis=1) + #print('y:',y) + #print('tar:',target) a = y == target + #print(np.array(a, "int")) correct = np.array(a, "int").sum() return correct @@ -72,13 +97,13 @@ def resize_dataset(x, image_size): for d in range(0, dim): X[n, d, :, :] = np.array(Image.fromarray(x[n, d, :, :]).resize( (image_size, image_size), Image.BILINEAR), - dtype=np.float32) + dtype=np.float32) return X def run(global_rank, world_size, - dir_path, + local_rank, max_epoch, batch_size, model, @@ -86,36 +111,42 @@ def run(global_rank, sgd, graph, verbosity, + dir_path, dist_option='plain', spars=None, precision='float32'): - # now CPU version only, could change to GPU device for GPU-support machines + #dev = device.create_cuda_gpu_on(local_rank) # need to change to CPU device for CPU-only machines dev = device.get_default_device() dev.SetRandSeed(0) np.random.seed(0) - if data == 'malaria': - from data import malaria - train_x, train_y, val_x, val_y = malaria.load(dir_path=dir_path) + + if data == 'kidney': + + train_x, train_y, val_x, val_y = kidney.load(dir_path) else: - print( - 'Wrong dataset!' - ) - sys.exit(0) + print('Wrong Dataset!') + sys.exit(0) + num_channels = train_x.shape[1] image_size = train_x.shape[2] data_size = np.prod(train_x.shape[1:train_x.ndim]).item() num_classes = (np.max(train_y) + 1).item() + print(num_channels,image_size) + + + if model == 'kidneynet': + import os, sys, inspect + current = os.path.dirname( + os.path.abspath(inspect.getfile(inspect.currentframe()))) + parent = os.path.dirname(current) + sys.path.insert(0, parent) - if model == 'cnn': - from model import cnn - model = cnn.create_model(num_channels=num_channels, - num_classes=num_classes) + model = kidney_net.create_model(data_size=data_size, + num_classes=num_classes) else: - print( - 'Wrong model!' 
- ) - sys.exit(0) + print('Wrong model!') + sys.exit(0) # For distributed training, sequential has better performance if hasattr(sgd, "communicator"): @@ -135,8 +166,7 @@ def run(global_rank, (batch_size, num_channels, model.input_size, model.input_size), dev, singa_dtype[precision]) elif model.dimension == 2: - tx = tensor.Tensor((batch_size, data_size), - dev, singa_dtype[precision]) + tx = tensor.Tensor((batch_size, data_size), dev, singa_dtype[precision]) np.reshape(train_x, (train_x.shape[0], -1)) np.reshape(val_x, (val_x.shape[0], -1)) @@ -231,24 +261,18 @@ def run(global_rank, description='Training using the autograd and graph.') parser.add_argument( 'model', - choices=['cnn'], - default='cnn') - parser.add_argument('data', - choices=['malaria'], - default='malaria') + choices=[ 'cardionet', 'diabeticnet', 'drnet', 'hematologicnet', 'kidneynet', 'malarianet', 'tedctnet'], + default='kidneynet') + parser.add_argument('-data', + choices=['mnist', 'cifar10', 'cifar100','kidney'], + default='kidney') parser.add_argument('-p', choices=['float32', 'float16'], default='float32', dest='precision') - parser.add_argument('-dir', - '--dir-path', - default="/tmp/malaria", - type=str, - help='the directory to store the malaria dataset', - dest='dir_path') parser.add_argument('-m', '--max-epoch', - default=100, + default=20, type=int, help='maximum epochs', dest='max_epoch') @@ -264,6 +288,13 @@ def run(global_rank, type=float, help='initial learning rate', dest='lr') + # Determine which gpu to use + parser.add_argument('-i', + '--device-id', + default=0, + type=int, + help='which GPU to use', + dest='device_id') parser.add_argument('-g', '--disable-graph', default='True', @@ -276,14 +307,18 @@ def run(global_rank, type=int, help='logging verbosity', dest='verbosity') - + parser.add_argument('-dir', + '--dir-path', + default="/tmp/kidney", + type=str, + help='the directory to store the kidney dataset', + dest='dir_path') args = parser.parse_args() - sgd = opt.SGD(lr=args.lr, momentum=0.9, weight_decay=1e-5, - dtype=singa_dtype[args.precision]) + sgd = opt.SGD(lr=args.lr, momentum=0.9, weight_decay=1e-5, dtype=singa_dtype[args.precision]) run(0, 1, - args.dir_path, + args.device_id, args.max_epoch, args.batch_size, args.model, @@ -291,4 +326,5 @@ def run(global_rank, sgd, args.graph, args.verbosity, + args.dir_path, precision=args.precision) diff --git a/examples/healthcare/application/Malaria_Detection/readme.md b/examples/healthcare/application/Malaria_Disease/readme.md similarity index 97% rename from examples/healthcare/application/Malaria_Detection/readme.md rename to examples/healthcare/application/Malaria_Disease/readme.md index 00100b77f..45a532db8 100644 --- a/examples/healthcare/application/Malaria_Detection/readme.md +++ b/examples/healthcare/application/Malaria_Disease/readme.md @@ -40,5 +40,5 @@ To mitigate the problem, we use Singa to implement a machine learning model to h ## Command ```bash -python train_cnn.py cnn malaria -dir pathToDataset -``` \ No newline at end of file +python train.py malarianet -dir pathToDataset +``` diff --git a/examples/healthcare/application/Malaria_Detection/run.sh b/examples/healthcare/application/Malaria_Disease/run.sh similarity index 94% rename from examples/healthcare/application/Malaria_Detection/run.sh rename to examples/healthcare/application/Malaria_Disease/run.sh index 8e10e9924..abf2fbc9d 100644 --- a/examples/healthcare/application/Malaria_Detection/run.sh +++ b/examples/healthcare/application/Malaria_Disease/run.sh @@ -17,4 +17,4 @@ # ### 
malaria dataset
-python train_cnn.py cnn malaria -dir pathToDataset
\ No newline at end of file
+python train.py malarianet -dir pathToDataset
diff --git a/examples/healthcare/application/Malaria_Detection/train_cnn.py b/examples/healthcare/application/Malaria_Disease/train.py
similarity index 98%
rename from examples/healthcare/application/Malaria_Detection/train_cnn.py
rename to examples/healthcare/application/Malaria_Disease/train.py
index a58ae802c..78fbd31de 100644
--- a/examples/healthcare/application/Malaria_Detection/train_cnn.py
+++ b/examples/healthcare/application/Malaria_Disease/train.py
@@ -130,8 +130,8 @@ def run(global_rank,
     data_size = np.prod(train_x.shape[1:train_x.ndim]).item()
     num_classes = (np.max(train_y) + 1).item()
 
-    if model == 'cnn':
-        model = malaria_net.create_model(model_option='cnn', num_channels=num_channels,
+    if model == 'malarianet':
+        model = malaria_net.create_model(model_option='MalariaNet', num_channels=num_channels,
                                          num_classes=num_classes)
     else:
         print(
@@ -255,8 +255,8 @@ def run(global_rank,
         description='Training using the autograd and graph.')
     parser.add_argument(
         'model',
-        choices=['cnn'],
-        default='cnn')
+        choices=['malarianet'],
+        default='malarianet')
     parser.add_argument('data',
                         choices=['malaria'],
                         default='malaria')
diff --git a/examples/healthcare/application/TED_CT_Detection/README.md b/examples/healthcare/application/TED_CT_Detection/README.md
deleted file mode 100644
index f23e2404a..000000000
--- a/examples/healthcare/application/TED_CT_Detection/README.md
+++ /dev/null
@@ -1,16 +0,0 @@
-# Convolutional Prototype Learning
-
-We have successfully applied the idea of prototype loss in various medical image classification task to improve performance, for example detection thyroid eye disease from CT images. Here we provide the implementation of the convolution prototype model in Singa. Due to data privacy, we are not able to release the CT image dataset used. The training scripts `./train.py` demonstrate how to apply this model on cifar-10 dataset.
-
-
-## run
-
-1. Download `healthcare` directory then change to the `healthcare/application/TED_CT_Detection` directory.
-2. Command.
-```bash
-python train.py -dir pathToDataset
-```
-
-## reference
-
-[Robust Classification with Convolutional Prototype Learning](https://arxiv.org/abs/1805.03438)
\ No newline at end of file
diff --git a/examples/healthcare/application/Thyroid_Eye_Disease/README.md b/examples/healthcare/application/Thyroid_Eye_Disease/README.md
new file mode 100644
index 000000000..755ef96d1
--- /dev/null
+++ b/examples/healthcare/application/Thyroid_Eye_Disease/README.md
@@ -0,0 +1,37 @@
+
+
+# Convolutional Prototype Learning
+
+We have successfully applied the idea of prototype loss in various medical image classification tasks to improve performance, for example, detecting thyroid eye disease from CT images. Here we provide the implementation of the convolutional prototype model in Singa. Due to data privacy, we are not able to release the CT image dataset used. The training script `./train.py` demonstrates how to apply this model to the CIFAR-10 dataset.
+
+
+## Running instructions
+
+1. Download the [CIFAR-10 python version](https://www.cs.toronto.edu/~kriz/cifar.html) to a folder (pathToDataset).
+
+2.
Start the training + +```bash +python train.py tedctnet -dir pathToDataset +``` + +## reference + +[Robust Classification with Convolutional Prototype Learning](https://arxiv.org/abs/1805.03438) \ No newline at end of file diff --git a/examples/healthcare/application/Thyroid_Eye_Disease/run.sh b/examples/healthcare/application/Thyroid_Eye_Disease/run.sh new file mode 100644 index 000000000..eef7bd480 --- /dev/null +++ b/examples/healthcare/application/Thyroid_Eye_Disease/run.sh @@ -0,0 +1,20 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +### command +python train.py tedctnet -dir pathToDataset diff --git a/examples/healthcare/application/TED_CT_Detection/train.py b/examples/healthcare/application/Thyroid_Eye_Disease/train.py similarity index 87% rename from examples/healthcare/application/TED_CT_Detection/train.py rename to examples/healthcare/application/Thyroid_Eye_Disease/train.py index 2b045fd93..270740091 100644 --- a/examples/healthcare/application/TED_CT_Detection/train.py +++ b/examples/healthcare/application/Thyroid_Eye_Disease/train.py @@ -60,6 +60,7 @@ def run( dir_path, max_epoch, batch_size, + model, sgd, graph, verbosity, @@ -77,8 +78,14 @@ def run( data_size = np.prod(train_x.shape[1 : train_x.ndim]).item() num_classes = (np.max(train_y) + 1).item() - backbone = tedct_net.create_cnn_model(num_channels=num_channels, num_classes=num_classes) - model = tedct_net.create_model(backbone, prototype_count=10, lamb=0.5, temp=10) + if model == 'tedctnet': + backbone = tedct_net.create_backbone(num_channels=num_channels, num_classes=num_classes) + model = tedct_net.create_model(backbone, prototype_count=10, lamb=0.5, temp=10) + else: + print( + 'Wrong model!' 
+ ) + sys.exit(0) if backbone.dimension == 4: tx = tensor.Tensor( @@ -105,7 +112,6 @@ def run( np.random.shuffle(idx) train_correct = np.zeros(shape=[1], dtype=np.float32) - test_correct = np.zeros(shape=[1], dtype=np.float32) train_loss = np.zeros(shape=[1], dtype=np.float32) model.train() @@ -119,11 +125,12 @@ def run( train_correct += accuracy(tensor.to_numpy(out), y) train_loss += tensor.to_numpy(loss)[0] print( - "Training loss = %f, training accuracy = %f" - % (train_loss, train_correct / (num_train_batch * batch_size)), + "Training loss = %f, training accuracy = %.2f %%" + % (train_loss, 100.0 * train_correct / (num_train_batch * batch_size)), flush=True, ) + test_correct = np.zeros(shape=[1], dtype=np.float32) model.eval() for b in range(num_val_batch): x = val_x[b * batch_size : (b + 1) * batch_size] @@ -132,22 +139,26 @@ def run( tx.copy_from_numpy(x) ty.copy_from_numpy(y) - out_test = model(tx, ty, dist_option="fp32", spars=None) + out_test = model(tx) test_correct += accuracy(tensor.to_numpy(out_test), y) - + print('Evaluation accuracy = %.2f %%' % + (100.0 * test_correct / (num_val_batch * batch_size))) if __name__ == "__main__": parser = argparse.ArgumentParser(description="Train a CPL model") + parser.add_argument( + 'model', + choices=['tedctnet'], + default='tedctnet') parser.add_argument('-dir', '--dir-path', - default="/tmp/cifar-10-batches-py", type=str, help='the directory to store the dataset', dest='dir_path') parser.add_argument( "-m", "--max-epoch", - default=20, + default=10, type=int, help="maximum epochs", dest="max_epoch", @@ -196,6 +207,7 @@ def run( args.dir_path, args.max_epoch, args.batch_size, + args.model, sgd, args.graph, args.verbosity diff --git a/examples/healthcare/data/bloodmnist.py b/examples/healthcare/data/bloodmnist.py index 1fe3e5cc3..4042a0c7d 100644 --- a/examples/healthcare/data/bloodmnist.py +++ b/examples/healthcare/data/bloodmnist.py @@ -221,7 +221,7 @@ def batchgenerator(self, indexes, batch_size, data_size): return batch_x, batch_y -def load(dir_path="tmp/bloodmnist"): +def load(dir_path): # Dataset loading train_path = os.path.join(dir_path, "train") val_path = os.path.join(dir_path, "val") diff --git a/examples/healthcare/data/cardiovascular.py b/examples/healthcare/data/cardiovascular.py new file mode 100644 index 000000000..b1e5b98fb --- /dev/null +++ b/examples/healthcare/data/cardiovascular.py @@ -0,0 +1,91 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+#
+
+import numpy as np
+import warnings
+warnings.filterwarnings("ignore", category=DeprecationWarning)
+from sklearn.model_selection import train_test_split
+from sklearn.compose import ColumnTransformer
+from sklearn.preprocessing import StandardScaler, OneHotEncoder
+from sklearn.impute import SimpleImputer
+from sklearn.pipeline import Pipeline
+
+def load_cardio_data(file_path):
+
+    data = np.genfromtxt(file_path, delimiter=',', skip_header=0)
+
+    continuous_cols = [0, 2, 3, 4, 5]
+    binary1_col = [1]
+    ternary_cols = [6, 7]
+    binary2_cols = [8, 9, 10]
+
+    X = data[:, :-1]
+    y = data[:, -1]
+
+    X_train, X_val, y_train, y_val = train_test_split(
+        X, y, test_size=0.2, random_state=42, shuffle=False
+    )
+
+    preprocessor = ColumnTransformer(
+        transformers=[
+            ('cont', Pipeline([
+                ('imputer', SimpleImputer(strategy='mean')),
+                ('scaler', StandardScaler())
+            ]), continuous_cols),
+            ('binary1', Pipeline([
+                ('imputer', SimpleImputer(strategy='most_frequent')),
+                ('onehot', OneHotEncoder(sparse_output=False, drop=None))
+            ]), binary1_col),
+            ('ternary', Pipeline([
+                ('imputer', SimpleImputer(strategy='most_frequent')),
+                ('onehot', OneHotEncoder(sparse_output=False, drop=None))
+            ]), ternary_cols),
+            ('binary2', Pipeline([
+                ('imputer', SimpleImputer(strategy='most_frequent')),
+                ('onehot', OneHotEncoder(sparse_output=False, drop=None))
+            ]), binary2_cols)
+        ],
+        remainder='drop'
+    )
+
+    X_train_processed = preprocessor.fit_transform(X_train)
+    X_val_processed = preprocessor.transform(X_val)
+
+    return X_train_processed, y_train, X_val_processed, y_val
+
+def load(file_path):
+
+    try:
+        X_train, y_train, X_val, y_val = load_cardio_data(file_path)
+    except FileNotFoundError:
+        raise SystemExit(f"Error: file {file_path} not found.")
+
+    X_train = X_train.astype(np.float32)
+    X_val = X_val.astype(np.float32)
+    y_train = y_train.astype(np.int32)
+    y_val = y_val.astype(np.int32)
+
+    return X_train, y_train, X_val, y_val
diff --git a/examples/healthcare/data/cifar10.py b/examples/healthcare/data/cifar10.py
index 8e6c3f9ac..1ee6a0bb3 100644
--- a/examples/healthcare/data/cifar10.py
+++ b/examples/healthcare/data/cifar10.py
@@ -40,7 +40,7 @@ def load_dataset(filepath):
     return image, label
 
 
-def load_train_data(dir_path='/tmp/cifar-10-batches-py', num_batches=5):  # need to save to specific local directories
+def load_train_data(dir_path, num_batches=5):  # need to save to specific local directories
     labels = []
     batchsize = 10000
     images = np.empty((num_batches * batchsize, 3, 32, 32), dtype=np.uint8)
@@ -54,7 +54,7 @@ def load_train_data(dir_path='/tmp/cifar-10-batches-py', num_batches=5):  # nee
     return images, labels
 
 
-def load_test_data(dir_path='/tmp/cifar-10-batches-py'):  # need to save to specific local directories
+def load_test_data(dir_path):  # need to save to specific local directories
     images, labels = load_dataset(check_dataset_exist(dir_path + "/test_batch"))
     return np.array(images, dtype=np.float32), np.array(labels, dtype=np.int32)
 
diff --git a/examples/healthcare/data/diabetic.py b/examples/healthcare/data/diabetic.py
new file mode 100644
index 000000000..6a48b4bd6
--- /dev/null
+++ b/examples/healthcare/data/diabetic.py
@@ -0,0 +1,78 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.
The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +# + +from ucimlrepo import fetch_ucirepo +from sklearn.model_selection import train_test_split +import pandas as pd +import numpy as np + + +def load_dataset(columns_to_encode=None, flag=True): + """ + Load the dataset and apply one-hot encoding to features (all columns or specific columns). + Targets will first be one-hot encoded and then converted to categorical integer labels. + + Parameters: + columns_to_encode (list or None): List of column names to be one-hot encoded. + If None and `flag=True`, all columns are encoded. + flag (bool): Whether to apply one-hot encoding to all columns. + If True, `columns_to_encode` will be ignored, and all columns will be processed. + + Returns: + train_x, train_y, test_x, test_y (numpy.ndarray): + Train features, train labels, test features, and test labels in NumPy array format. + """ + # Load the dataset + diabetes_data = fetch_ucirepo(id=296) + + # Extract features and targets + features = diabetes_data.data.features + targets = diabetes_data.data.targets + + # Apply one-hot encoding to features + if flag or columns_to_encode is None: + features_encoded = pd.get_dummies(features, drop_first=True) + else: + features_encoded = pd.get_dummies(features, columns=columns_to_encode, drop_first=True) + + # One-hot encode targets and convert to a single categorical variable + targets_encoded = pd.get_dummies(targets, drop_first=False) + targets_categorical = targets_encoded.idxmax(axis=1) # Get the column name with the max value (One-Hot index) + targets_categorical = targets_categorical.astype('category').cat.codes # Convert to integer codes + + # Convert to NumPy arrays + features_np = features_encoded.to_numpy(dtype=np.float32) + targets_np = targets_categorical.to_numpy(dtype=np.float32) + + # Split the data + train_x, test_x, train_y, test_y = train_test_split( + features_np, targets_np, test_size=0.2, random_state=42 + ) + + return train_x, train_y, test_x, test_y + + + +def load(): + train_x, train_y, val_x, val_y = load_dataset() + train_x = train_x.astype(np.float32) + val_x = val_x.astype(np.float32) + train_y = train_y.astype(np.int32) + val_y = val_y.astype(np.int32) + return train_x, train_y, val_x, val_y diff --git a/examples/healthcare/data/kidney.py b/examples/healthcare/data/kidney.py new file mode 100644 index 000000000..6ed0d7dc2 --- /dev/null +++ b/examples/healthcare/data/kidney.py @@ -0,0 +1,59 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. 
You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +import numpy as np + +import pickle +import sys +import os + +def load_dataset(dir_path="/tmp/kidney"): + dir_path = check_dataset_exist(dir_path=dir_path) + feature_path = os.path.join(dir_path, "kidney_features.pkl") + label_path = os.path.join(dir_path, "kidney_labels.pkl") + with open(feature_path,'rb') as f: + features = pickle.load(f) + with open(label_path,'rb') as f: + labels = pickle.load(f) + + + split_train_point = int(len(features) * 8/ 10) + train_x, train_y = features[:split_train_point], labels[:split_train_point] + val_x, val_y = features[split_train_point:], labels[split_train_point:] + + return train_x,train_y,val_x,val_y + +def check_dataset_exist(dir_path): + if not os.path.exists(dir_path): + print( + 'Please download the kidney dataset first' + ) + sys.exit(0) + return dir_path + + +def load(dir_path): + train_x,train_y,val_x,val_y = load_dataset(dir_path) + + train_x = train_x.astype(np.float32) + val_x = val_x.astype(np.float32) + train_y = train_y.astype(np.int32) + val_y = val_y.astype(np.int32) + + return train_x,train_y,val_x,val_y + diff --git a/examples/healthcare/Malaria_Detection/model/mlp.py b/examples/healthcare/models/cardionet.py similarity index 73% rename from examples/healthcare/Malaria_Detection/model/mlp.py rename to examples/healthcare/models/cardionet.py index 5f46bc321..8adbecc6e 100644 --- a/examples/healthcare/Malaria_Detection/model/mlp.py +++ b/examples/healthcare/models/cardionet.py @@ -7,13 +7,14 @@ # "License"); you may not use this file except in compliance # with the License. You may obtain a copy of the License at # -# http://www.apache.org/licenses/LICENSE-2.0 +# http://www.apache.org/licenses/LICENSE-2.0 # -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. # from singa import layer @@ -29,10 +30,10 @@ singa_dtype = {"float16": tensor.float16, "float32": tensor.float32} -class MLP(model.Model): +class CardioNet(model.Model): - def __init__(self, perceptron_size=100, num_classes=10): - super(MLP, self).__init__() + def __init__(self, data_size=10, perceptron_size=100, num_classes=10): + super(CardioNet, self).__init__() self.num_classes = num_classes self.dimension = 2 @@ -71,15 +72,20 @@ def set_optimizer(self, optimizer): self.optimizer = optimizer -def create_model(**kwargs): - """Constructs a CNN model. +def create_model(pretrained=False, **kwargs): + """Constructs a CardioNet model. + Args: + pretrained (bool): If True, returns a pre-trained model. + Returns: - The created CNN model. + The created CardioNet model. 
""" - model = MLP(**kwargs) + model = CardioNet(**kwargs) return model -__all__ = ['MLP', 'create_model'] +__all__ = ['CardioNet', 'create_model'] + + diff --git a/examples/healthcare/models/diabetic_net.py b/examples/healthcare/models/diabetic_net.py new file mode 100644 index 000000000..a9bffe1a1 --- /dev/null +++ b/examples/healthcare/models/diabetic_net.py @@ -0,0 +1,147 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +# + +from singa import layer +from singa import model +from singa import tensor +from singa import opt +from singa import device +import argparse +import numpy as np + +np_dtype = {"float16": np.float16, "float32": np.float32} + +singa_dtype = {"float16": tensor.float16, "float32": tensor.float32} + + +class diabeticnet(model.Model): + + def __init__(self, data_size=10, perceptron_size=100, num_classes=10): + super(diabeticnet, self).__init__() + self.num_classes = num_classes + self.dimension = 2 + + self.relu = layer.ReLU() + self.linear1 = layer.Linear(perceptron_size) + self.linear2 = layer.Linear(num_classes) + self.softmax_cross_entropy = layer.SoftMaxCrossEntropy() + + def forward(self, inputs): + y = self.linear1(inputs) + y = self.relu(y) + y = self.linear2(y) + return y + + def train_one_batch(self, x, y, dist_option, spars): + out = self.forward(x) + loss = self.softmax_cross_entropy(out, y) + + if dist_option == 'plain': + self.optimizer(loss) + elif dist_option == 'half': + self.optimizer.backward_and_update_half(loss) + elif dist_option == 'partialUpdate': + self.optimizer.backward_and_partial_update(loss) + elif dist_option == 'sparseTopK': + self.optimizer.backward_and_sparse_update(loss, + topK=True, + spars=spars) + elif dist_option == 'sparseThreshold': + self.optimizer.backward_and_sparse_update(loss, + topK=False, + spars=spars) + return out, loss + + def set_optimizer(self, optimizer): + self.optimizer = optimizer + + +def create_model(pretrained=False, **kwargs): + """Constructs a CNN model. + + Args: + pretrained (bool): If True, returns a pre-trained model. + + Returns: + The created CNN model. 
+ """ + model = diabeticnet(**kwargs) + + return model + + +__all__ = ['diabeticnet', 'create_model'] + +if __name__ == "__main__": + np.random.seed(0) + + parser = argparse.ArgumentParser() + parser.add_argument('-p', + choices=['float32', 'float16'], + default='float32', + dest='precision') + parser.add_argument('-g', + '--disable-graph', + default='True', + action='store_false', + help='disable graph', + dest='graph') + parser.add_argument('-m', + '--max-epoch', + default=1001, + type=int, + help='maximum epochs', + dest='max_epoch') + args = parser.parse_args() + + # generate the boundary + f = lambda x: (5 * x + 1) + bd_x = np.linspace(-1.0, 1, 200) + bd_y = f(bd_x) + + # generate the training data + x = np.random.uniform(-1, 1, 400) + y = f(x) + 2 * np.random.randn(len(x)) + + # choose one precision + precision = singa_dtype[args.precision] + np_precision = np_dtype[args.precision] + + # convert training data to 2d space + label = np.asarray([5 * a + 1 > b for (a, b) in zip(x, y)]).astype(np.int32) + data = np.array([[a, b] for (a, b) in zip(x, y)], dtype=np_precision) + + dev = device.create_cuda_gpu_on(0) + sgd = opt.SGD(0.1, 0.9, 1e-5, dtype=singa_dtype[args.precision]) + tx = tensor.Tensor((400, 2), dev, precision) + ty = tensor.Tensor((400,), dev, tensor.int32) + model = diabeticnet(data_size=2, perceptron_size=3, num_classes=2) + + # attach model to graph + model.set_optimizer(sgd) + model.compile([tx], is_train=True, use_graph=args.graph, sequential=True) + model.train() + + for i in range(args.max_epoch): + tx.copy_from_numpy(data) + ty.copy_from_numpy(label) + out, loss = model(tx, ty, 'fp32', spars=None) + + if i % 100 == 0: + print("training loss = ", tensor.to_numpy(loss)[0]) \ No newline at end of file diff --git a/examples/healthcare/models/diabetic_retinopthy_net.py b/examples/healthcare/models/diabetic_retinopthy_net.py index 856adb7e7..e922733a9 100644 --- a/examples/healthcare/models/diabetic_retinopthy_net.py +++ b/examples/healthcare/models/diabetic_retinopthy_net.py @@ -20,10 +20,10 @@ from singa import model -class CNN(model.Model): +class DRNet(model.Model): def __init__(self, num_classes=10, num_channels=1): - super(CNN, self).__init__() + super(DRNet, self).__init__() self.num_classes = num_classes self.input_size = 128 self.dimension = 4 @@ -78,17 +78,17 @@ def set_optimizer(self, optimizer): def create_model(**kwargs): - """Constructs a CNN model. + """Constructs a DRNet model. Args: pretrained (bool): If True, returns a pre-trained model. Returns: - The created CNN model. + The created DRNet model. """ - model = CNN(**kwargs) + model = DRNet(**kwargs) return model -__all__ = ['CNN', 'create_model'] +__all__ = ['DRNet', 'create_model'] diff --git a/examples/healthcare/models/hematologic_net.py b/examples/healthcare/models/hematologic_net.py index fadd050e9..d295b0cfe 100644 --- a/examples/healthcare/models/hematologic_net.py +++ b/examples/healthcare/models/hematologic_net.py @@ -30,9 +30,9 @@ singa_dtype = {"float16": tensor.float16, "float32": tensor.float32} -class CNNModel(model.Model): +class HematologicNet(model.Model): def __init__(self, num_classes): - super(CNNModel, self).__init__() + super(HematologicNet, self).__init__() self.input_size = 28 self.dimension = 4 self.num_classes = num_classes @@ -108,17 +108,14 @@ def train_one_batch(self, x, y, dist_option, spars): def create_model(**kwargs): - """Constructs a CNN model. - - Args: - pretrained (bool): If True, returns a pre-trained model. + """Constructs a HematologicNet model. 
Returns: - The created CNN model. + The created HematologicNet model. """ - model = CNNModel(**kwargs) + model = HematologicNet(**kwargs) return model -__all__ = ['CNNModel', 'create_model'] \ No newline at end of file +__all__ = ['HematologicNet', 'create_model'] \ No newline at end of file diff --git a/examples/healthcare/models/kidney_net.py b/examples/healthcare/models/kidney_net.py new file mode 100644 index 000000000..67d472412 --- /dev/null +++ b/examples/healthcare/models/kidney_net.py @@ -0,0 +1,140 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +# + +from singa import layer +from singa import model +from singa import tensor +from singa import opt +from singa import device +import argparse +import numpy as np + +np_dtype = {"float16": np.float16, "float32": np.float32} + +singa_dtype = {"float16": tensor.float16, "float32": tensor.float32} + + +class KidneyNet(model.Model): + + def __init__(self, data_size=10, perceptron_size=100, num_classes=10): + super(KidneyNet, self).__init__() + self.num_classes = num_classes + self.dimension = 2 + + self.relu = layer.ReLU() + self.linear1 = layer.Linear(perceptron_size) + self.linear2 = layer.Linear(num_classes) + self.softmax_cross_entropy = layer.SoftMaxCrossEntropy() + + def forward(self, inputs): + y = self.linear1(inputs) + y = self.relu(y) + y = self.linear2(y) + return y + + def train_one_batch(self, x, y, dist_option, spars): + out = self.forward(x) + loss = self.softmax_cross_entropy(out, y) + + if dist_option == 'plain': + self.optimizer(loss) + elif dist_option == 'half': + self.optimizer.backward_and_update_half(loss) + elif dist_option == 'partialUpdate': + self.optimizer.backward_and_partial_update(loss) + elif dist_option == 'sparseTopK': + self.optimizer.backward_and_sparse_update(loss, + topK=True, + spars=spars) + elif dist_option == 'sparseThreshold': + self.optimizer.backward_and_sparse_update(loss, + topK=False, + spars=spars) + return out, loss + + def set_optimizer(self, optimizer): + self.optimizer = optimizer + + +def create_model(pretrained=False, **kwargs): + + model = KidneyNet(**kwargs) + + return model + + +__all__ = ['KidneyNet', 'create_model'] + +if __name__ == "__main__": + np.random.seed(0) + + parser = argparse.ArgumentParser() + parser.add_argument('-p', + choices=['float32', 'float16'], + default='float32', + dest='precision') + parser.add_argument('-g', + '--disable-graph', + default='True', + action='store_false', + help='disable graph', + dest='graph') + parser.add_argument('-m', + '--max-epoch', + default=1001, + type=int, + help='maximum epochs', + dest='max_epoch') + args = parser.parse_args() + + # generate the boundary + f = lambda x: (5 * x + 1) + bd_x = np.linspace(-1.0, 1, 200) + bd_y = f(bd_x) + + # generate the training data + x = 
np.random.uniform(-1, 1, 400) + y = f(x) + 2 * np.random.randn(len(x)) + + # choose one precision + precision = singa_dtype[args.precision] + np_precision = np_dtype[args.precision] + + # convert training data to 2d space + label = np.asarray([5 * a + 1 > b for (a, b) in zip(x, y)]).astype(np.int32) + data = np.array([[a, b] for (a, b) in zip(x, y)], dtype=np_precision) + + dev = device.create_cuda_gpu_on(0) + sgd = opt.SGD(0.1, 0.9, 1e-5, dtype=singa_dtype[args.precision]) + tx = tensor.Tensor((400, 2), dev, precision) + ty = tensor.Tensor((400,), dev, tensor.int32) + model = KidneyNet(data_size=2, perceptron_size=3, num_classes=2) + + # attach model to graph + model.set_optimizer(sgd) + model.compile([tx], is_train=True, use_graph=args.graph, sequential=True) + model.train() + + for i in range(args.max_epoch): + tx.copy_from_numpy(data) + ty.copy_from_numpy(label) + out, loss = model(tx, ty, 'fp32', spars=None) + + if i % 100 == 0: + print("training loss = ", tensor.to_numpy(loss)[0]) diff --git a/examples/healthcare/models/malaria_net.py b/examples/healthcare/models/malaria_net.py index 2a10a7078..b7f14dbc5 100644 --- a/examples/healthcare/models/malaria_net.py +++ b/examples/healthcare/models/malaria_net.py @@ -28,10 +28,10 @@ singa_dtype = {"float16": tensor.float16, "float32": tensor.float32} -class CNN(model.Model): +class MalariaNet(model.Model): def __init__(self, num_classes=10, num_channels=1): - super(CNN, self).__init__() + super(MalariaNet, self).__init__() self.num_classes = num_classes self.input_size = 128 self.dimension = 4 @@ -127,20 +127,20 @@ def set_optimizer(self, optimizer): self.optimizer = optimizer -def create_model(model_option='cnn', **kwargs): - """Constructs a CNN model. +def create_model(model_option='MalariaNet', **kwargs): + """Constructs a MalariaNet model. Args: pretrained (bool): If True, returns a pre-trained model. Returns: - The created CNN model. + The created MalariaNet model. """ - model = CNN(**kwargs) + model = MalariaNet(**kwargs) if model_option=='mlp': model = MLP(**kwargs) return model -__all__ = ['CNN', 'MLP', 'create_model'] \ No newline at end of file +__all__ = ['MalariaNet', 'MLP', 'create_model'] \ No newline at end of file diff --git a/examples/healthcare/models/tedct_net.py b/examples/healthcare/models/tedct_net.py new file mode 100644 index 000000000..40662e869 --- /dev/null +++ b/examples/healthcare/models/tedct_net.py @@ -0,0 +1,183 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# + +from singa import layer +from singa import model +import singa.tensor as tensor +from singa import autograd +from singa.tensor import Tensor + + +class CPLayer(layer.Layer): + def __init__(self, prototype_count=2, temp=10.0): + super(CPLayer, self).__init__() + self.prototype_count = prototype_count + self.temp = temp + + def initialize(self, x): + self.feature_dim = x.shape[1] + self.prototype = tensor.random( + (self.feature_dim, self.prototype_count), device=x.device + ) + + def forward(self, feat): + self.device_check(feat, self.prototype) + self.dtype_check(feat, self.prototype) + + feat_sq = autograd.mul(feat, feat) + feat_sq_sum = autograd.reduce_sum(feat_sq, axes=[1], keepdims=1) + feat_sq_sum_tile = autograd.tile(feat_sq_sum, repeats=[1, self.feature_dim]) + + prototype_sq = autograd.mul(self.prototype, self.prototype) + prototype_sq_sum = autograd.reduce_sum(prototype_sq, axes=[0], keepdims=1) + prototype_sq_sum_tile = autograd.tile(prototype_sq_sum, repeats=feat.shape[0]) + + cross_term = autograd.matmul(feat, self.prototype) + cross_term_scale = Tensor( + shape=cross_term.shape, device=cross_term.device, requires_grad=False + ).set_value(-2) + cross_term_scaled = autograd.mul(cross_term, cross_term_scale) + + dist = autograd.add(feat_sq_sum_tile, prototype_sq_sum_tile) + dist = autograd.add(dist, cross_term_scaled) + + logits_coeff = ( + tensor.ones((feat.shape[0], self.prototype.shape[1]), device=feat.device) + * -1.0 + / self.temp + ) + logits_coeff.requires_grad = False + logits = autograd.mul(logits_coeff, dist) + + return logits + + def get_params(self): + return {self.prototype.name: self.prototype} + + def set_params(self, parameters): + self.prototype.copy_from(parameters[self.prototype.name]) + + +class TEDctNet(model.Model): + + def __init__( + self, + backbone: model.Model, + prototype_count=2, + lamb=0.5, + temp=10, + label=None, + prototype_weight=None, + ): + super(TEDctNet, self).__init__() + # config + self.lamb = lamb + self.prototype_weight = prototype_weight + self.prototype_label = label + + # layer + self.backbone = backbone + self.cplayer = CPLayer(prototype_count=prototype_count, temp=temp) + # optimizer + self.softmax_cross_entropy = layer.SoftMaxCrossEntropy() + + def forward(self, x): + feat = self.backbone.forward(x) + logits = self.cplayer(feat) + return logits + + def train_one_batch(self, x, y, dist_option, spars): + out = self.forward(x) + loss = self.softmax_cross_entropy(out, y) + self.optimizer(loss) + return out, loss + + def set_optimizer(self, optimizer): + self.optimizer = optimizer + + +class Backbone(model.Model): + + def __init__(self, num_classes=10, num_channels=1): + super(Backbone, self).__init__() + self.num_classes = num_classes + self.input_size = 28 + self.dimension = 4 + self.conv1 = layer.Conv2d(num_channels, 20, 5, padding=0, activation="RELU") + self.conv2 = layer.Conv2d(20, 50, 5, padding=0, activation="RELU") + self.linear1 = layer.Linear(500) + self.linear2 = layer.Linear(num_classes) + self.pooling1 = layer.MaxPool2d(2, 2, padding=0) + self.pooling2 = layer.MaxPool2d(2, 2, padding=0) + self.relu = layer.ReLU() + self.flatten = layer.Flatten() + self.softmax_cross_entropy = layer.SoftMaxCrossEntropy() + + def forward(self, x): + y = self.conv1(x) + y = self.pooling1(y) + y = self.conv2(y) + y = self.pooling2(y) + y = self.flatten(y) + y = self.linear1(y) + y = self.relu(y) + y = self.linear2(y) + return y + + def train_one_batch(self, x, y, dist_option, spars): + out = self.forward(x) + loss = 
self.softmax_cross_entropy(out, y)
+
+        if dist_option == 'plain':
+            self.optimizer(loss)
+        elif dist_option == 'half':
+            self.optimizer.backward_and_update_half(loss)
+        elif dist_option == 'partialUpdate':
+            self.optimizer.backward_and_partial_update(loss)
+        elif dist_option == 'sparseTopK':
+            self.optimizer.backward_and_sparse_update(loss,
+                                                      topK=True,
+                                                      spars=spars)
+        elif dist_option == 'sparseThreshold':
+            self.optimizer.backward_and_sparse_update(loss,
+                                                      topK=False,
+                                                      spars=spars)
+        return out, loss
+
+    def set_optimizer(self, optimizer):
+        self.optimizer = optimizer
+
+def create_backbone(pretrained=False, **kwargs):
+    """Constructs the CNN backbone model.
+
+    Args:
+        pretrained (bool): If True, returns a pre-trained model.
+
+    Returns:
+        The created backbone model.
+    """
+    model = Backbone(**kwargs)
+
+    return model
+
+def create_model(backbone, prototype_count=2, lamb=0.5, temp=10):
+    model = TEDctNet(backbone, prototype_count=prototype_count, lamb=lamb, temp=temp)
+    return model
+
+
+__all__ = ["TEDctNet", "Backbone", "create_backbone", "create_model"]
diff --git a/examples/malaria_cnn/train_cnn.py b/examples/malaria_cnn/train_cnn.py
index bfe810d4f..4a54e6c83 100644
--- a/examples/malaria_cnn/train_cnn.py
+++ b/examples/malaria_cnn/train_cnn.py
@@ -1,3 +1,22 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+
 from singa import singa_wrap as singa
 from singa import device
 from singa import tensor
diff --git a/examples/singa_peft/README.md b/examples/singa_peft/README.md
new file mode 100644
index 000000000..76da6b690
--- /dev/null
+++ b/examples/singa_peft/README.md
@@ -0,0 +1,67 @@
+
+
+# Singa PEFT
+
+## Code structure design
+
+The code structure design is shown in the figure below; the next section explains each component in detail.
+
+![PEFT code structure](docs/peft.png)
+
+
+## Code structure specification
+
+The code structure is shown in the tree below:
+
+```
+singa-peft/
+├── __init__.py
+├── tuner/
+│   ├── __init__.py
+│   ├── base_tuner.py
+│   └── linear_lora/
+│       ├── __init__.py
+│       ├── config.py
+│       ├── layer.py
+│       └── tuner.py
+├── peft_config.py
+├── peft_registry.py
+└── peft_model.py
+```
+
+`peft_config.py` contains the peft configuration base class **PeftConfig**, which defines some common parameters. All other peft configuration classes must inherit from this class.
+
+
+`peft_registry.py` contains the **PeftRegistry** class. You can register a new peft tuner by applying the decorator `@PeftRegistry.register("xxx")`.
+
+
+`base_tuner.py` contains the **BaseTuner** class, which is the base class for all tuners. All peft methods must inherit from this class and implement the `inject` and `merge_weights` abstract methods.
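+
+As a rough sketch, registering a new peft tuner would look like the following (the import paths and method signatures below are assumptions based on the structure described here, not the library's actual API):
+
+```python
+from peft_registry import PeftRegistry
+from tuner.base_tuner import BaseTuner
+
+@PeftRegistry.register("my_tuner")  # register the tuner under a new name
+class MyTuner(BaseTuner):
+    def inject(self, model):
+        # Replace the target layers of `model` with adapter layers.
+        raise NotImplementedError
+
+    def merge_weights(self, model):
+        # Fold the adapter weights back into the base layers for inference.
+        raise NotImplementedError
+```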
+
+
+`linear_lora/config.py` contains the **LinearLoraConfig** class, which inherits from the **PeftConfig** class and includes the parameters required by the LoRA method.
+
+
+`linear_lora/layer.py` contains the **LinearLoRALayer** class, which implements the LoRA method for a linear layer. The tuner's `inject` method replaces the linear layers in the base model with this layer.
+
+
+`linear_lora/tuner.py` contains the **LinearLoraTuner** class, which inherits from the **BaseTuner** class. First, register the peft method by putting the decorator `@PeftRegistry.register("linear_lora")` on the class. Next, implement the abstract methods inherited from **BaseTuner**, namely `inject` and `merge_weights`. The `inject` method specifies which layers to replace, for example, replacing a plain linear layer with a LinearLoRA layer. The `merge_weights` method merges the parameters of the replaced layers back into the base weights to speed up inference.
+
+
+Finally, if you want to extend a new peft method, you can follow the scripts in the `linear_lora` directory. In addition, you need to expose the corresponding classes and methods in the `__init__.py` file. Also see the scripts in the `examples` directory for how to use the library.
diff --git a/examples/singa_peft/docs/peft.png b/examples/singa_peft/docs/peft.png
new file mode 100644
index 000000000..fa0e1e60b
Binary files /dev/null and b/examples/singa_peft/docs/peft.png differ
diff --git a/setup.py b/setup.py
index a7811c77f..5d0d2f579 100644
--- a/setup.py
+++ b/setup.py
@@ -83,7 +83,7 @@ from datetime import date
 
 # stable version
-VERSION = '4.3.0'
+VERSION = '5.0.0'
 # get the git hash
 # git_hash = subprocess.check_output(["git", "describe"]).strip().split('-')[-1][1:]
 # comment the next line to build wheel for stable version
diff --git a/tool/conda/singa/meta.yaml b/tool/conda/singa/meta.yaml
index e8e2fcc60..6fe568d21 100644
--- a/tool/conda/singa/meta.yaml
+++ b/tool/conda/singa/meta.yaml
@@ -20,7 +20,7 @@
 # https://docs.conda.io/projects/conda-build/en/latest/resources/define-metadata.html#templating-with-jinja
 # {% set data = load_setup_py_data(setup_file='../../../python/singa/setup.py', from_recipe_dir=True) %}
 
-{% set version = "4.3.0" %}
+{% set version = "5.0.0" %}
 
 package:
   name: singa