diff --git a/Classification/cnns/train_fp16.sh b/Classification/cnns/train_fp16.sh
new file mode 100755
index 0000000..cda1d15
--- /dev/null
+++ b/Classification/cnns/train_fp16.sh
@@ -0,0 +1,52 @@
+rm -rf core.*
+rm -rf ./output/snapshots/*
+
+if [ -n "$1" ]; then
+    NUM_EPOCH=$1
+else
+    NUM_EPOCH=50
+fi
+echo NUM_EPOCH=$NUM_EPOCH
+
+# training with imagenet
+if [ -n "$2" ]; then
+    DATA_ROOT=$2
+else
+    DATA_ROOT=/data/imagenet/ofrecord
+fi
+echo DATA_ROOT=$DATA_ROOT
+
+LOG_FOLDER=../logs
+mkdir -p $LOG_FOLDER
+LOGFILE=$LOG_FOLDER/resnet_training.log
+
+export PYTHONUNBUFFERED=1
+echo PYTHONUNBUFFERED=$PYTHONUNBUFFERED
+export NCCL_LAUNCH_MODE=PARALLEL
+echo NCCL_LAUNCH_MODE=$NCCL_LAUNCH_MODE
+
+python3 of_cnn_train_val.py \
+     --train_data_dir=$DATA_ROOT/train \
+     --train_data_part_num=256 \
+     --val_data_dir=$DATA_ROOT/validation \
+     --val_data_part_num=256 \
+     --num_nodes=1 \
+     --gpu_num_per_node=8 \
+     --optimizer="sgd" \
+     --momentum=0.875 \
+     --label_smoothing=0.1 \
+     --learning_rate=1.024 \
+     --loss_print_every_n_iter=100 \
+     --batch_size_per_device=128 \
+     --val_batch_size_per_device=50 \
+     --use_fp16 \
+     --channel_last=True \
+     --pad_output \
+     --fuse_bn_relu=True \
+     --fuse_bn_add_relu=True \
+     --nccl_fusion_threshold_mb=16 \
+     --nccl_fusion_max_ops=24 \
+     --num_epoch=$NUM_EPOCH \
+     --model="resnet50" 2>&1 | tee ${LOGFILE}
+
+echo "Writting log to ${LOGFILE}"
diff --git a/Classification/cnns/train_fp32.sh b/Classification/cnns/train_fp32.sh
new file mode 100755
index 0000000..ce06884
--- /dev/null
+++ b/Classification/cnns/train_fp32.sh
@@ -0,0 +1,50 @@
+rm -rf core.*
+rm -rf ./output/snapshots/*
+
+if [ -n "$1" ]; then
+    NUM_EPOCH=$1
+else
+    NUM_EPOCH=50
+fi
+echo NUM_EPOCH=$NUM_EPOCH
+
+# training with imagenet
+if [ -n "$2" ]; then
+    DATA_ROOT=$2
+else
+    DATA_ROOT=/data/imagenet/ofrecord
+fi
+echo DATA_ROOT=$DATA_ROOT
+
+LOG_FOLDER=../logs
+mkdir -p $LOG_FOLDER
+LOGFILE=$LOG_FOLDER/resnet_training.log
+
+export PYTHONUNBUFFERED=1
+echo PYTHONUNBUFFERED=$PYTHONUNBUFFERED
+export NCCL_LAUNCH_MODE=PARALLEL
+echo NCCL_LAUNCH_MODE=$NCCL_LAUNCH_MODE
+
+python3 of_cnn_train_val.py \
+     --train_data_dir=$DATA_ROOT/train \
+     --train_data_part_num=256 \
+     --val_data_dir=$DATA_ROOT/validation \
+     --val_data_part_num=256 \
+     --num_nodes=1 \
+     --gpu_num_per_node=8 \
+     --optimizer="sgd" \
+     --momentum=0.875 \
+     --label_smoothing=0.1 \
+     --learning_rate=0.512 \
+     --loss_print_every_n_iter=100 \
+     --batch_size_per_device=64 \
+     --val_batch_size_per_device=50 \
+     --channel_last=False \
+     --fuse_bn_relu=True \
+     --fuse_bn_add_relu=True \
+     --nccl_fusion_threshold_mb=16 \
+     --nccl_fusion_max_ops=24 \
+     --num_epoch=$NUM_EPOCH \
+     --model="resnet50" 2>&1 | tee ${LOGFILE}
+
+echo "Writting log to ${LOGFILE}"