Oneflow-Inc · Ldpe2G · Aug 30, 2021
diff --git a/Classification/cnns/alexnet_model.py b/Classification/cnns/alexnet_model.py
@@ -60,7 +60,7 @@ def conv2d_layer(
         else (filters, kernel_size_1, kernel_size_2, input.shape[3])
     )
     weight = flow.get_variable(
-        name + "-weight",
+        name + ".weight",
         shape=weight_shape,
         dtype=input.dtype,
         initializer=weight_initializer,
@@ -71,7 +71,7 @@ def conv2d_layer(
     )
     if use_bias:
         bias = flow.get_variable(
-            name + "-bias",
+            name + ".bias",
             shape=(filters,),
             dtype=input.dtype,
             initializer=bias_initializer,
@@ -92,7 +92,7 @@ def alexnet(images, args, trainable=True):
     data_format = "NHWC" if args.channel_last else "NCHW"
 
     conv1 = conv2d_layer(
-        "conv1",
+        "features.0",
         images,
         filters=64,
         kernel_size=11,
@@ -104,22 +104,24 @@ def alexnet(images, args, trainable=True):
     pool1 = flow.nn.avg_pool2d(conv1, 3, 2, "VALID", data_format, name="pool1")
 
     conv2 = conv2d_layer(
-        "conv2", pool1, filters=192, kernel_size=5, data_format=data_format
+        "features.3", pool1, filters=192, kernel_size=5, data_format=data_format
     )
 
     pool2 = flow.nn.avg_pool2d(conv2, 3, 2, "VALID", data_format, name="pool2")
 
-    conv3 = conv2d_layer("conv3", pool2, filters=384, data_format=data_format)
+    conv3 = conv2d_layer("features.6", pool2, filters=384, data_format=data_format)
 
-    conv4 = conv2d_layer("conv4", conv3, filters=384, data_format=data_format)
+    conv4 = conv2d_layer("features.8", conv3, filters=384, data_format=data_format)
 
-    conv5 = conv2d_layer("conv5", conv4, filters=256, data_format=data_format)
+    conv5 = conv2d_layer("features.10", conv4, filters=256, data_format=data_format)
 
     pool5 = flow.nn.avg_pool2d(conv5, 3, 2, "VALID", data_format, name="pool5")
 
     if len(pool5.shape) > 2:
         pool5 = flow.reshape(pool5, shape=(pool5.shape[0], -1))
-
+    print("###############")
+    print(pool5.shape)
+    print("###############")
     fc1 = flow.layers.dense(
         inputs=pool5,
         units=4096,
@@ -131,13 +133,13 @@ def alexnet(images, args, trainable=True):
         kernel_regularizer=_get_regularizer(),
         bias_regularizer=_get_regularizer(),
         trainable=trainable,
-        name="fc1",
+        name="classifier.0",
     )
 
-    dropout1 = flow.nn.dropout(fc1, rate=0.5)
+    # dropout1 = flow.nn.dropout(fc1, rate=0.5)
 
     fc2 = flow.layers.dense(
-        inputs=dropout1,
+        inputs=fc1,
         units=4096,
         activation=flow.nn.relu,
         use_bias=True,
@@ -146,21 +148,21 @@ def alexnet(images, args, trainable=True):
         kernel_regularizer=_get_regularizer(),
         bias_regularizer=_get_regularizer(),
         trainable=trainable,
-        name="fc2",
+        name="classifier.2",
     )
 
-    dropout2 = flow.nn.dropout(fc2, rate=0.5)
+    # dropout2 = flow.nn.dropout(fc2, rate=0.5)
 
     fc3 = flow.layers.dense(
-        inputs=dropout2,
-        units=1000,
+        inputs=fc2,
+        units=args.num_classes,
         activation=None,
         use_bias=False,
         kernel_initializer=_get_kernel_initializer(),
         kernel_regularizer=_get_regularizer(),
         bias_initializer=False,
         trainable=trainable,
-        name="fc3",
+        name="classifier.4",
     )
 
     return fc3
diff --git a/Classification/cnns/config.py b/Classification/cnns/config.py
@@ -83,7 +83,7 @@ def str2bool(v):
         "--pad_output",
         type=str2bool,
         nargs="?",
-        const=True,
+        const=False,
         help="Whether to pad the output to number of image channels to 4.",
     )
 

diff --git a/Classification/cnns/of_cnn_train_val.py b/Classification/cnns/of_cnn_train_val.py
@@ -84,8 +84,8 @@ def TrainNet():
     if args.train_data_dir:
         assert os.path.exists(args.train_data_dir)
         print("Loading data from {}".format(args.train_data_dir))
-        (labels, images) = ofrecord_util.load_imagenet_for_training(args)
-
+        # (labels, images) = ofrecord_util.load_imagenet_for_training(args)
+        (labels, images) = ofrecord_util.load_imagenet_for_training_v2(args)
     else:
         print("Loading synthetic data.")
         (labels, images) = ofrecord_util.load_synthetic(args)

diff --git a/Classification/cnns/ofrecord_util.py b/Classification/cnns/ofrecord_util.py
@@ -106,6 +106,44 @@ def load_imagenet_for_training(args):
     )
     return label, normal
 
+def load_imagenet_for_training_v2(args):
+    """Return ``(label, image)`` blobs decoded from ``args.train_data_dir``.
+
+    The reader passes ``shuffle_after_epoch=False``; crop position is fixed at 0.5.
+    """
+    batch = args.num_nodes * args.gpu_num_per_node * args.batch_size_per_device
+    layout = "NHWC" if args.channel_last else "NCHW"
+    colors = "RGB"
+    records = flow.data.ofrecord_reader(
+        args.train_data_dir,
+        batch_size=batch,
+        data_part_num=args.train_data_part_num,
+        part_name_suffix_length=5,
+        shuffle_after_epoch=False,
+    )
+    decoded = flow.data.OFRecordImageDecoder(records, "encoded", color_space=colors)
+    label = flow.data.OFRecordRawDecoder(
+        records, "class/label", shape=(), dtype=flow.int32
+    )
+    resized = flow.image.Resize(
+        decoded,
+        target_size=args.resize_shorter,
+        resize_side="shorter",
+        keep_aspect_ratio=True,
+    )
+    normalized = flow.image.CropMirrorNormalize(
+        resized[0],
+        color_space=colors,
+        output_layout=layout,
+        mean=args.rgb_mean,
+        std=args.rgb_std,
+        crop_h=args.image_size,
+        crop_w=args.image_size,
+        crop_pos_x=0.5,
+        crop_pos_y=0.5,
+        output_dtype=flow.float,
+    )
+    return label, normalized
 
 def load_imagenet_for_validation(args):
     total_device_num = args.num_nodes * args.gpu_num_per_node

diff --git a/Classification/cnns/train_alexnet.sh b/Classification/cnns/train_alexnet.sh
@@ -0,0 +1,31 @@
+#!/usr/bin/env bash
+# Train AlexNet on imagenette OFRecords, downloading the dataset if it is missing.
+set -eu  # stop on a failed download/extract or an unset variable
+OFRECORD_PATH="ofrecord"
+if [ ! -d "$OFRECORD_PATH" ]; then
+    wget https://oneflow-public.oss-cn-beijing.aliyuncs.com/datasets/imagenette_ofrecord.tar.gz
+    tar zxf imagenette_ofrecord.tar.gz
+fi
+MODEL_LOAD_DIR="initial_model_remove_mom"
+CLASSES=10
+python3 of_cnn_train_val.py \
+    --train_data_dir="$OFRECORD_PATH/train" \
+    --val_data_dir="$OFRECORD_PATH/val" \
+    --train_data_part_num=1 \
+    --val_data_part_num=1 \
+    --num_nodes=1 \
+    --gpu_num_per_node=1 \
+    --optimizer="sgd" \
+    --momentum=0.9 \
+    --learning_rate=0.01 \
+    --pad_output=False \
+    --loss_print_every_n_iter=1 \
+    --batch_size_per_device=512 \
+    --val_batch_size_per_device=512 \
+    --num_examples=9469 \
+    --num_val_examples=3925 \
+    --num_epoch=90 \
+    --use_fp16=false \
+    --model="alexnet" \
+    --num_classes="$CLASSES" \
+    --model_load_dir="$MODEL_LOAD_DIR"