From a2f4ad9985e8f072a60b5f6d613050076ccf9ebc Mon Sep 17 00:00:00 2001 From: root Date: Thu, 4 Apr 2019 08:06:58 +0000 Subject: [PATCH 1/4] add_more_config_for_lrc --- LRC/genotypes.py | 8 +- LRC/learning_rate.py | 37 +++- LRC/model.py | 326 +++++++++++++++++++++++++---- LRC/operations.py | 2 +- LRC/{reader.py => reader_cifar.py} | 45 +++- LRC/reader_imagenet.py | 154 ++++++++++++++ LRC/run.sh | 8 - LRC/train_imagenet.py | 254 ++++++++++++++++++++++ LRC/train_mixup.py | 56 +++-- LRC/utils.py | 4 + LRC/voting.py | 22 ++ 11 files changed, 836 insertions(+), 80 deletions(-) rename LRC/{reader.py => reader_cifar.py} (83%) create mode 100644 LRC/reader_imagenet.py delete mode 100644 LRC/run.sh create mode 100644 LRC/train_imagenet.py create mode 100644 LRC/voting.py diff --git a/LRC/genotypes.py b/LRC/genotypes.py index 349fbd2..8a9ee42 100644 --- a/LRC/genotypes.py +++ b/LRC/genotypes.py @@ -113,4 +113,10 @@ ('skip_connect', 2), ('skip_connect', 3)], reduce_concat=range(2, 6)) -DARTS = MY_DARTS +MY_DARTS_list = [ + Genotype(normal=[('sep_conv_3x3', 0), ('skip_connect', 1), ('sep_conv_3x3', 0), ('sep_conv_3x3', 1), ('sep_conv_3x3', 0), ('sep_conv_3x3', 1), ('skip_connect', 0), ('sep_conv_3x3', 2)],normal_concat=range(2, 6), reduce=[('max_pool_3x3', 0), ('max_pool_3x3', 1), ('skip_connect', 2),('max_pool_3x3', 0), ('skip_connect', 3), ('avg_pool_3x3', 1), ('skip_connect', 2), ('skip_connect', 3)], reduce_concat=range(2, 6)), + Genotype(normal=[('sep_conv_3x3', 0), ('sep_conv_3x3', 1), ('skip_connect', 0), ('dil_conv_3x3', 2), ('skip_connect', 0), ('sep_conv_3x3', 1), ('skip_connect', 0), ('skip_connect', 1)],normal_concat=range(2, 6), reduce=[('max_pool_3x3', 0), ('max_pool_3x3', 1), ('skip_connect', 2),('dil_conv_3x3', 0), ('skip_connect', 3), ('skip_connect', 2), ('skip_connect', 3), ('skip_connect',2)], reduce_concat=range(2, 6)), + Genotype(normal=[('sep_conv_3x3', 0), ('skip_connect', 1), ('skip_connect', 0), ('dil_conv_5x5', 1), ('skip_connect', 0), ('sep_conv_3x3', 1), ('skip_connect', 0), ('sep_conv_3x3', 1)], normal_concat=range(2, 6), reduce=[('max_pool_3x3', 0), ('max_pool_3x3', 1), ('max_pool_3x3', 0), ('skip_connect', 2), ('max_pool_3x3', 0), ('skip_connect', 2), ('skip_connect', 2), ('skip_connect', 3)], reduce_concat=range(2, 6)) +] + +DARTS = MY_DARTS_list[0] diff --git a/LRC/learning_rate.py b/LRC/learning_rate.py index 3965171..d60a395 100644 --- a/LRC/learning_rate.py +++ b/LRC/learning_rate.py @@ -38,6 +38,41 @@ def cosine_decay(learning_rate, num_epoch, steps_one_epoch): with init_on_cpu(): decayed_lr = learning_rate * \ - (ops.cos((global_step / steps_one_epoch) \ + (ops.cos(fluid.layers.floor(global_step / steps_one_epoch) \ * math.pi / num_epoch) + 1)/2 return decayed_lr + + +def cosine_with_warmup_decay(learning_rate, lr_min, steps_one_epoch, + warmup_epochs, total_epoch, num_gpu): + global_step = _decay_step_counter() + epoch_idx = fluid.layers.floor(global_step / steps_one_epoch) + + lr = fluid.layers.create_global_var( + shape=[1], + value=0.0, + dtype='float32', + persistable=True, + name="learning_rate") + + warmup_epoch_var = fluid.layers.fill_constant( + shape=[1], dtype='float32', value=float(warmup_epochs), force_cpu=True) + num_gpu_var = fluid.layers.fill_constant( + shape=[1], dtype='float32', value=float(num_gpu), force_cpu=True) + batch_idx = global_step - steps_one_epoch * epoch_idx + + with fluid.layers.control_flow.Switch() as switch: + with switch.case(epoch_idx < warmup_epoch_var): + epoch_ = (batch_idx + 1) / steps_one_epoch + factor = 1 / num_gpu_var 
* (epoch_ * (num_gpu_var - 1) / warmup_epoch_var + 1) + decayed_lr = learning_rate * factor * num_gpu_var + fluid.layers.assign(decayed_lr, lr) + epoch_ = (batch_idx + 1) / steps_one_epoch + m = epoch_ / total_epoch + frac = (1 + ops.cos(math.pi * m)) / 2 + cosine_lr = (lr_min + (learning_rate - lr_min) * frac) * num_gpu_var + with switch.default(): + fluid.layers.assign(cosine_lr, lr) + + return lr + diff --git a/LRC/model.py b/LRC/model.py index 45a4034..b98def2 100644 --- a/LRC/model.py +++ b/LRC/model.py @@ -97,7 +97,9 @@ def forward(self, s0, s1, drop_prob, is_train, name): def AuxiliaryHeadCIFAR(input, num_classes, aux_name='auxiliary_head'): - relu_a = fluid.layers.relu(input) + relu_a = fluid.layers.relu(input, inplace=True) + #relu_a.persistable = True + #print(relu_a) pool_a = fluid.layers.pool2d(relu_a, 5, 'avg', 3) conv2d_a = fluid.layers.conv2d( pool_a, @@ -141,6 +143,8 @@ def AuxiliaryHeadCIFAR(input, num_classes, aux_name='auxiliary_head'): initializer=Constant(0.), name=bn_b_name + '.bias'), moving_mean_name=bn_b_name + '.running_mean', moving_variance_name=bn_b_name + '.running_var') + #bn_b.persistable = True + #print(bn_b) fc_name = aux_name + '.classifier' fc = fluid.layers.fc(bn_b, num_classes, @@ -174,11 +178,12 @@ def StemConv(input, C_out, kernel_size, padding): return bn_a + class NetworkCIFAR(object): def __init__(self, C, class_num, layers, auxiliary, genotype): - self.class_num = class_num self._layers = layers self._auxiliary = auxiliary + self.class_num = class_num stem_multiplier = 3 self.drop_path_prob = 0 @@ -201,36 +206,12 @@ def __init__(self, C, class_num, layers, auxiliary, genotype): if i == 2 * layers // 3: C_to_auxiliary = C_prev - def forward(self, init_channel, is_train): - self.training = is_train - self.logits_aux = None - num_channel = init_channel * 3 - s0 = StemConv(self.image, num_channel, kernel_size=3, padding=1) - s1 = s0 - for i, cell in enumerate(self.cells): - name = 'cells.' + str(i) + '.' - s0, s1 = s1, cell.forward(s0, s1, self.drop_path_prob, is_train, - name) - if i == int(2 * self._layers // 3): - if self._auxiliary and self.training: - self.logits_aux = AuxiliaryHeadCIFAR(s1, self.class_num) - out = fluid.layers.adaptive_pool2d(s1, (1, 1), "avg") - self.logits = fluid.layers.fc(out, - size=self.class_num, - param_attr=ParamAttr( - initializer=Normal(scale=1e-3), - name='classifier.weight'), - bias_attr=ParamAttr( - initializer=Constant(0.), - name='classifier.bias')) - return self.logits, self.logits_aux - - def build_input(self, image_shape, batch_size, is_train): + def build_input(self, image_shape, is_train): if is_train: py_reader = fluid.layers.py_reader( capacity=64, shapes=[[-1] + image_shape, [-1, 1], [-1, 1], [-1, 1], [-1, 1], - [-1, 1], [-1, batch_size, self.class_num - 1]], + [-1, 1], [50, -1, self.class_num - 1]], lod_levels=[0, 0, 0, 0, 0, 0, 0], dtypes=[ "float32", "int64", "int64", "float32", "int32", "int32", @@ -248,14 +229,52 @@ def build_input(self, image_shape, batch_size, is_train): name='test_reader') return py_reader - def train_model(self, py_reader, init_channels, aux, aux_w, batch_size, - loss_lambda): + + def forward(self, init_channel, is_train): + self.training = is_train + self.logits_aux = None + num_channel = init_channel * 3 + s0 = s1 = StemConv(self.image, num_channel, kernel_size=3, padding=1) + #s0.persistable = True + #print(s0) + print(s0) + for i, cell in enumerate(self.cells): + #s1.persistable = True + #print(s1) + name = 'cells.' + str(i) + '.' 
+ s0, s1 = s1, cell.forward(s0, s1, self.drop_path_prob, is_train, + name) + if i == int(2 * self._layers // 3): + if self._auxiliary and self.training: + #s1.persistable = True + #print(s1) + self.logits_aux = AuxiliaryHeadCIFAR(s1, self.class_num) + #self.logits_aux.persistable = True + #print(self.logits_aux) + out = fluid.layers.adaptive_pool2d(s1, (1, 1), "avg") + #out.persistable = True + #print(out) + self.logits = fluid.layers.fc(out, + size=self.class_num, + param_attr=ParamAttr( + initializer=Normal(scale=1e-3), + name='classifier.weight'), + bias_attr=ParamAttr( + initializer=Constant(0,), + name='classifier.bias')) + #self.logits.persistable = True + #print(self.logits) + #print(self.logits_aux) + return self.logits, self.logits_aux + + def train_model(self, py_reader, init_channels, aux, aux_w, loss_lambda): self.image, self.ya, self.yb, self.lam, self.label_reshape,\ self.non_label_reshape, self.rad_var = fluid.layers.read_file(py_reader) self.logits, self.logits_aux = self.forward(init_channels, True) self.mixup_loss = self.mixup_loss(aux, aux_w) - self.lrc_loss = self.lrc_loss(batch_size) - return self.mixup_loss + loss_lambda * self.lrc_loss + #self.lrc_loss = self.lrc_loss() + #return self.mixup_loss + loss_lambda * self.lrc_loss + return self.mixup_loss def test_model(self, py_reader, init_channels): self.image, self.ya = fluid.layers.read_file(py_reader) @@ -264,12 +283,13 @@ def test_model(self, py_reader, init_channels): loss = fluid.layers.cross_entropy(prob, self.ya) acc_1 = fluid.layers.accuracy(self.logits, self.ya, k=1) acc_5 = fluid.layers.accuracy(self.logits, self.ya, k=5) - return loss, acc_1, acc_5 + return prob, acc_1, acc_5 def mixup_loss(self, auxiliary, auxiliary_weight): prob = fluid.layers.softmax(self.logits, use_cudnn=False) loss_a = fluid.layers.cross_entropy(prob, self.ya) loss_b = fluid.layers.cross_entropy(prob, self.yb) + loss_a_mean = fluid.layers.reduce_mean(loss_a) loss_b_mean = fluid.layers.reduce_mean(loss_b) loss = self.lam * loss_a_mean + (1 - self.lam) * loss_b_mean @@ -281,9 +301,10 @@ def mixup_loss(self, auxiliary, auxiliary_weight): loss_b_aux_mean = fluid.layers.reduce_mean(loss_b_aux) loss_aux = self.lam * loss_a_aux_mean + (1 - self.lam ) * loss_b_aux_mean + #print(loss_aux) return loss + auxiliary_weight * loss_aux - def lrc_loss(self, batch_size): + def lrc_loss(self): y_diff_reshape = fluid.layers.reshape(self.logits, shape=(-1, 1)) label_reshape = fluid.layers.squeeze(self.label_reshape, axes=[1]) non_label_reshape = fluid.layers.squeeze( @@ -296,18 +317,247 @@ def lrc_loss(self, batch_size): y_diff_non_label_reshape = fluid.layers.gather(y_diff_reshape, non_label_reshape) y_diff_label = fluid.layers.reshape( - y_diff_label_reshape, shape=(-1, batch_size, 1)) + y_diff_label_reshape, shape=(1, -1, 1)) y_diff_non_label = fluid.layers.reshape( y_diff_non_label_reshape, - shape=(-1, batch_size, self.class_num - 1)) + shape=(1, -1, self.class_num - 1)) y_diff_ = y_diff_non_label - y_diff_label y_diff_ = fluid.layers.transpose(y_diff_, perm=[1, 2, 0]) rad_var_trans = fluid.layers.transpose(self.rad_var, perm=[1, 2, 0]) rad_y_diff_trans = rad_var_trans * y_diff_ lrc_loss_sum = fluid.layers.reduce_sum(rad_y_diff_trans, dim=[0, 1]) - lrc_loss_ = fluid.layers.abs(lrc_loss_sum) / (batch_size * - (self.class_num - 1)) + shape_nbc = fluid.layers.shape(rad_y_diff_trans) + shape_nb = fluid.layers.slice(shape_nbc, axes=[0], starts=[0], ends=[2]) + num = fluid.layers.reduce_prod(shape_nb) + num.stop_gradient = True + lrc_loss_ = 
fluid.layers.abs(lrc_loss_sum) / num lrc_loss_mean = fluid.layers.reduce_mean(lrc_loss_) return lrc_loss_mean + +def AuxiliaryHeadImageNet(input, num_classes, aux_name='auxiliary_head'): + relu_a = fluid.layers.relu(input, inplace=True) + #relu_a.persistable = True + #print(relu_a) + pool_a = fluid.layers.pool2d(relu_a, 5, 'avg', pool_stride=2) + conv2d_a = fluid.layers.conv2d( + pool_a, + 128, + 1, + name=aux_name + '.features.2', + param_attr=ParamAttr( + initializer=Xavier( + uniform=False, fan_in=0), + name=aux_name + '.features.2.weight'), + bias_attr=False) + bn_a_name = aux_name + '.features.3' + bn_a = fluid.layers.batch_norm( + conv2d_a, + act='relu', + name=bn_a_name, + param_attr=ParamAttr( + initializer=Constant(1.), name=bn_a_name + '.weight'), + bias_attr=ParamAttr( + initializer=Constant(0.), name=bn_a_name + '.bias'), + moving_mean_name=bn_a_name + '.running_mean', + moving_variance_name=bn_a_name + '.running_var') + conv2d_b = fluid.layers.conv2d( + bn_a, + 768, + 2, + act='relu', + name=aux_name + '.features.5', + param_attr=ParamAttr( + initializer=Xavier( + uniform=False, fan_in=0), + name=aux_name + '.features.5.weight'), + bias_attr=False) + #bn_b.persistable = True + #print(bn_b) + fc_name = aux_name + '.classifier' + fc = fluid.layers.fc(conv2d_b, + num_classes, + name=fc_name, + param_attr=ParamAttr( + initializer=Normal(scale=1e-3), + name=fc_name + '.weight'), + bias_attr=ParamAttr( + initializer=Constant(0.), name=fc_name + '.bias')) + return fc + + +def Stem0Conv(input, C_out): + conv_a = fluid.layers.conv2d( + input, + C_out // 2, + 3, + stride=2, + padding=1, + param_attr=ParamAttr( + initializer=Xavier( + uniform=False, fan_in=0), name='stem0.0.weight'), + bias_attr=False) + bn_a = fluid.layers.batch_norm( + conv_a, + param_attr=ParamAttr( + initializer=Constant(1.), name='stem0.1.weight'), + bias_attr=ParamAttr( + initializer=Constant(0.), name='stem0.1.bias'), + moving_mean_name='stem0.1.running_mean', + moving_variance_name='stem0.1.running_var') + relu_a = fluid.layers.relu(bn_a, inplace=True) + conv_b = fluid.layers.conv2d( + relu_a, + C_out, + 3, + padding=1, + param_attr=ParamAttr( + initializer=Xavier( + uniform=False, fan_in=0), name='stem0.3.weight'), + bias_attr=False) + bn_b = fluid.layers.batch_norm( + conv_b, + param_attr=ParamAttr( + initializer=Constant(1.), name='stem0.4.weight'), + bias_attr=ParamAttr( + initializer=Constant(0.), name='stem0.4.bias'), + moving_mean_name='stem0.4.running_mean', + moving_variance_name='stem0.4.running_var') + + return bn_b + +def Stem1Conv(input, C_out): + relu_a = fluid.layers.relu(input, inplace=True) + conv_a = fluid.layers.conv2d( + relu_a, + C_out, + 3, + stride=2, + padding=1, + param_attr=ParamAttr( + initializer=Xavier( + uniform=False, fan_in=0), name='stem1.1.weight'), + bias_attr=False) + bn_a = fluid.layers.batch_norm( + conv_a, + param_attr=ParamAttr( + initializer=Constant(1.), name='stem1.2.weight'), + bias_attr=ParamAttr( + initializer=Constant(0.), name='stem1.2.bias'), + moving_mean_name='stem1.2.running_mean', + moving_variance_name='stem1.2.running_var') + return bn_a + +class NetworkImageNet(object): + def __init__(self, C, class_num, layers, auxiliary, genotype): + self.class_num = class_num + self._layers = layers + self._auxiliary = auxiliary + + self.drop_path_prob = 0 + + C_prev_prev, C_prev, C_curr = C, C, C + self.cells = [] + reduction_prev = True + for i in range(layers): + if i in [layers // 3, 2 * layers // 3]: + C_curr *= 2 + reduction = True + else: + reduction = False + cell 
= Cell(genotype, C_prev_prev, C_prev, C_curr, reduction, + reduction_prev) + reduction_prev = reduction + self.cells += [cell] + C_prev_prev, C_prev = C_prev, cell.multiplier * C_curr + if i == 2 * layers // 3: + C_to_auxiliary = C_prev + self.stem0 = functools.partial(Stem0Conv, C_out=C) + self.stem1 = functools.partial(Stem1Conv, C_out=C) + + def build_input(self, image_shape, is_train): + if is_train: + py_reader = fluid.layers.py_reader( + capacity=64, + shapes=[[-1] + image_shape, [-1, 1]], + lod_levels=[0, 0], + dtypes=[ + "float32", "int64"], + use_double_buffer=True, + name='train_reader') + else: + py_reader = fluid.layers.py_reader( + capacity=64, + shapes=[[-1] + image_shape, [-1, 1]], + lod_levels=[0, 0], + dtypes=["float32", "int64"], + use_double_buffer=True, + name='test_reader') + return py_reader + + + def forward(self, init_channel, is_train): + self.training = is_train + self.logits_aux = None + num_channel = init_channel * 3 + s0 = self.stem0(self.image) + s1 = self.stem1(s0) + for i, cell in enumerate(self.cells): + #s1.persistable = True + #print(s1) + name = 'cells.' + str(i) + '.' + s0, s1 = s1, cell.forward(s0, s1, self.drop_path_prob, is_train, + name) + if i == int(2 * self._layers // 3): + if self._auxiliary and self.training: + #s1.persistable = True + #print(s1) + self.logits_aux = AuxiliaryHeadImageNet(s1, self.class_num) + #self.logits_aux.persistable = True + #print(self.logits_aux) + out = fluid.layers.pool2d(s1, 7, "avg") + #out.persistable = True + #print(out) + self.logits = fluid.layers.fc(out, + size=self.class_num, + param_attr=ParamAttr( + initializer=Normal(scale=1e-3), + name='classifier.weight'), + bias_attr=ParamAttr( + initializer=Constant(0,), + name='classifier.bias')) + #self.logits.persistable = True + #print(self.logits) + #print(self.logits_aux) + return self.logits, self.logits_aux + + def calc_loss(self, auxiliary, auxiliary_weight): + prob = fluid.layers.softmax(self.logits, use_cudnn=False) + loss = fluid.layers.cross_entropy(prob, self.label) + + loss_mean = fluid.layers.reduce_mean(loss) + #if auxiliary: + # prob_aux = fluid.layers.softmax(self.logits_aux, use_cudnn=False) + # loss_aux = fluid.layers.cross_entropy(prob_aux, self.label) + # loss_aux_mean = fluid.layers.reduce_mean(loss_aux) + prob_aux = fluid.layers.softmax(self.logits_aux, use_cudnn=False) + loss_aux = fluid.layers.cross_entropy(prob_aux, self.label) + loss_aux_mean = fluid.layers.reduce_mean(loss_aux) + return loss_mean + auxiliary_weight * loss_aux_mean + + def train_model(self, py_reader, init_channels, aux, aux_w): + self.image, self.label = fluid.layers.read_file(py_reader) + self.logits, self.logits_aux = self.forward(init_channels, True) + self.loss = self.calc_loss(aux, aux_w) + return self.loss + + def test_model(self, py_reader, init_channels): + self.image, self.label = fluid.layers.read_file(py_reader) + self.logits, _ = self.forward(init_channels, False) + prob = fluid.layers.softmax(self.logits, use_cudnn=False) + loss = fluid.layers.cross_entropy(prob, self.label) + acc_1 = fluid.layers.accuracy(self.logits, self.label, k=1) + acc_5 = fluid.layers.accuracy(self.logits, self.label, k=5) + return prob, acc_1, acc_5 + diff --git a/LRC/operations.py b/LRC/operations.py index b015722..be88a7f 100644 --- a/LRC/operations.py +++ b/LRC/operations.py @@ -312,7 +312,7 @@ def FactorizedReduce(input, C_out, name='', affine=True): bias_attr=False) h_end = relu_a.shape[2] w_end = relu_a.shape[3] - slice_a = fluid.layers.slice(relu_a, [2, 3], [1, 1], [h_end, w_end]) 
+ slice_a = fluid.layers.slice(input=relu_a, axes=[2, 3], starts=[1, 1], ends=[h_end, w_end]) conv2d_b = fluid.layers.conv2d( slice_a, C_out // 2, diff --git a/LRC/reader.py b/LRC/reader_cifar.py similarity index 83% rename from LRC/reader.py rename to LRC/reader_cifar.py index 20b32b5..3de35f3 100644 --- a/LRC/reader.py +++ b/LRC/reader_cifar.py @@ -31,7 +31,10 @@ from PIL import ImageOps import numpy as np -import cPickle +try: + import cPickle as pickle +except: + import pickle import random import utils import paddle.fluid as fluid @@ -46,10 +49,9 @@ image_depth = 3 half_length = 8 -CIFAR_MEAN = [0.4914, 0.4822, 0.4465] +CIFAR_MEAN = [0.49139968, 0.48215827, 0.44653124] CIFAR_STD = [0.24703233, 0.24348505, 0.26158768] - def generate_reshape_label(label, batch_size, CIFAR_CLASSES=10): reshape_label = np.zeros((batch_size, 1), dtype='int32') reshape_non_label = np.zeros( @@ -82,10 +84,11 @@ def generate_bernoulli_number(batch_size, CIFAR_CLASSES=10): def preprocess(sample, is_training, args): + image_array = sample.reshape(3, image_size, image_size) rgb_array = np.transpose(image_array, (1, 2, 0)) img = Image.fromarray(rgb_array, 'RGB') - + if is_training: # pad and ramdom crop img = ImageOps.expand(img, (4, 4, 4, 4), fill=0) # pad to 40 * 40 * 3 @@ -94,13 +97,13 @@ def preprocess(sample, is_training, args): left_top[1] + image_size)) if np.random.randint(2): img = img.transpose(Image.FLIP_LEFT_RIGHT) - + img = np.array(img).astype(np.float32) # per_image_standardization img_float = img / 255.0 img = (img_float - CIFAR_MEAN) / CIFAR_STD - + if is_training and args.cutout: center = np.random.randint(image_size, size=2) offset_width = max(0, center[0] - half_length) @@ -111,7 +114,7 @@ def preprocess(sample, is_training, args): for i in range(offset_height, target_height): for j in range(offset_width, target_width): img[i][j][:] = 0.0 - + img = np.transpose(img, (2, 0, 1)) return img @@ -123,13 +126,15 @@ def reader_creator_filepath(filename, sub_name, is_training, args): datasets = [] for name in names: print("Reading file " + name) - batch = cPickle.load(open(filename + name, 'rb')) + batch = pickle.load(open(filename + name, 'rb')) data = batch['data'] labels = batch.get('labels', batch.get('fine_labels', None)) assert labels is not None dataset = zip(data, labels) datasets.extend(dataset) - random.shuffle(datasets) + + if is_training: + random.shuffle(datasets) def read_batch(datasets, args): for sample, label in datasets: @@ -145,6 +150,10 @@ def reader(): if len(batch_data) == args.batch_size: batch_data = np.array(batch_data, dtype='float32') batch_label = np.array(batch_label, dtype='int64') +# +# batch_data = pickle.load(open('input.pkl')) +# batch_label = pickle.load(open('target.pkl')).reshape(-1,1) +# if is_training: flatten_label, flatten_non_label = \ generate_reshape_label(batch_label, args.batch_size) @@ -160,6 +169,24 @@ def reader(): yield batch_out batch_data = [] batch_label = [] + if len(batch_data) != 0: + batch_data = np.array(batch_data, dtype='float32') + batch_label = np.array(batch_label, dtype='int64') + if is_training: + flatten_label, flatten_non_label = \ + generate_reshape_label(batch_label, len(batch_data)) + rad_var = generate_bernoulli_number(len(batch_data)) + mixed_x, y_a, y_b, lam = utils.mixup_data( + batch_data, batch_label, len(batch_data), + args.mix_alpha) + batch_out = [[mixed_x, y_a, y_b, lam, flatten_label, \ + flatten_non_label, rad_var]] + yield batch_out + else: + batch_out = [[batch_data, batch_label]] + yield batch_out + batch_data = [] + 
batch_label = []
return reader
diff --git a/LRC/reader_imagenet.py b/LRC/reader_imagenet.py
new file mode 100644
index 0000000..6577869
--- /dev/null
+++ b/LRC/reader_imagenet.py
@@ -0,0 +1,154 @@
+# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+# Based on:
+# --------------------------------------------------------
+# DARTS
+# Copyright (c) 2018, Hanxiao Liu.
+# Licensed under the Apache License, Version 2.0;
+# --------------------------------------------------------
+
+from PIL import Image
+from PIL import ImageOps
+import numpy as np
+
+try:
+ import cPickle as pickle
+except:
+ import pickle
+import random
+import utils
+import paddle.fluid as fluid
+import time
+import os
+import functools
+import paddle.reader
+import math
+
+__all__ = ['train10', 'test10']
+
+train_image_size = 224
+test_image_size = 256
+
+CIFAR_MEAN = [0.485, 0.456, 0.406]
+CIFAR_STD = [0.229, 0.224, 0.225]
+
+def random_crop(img, size, scale=[0.08, 1.0], ratio=[3. / 4., 4. / 3.]):
+ aspect_ratio = math.sqrt(np.random.uniform(*ratio))
+ w = 1. * aspect_ratio
+ h = 1. / aspect_ratio
+
+ bound = min((float(img.size[0]) / img.size[1]) / (w**2),
+ (float(img.size[1]) / img.size[0]) / (h**2))
+ scale_max = min(scale[1], bound)
+ scale_min = min(scale[0], bound)
+
+ target_area = img.size[0] * img.size[1] * np.random.uniform(scale_min,
+ scale_max)
+ target_size = math.sqrt(target_area)
+ w = int(target_size * w)
+ h = int(target_size * h)
+
+ i = np.random.randint(0, img.size[0] - w + 1)
+ j = np.random.randint(0, img.size[1] - h + 1)
+
+ img = img.crop((i, j, i + w, j + h))
+ img = img.resize((size, size), Image.BILINEAR)
+ return img
+
+def crop_image(img, target_size, center=True):
+ width, height = img.size
+ size = target_size
+ if center == True:
+ w_start = (width - size) / 2
+ h_start = (height - size) / 2
+ else:
+ w_start = np.random.randint(0, width - size + 1)
+ h_start = np.random.randint(0, height - size + 1)
+ w_end = w_start + size
+ h_end = h_start + size
+ img = img.crop((w_start, h_start, w_end, h_end))
+ return img
+
+def preprocess(img_path, is_training):
+
+ img = Image.open(img_path)
+
+ if is_training:
+ # ramdom resized crop
+ img = random_crop(img, train_image_size)
+ # random horizontal flip
+ if np.random.randint(2):
+ img = img.transpose(Image.FLIP_LEFT_RIGHT)
+ else:
+ # resize
+ img = img.resize((test_image_size, test_image_size), Image.BILINEAR)
+ # center crop
+ img = crop_image(img, train_image_size)
+
+ if img.mode != 'RGB':
+ img = img.convert('RGB')
+
+ img = np.array(img).astype(np.float32)
+
+ # per_image_standardization
+ img_float = img / 255.0
+ img = (img_float - CIFAR_MEAN) / CIFAR_STD
+
+ img = np.transpose(img, (2, 0, 1))
+ return img
+
+
+def reader_creator_filepath(data_dir, sub_name, is_training):
+
+ file_list = os.path.join(data_dir, sub_name)
+ image_file = 'train' if is_training else 'val'
+ dataset_path = os.path.join(data_dir, image_file)
+ print(dataset_path)
+ def reader():
+ with open(file_list)
as flist: + lines = [line.strip() for line in flist] + if is_training: + np.random.shuffle(lines) + for line in lines: + img_path, label = line.split() + #img_path = img_path.replace("JPEG", "jpeg") + img_path_ = os.path.join(dataset_path, img_path) + img = preprocess(img_path_, is_training) + yield img, int(label) + + return reader + + +def train(args): + """ + CIFAR-10 training set creator. + It returns a reader creator, each sample in the reader is image pixels in + [0, 1] and label in [0, 9]. + :return: Training reader creator + :rtype: callable + """ + + return reader_creator_filepath(args.data, 'train.txt', True) + + +def test(args): + """ + CIFAR-10 test set creator. + It returns a reader creator, each sample in the reader is image pixels in + [0, 1] and label in [0, 9]. + :return: Test reader creator. + :rtype: callable + """ + return reader_creator_filepath(args.data, 'val.txt', False) diff --git a/LRC/run.sh b/LRC/run.sh deleted file mode 100644 index 9f1a045..0000000 --- a/LRC/run.sh +++ /dev/null @@ -1,8 +0,0 @@ -CUDA_VISIBLE_DEVICES=0 python -u train_mixup.py \ ---batch_size=80 \ ---auxiliary \ ---weight_decay=0.0003 \ ---learning_rate=0.025 \ ---lrc_loss_lambda=0.7 \ ---cutout - diff --git a/LRC/train_imagenet.py b/LRC/train_imagenet.py new file mode 100644 index 0000000..688160e --- /dev/null +++ b/LRC/train_imagenet.py @@ -0,0 +1,254 @@ +# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserve. +# +#Licensed under the Apache License, Version 2.0 (the "License"); +#you may not use this file except in compliance with the License. +#You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +#Unless required by applicable law or agreed to in writing, software +#distributed under the License is distributed on an "AS IS" BASIS, +#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +#See the License for the specific language governing permissions and +#limitations under the License. +# +# Based on: +# -------------------------------------------------------- +# DARTS +# Copyright (c) 2018, Hanxiao Liu. 
+# Licensed under the Apache License, Version 2.0; +# -------------------------------------------------------- + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function +from learning_rate import cosine_with_warmup_decay +import numpy as np +import argparse +from model import NetworkImageNet as Network +import reader_imagenet as reader +import sys +import os +import time +import logging +import genotypes +import paddle +import paddle.fluid as fluid +import shutil +import utils +import math + +parser = argparse.ArgumentParser("imagenet") +parser.add_argument( + '--data', + type=str, + default='./dataset/imagenet/', + help='location of the data corpus') +parser.add_argument('--batch_size', type=int, default=64, help='batch size') +parser.add_argument( + '--pretrained_model', type=str, default='/save_models/599', help='pretrained model to load') +parser.add_argument('--model_id', type=int, default=2, help='model id') +parser.add_argument( + '--learning_rate', type=float, default=0.025, help='init learning rate') +parser.add_argument('--momentum', type=float, default=0.9, help='momentum') +parser.add_argument( + '--weight_decay', type=float, default=4e-5, help='weight decay') +parser.add_argument( + '--report_freq', type=float, default=10, help='report frequency') +parser.add_argument( + '--epochs', type=int, default=90, help='num of training epochs') +parser.add_argument( + '--init_channels', type=int, default=96, help='num of init channels') +parser.add_argument( + '--layers', type=int, default=20, help='total number of layers') +parser.add_argument( + '--save_model_path', + type=str, + default='save_models', + help='path to save the model') +parser.add_argument( + '--auxiliary', + action='store_true', + default=False, + help='use auxiliary tower') +parser.add_argument( + '--auxiliary_weight', + type=float, + default=0.4, + help='weight for auxiliary loss') +parser.add_argument( + '--drop_path_prob', type=float, default=0.4, help='drop path probability') +parser.add_argument( + '--arch', type=str, default='DARTS', help='which architecture to use') +parser.add_argument( + '--grad_clip', type=float, default=5, help='gradient clipping') +parser.add_argument( + '--warmup_epochs', + default=5, + type=float, + help='warm up to learning rate') +parser.add_argument('--lr_min', type=float, default=0.0001, + help='minimum learning rate for a single GPU') + +args = parser.parse_args() + +ImageNet_CLASSES = 1000 +dataset_train_size = 1281167 +image_size = 224 +genotypes.DARTS = genotypes.MY_DARTS_list[args.model_id] + +def main(): + image_shape = [3, image_size, image_size] + devices = os.getenv("CUDA_VISIBLE_DEVICES") or "" + devices_num = len(devices.split(",")) + logging.info("args = %s", args) + genotype = eval("genotypes.%s" % args.arch) + model = Network(args.init_channels, ImageNet_CLASSES, args.layers, + args.auxiliary, genotype) + + steps_one_epoch = math.ceil(dataset_train_size / (devices_num * args.batch_size)) + train(model, args, image_shape, steps_one_epoch, devices_num) + + +def build_program(main_prog, startup_prog, args, is_train, model, im_shape, + steps_one_epoch, num_gpu): + out = [] + with fluid.program_guard(main_prog, startup_prog): + py_reader = model.build_input(im_shape, is_train) + if is_train: + with fluid.unique_name.guard(): + loss = model.train_model(py_reader, args.init_channels, + args.auxiliary, args.auxiliary_weight) + optimizer = fluid.optimizer.Momentum( + learning_rate=cosine_with_warmup_decay(\ + 
args.learning_rate, args.lr_min, steps_one_epoch,\ + args.warmup_epochs, args.epochs, num_gpu), + regularization=fluid.regularizer.L2Decay(\ + args.weight_decay), + momentum=args.momentum) + optimizer.minimize(loss) + out = [py_reader, loss] + else: + with fluid.unique_name.guard(): + prob, acc_1, acc_5 = model.test_model(py_reader, + args.init_channels) + out = [py_reader, prob, acc_1, acc_5] + return out + + +def train(model, args, im_shape, steps_one_epoch, num_gpu): + train_startup_prog = fluid.Program() + test_startup_prog = fluid.Program() + train_prog = fluid.Program() + test_prog = fluid.Program() + + train_py_reader, loss_train = build_program(train_prog, train_startup_prog, + args, True, model, im_shape, + steps_one_epoch, num_gpu) + + test_py_reader, prob, acc_1, acc_5 = build_program( + test_prog, test_startup_prog, args, False, model, im_shape, + steps_one_epoch, num_gpu) + + test_prog = test_prog.clone(for_test=True) + + place = fluid.CUDAPlace(0) + exe = fluid.Executor(place) + exe.run(train_startup_prog) + exe.run(test_startup_prog) + + #if args.pretrained_model: + + # def if_exist(var): + # return os.path.exists(os.path.join(args.pretrained_model, var.name)) + + # fluid.io.load_vars(exe, args.pretrained_model, main_program=train_prog, predicate=if_exist) + + exec_strategy = fluid.ExecutionStrategy() + exec_strategy.num_threads = 1 + train_exe = fluid.ParallelExecutor( + main_program=train_prog, + use_cuda=True, + loss_name=loss_train.name, + exec_strategy=exec_strategy) + + train_batch_size = args.batch_size + test_batch_size = 256 + train_reader = paddle.batch( + reader.train(args), batch_size=train_batch_size, drop_last=True) + test_reader = paddle.batch(reader.test(args), batch_size=test_batch_size) + + train_py_reader.decorate_paddle_reader(train_reader) + test_py_reader.decorate_paddle_reader(test_reader) + + fluid.clip.set_gradient_clip(fluid.clip.GradientClipByGlobalNorm(args.grad_clip), program=train_prog) + train_fetch_list = [loss_train] + fluid.memory_optimize(train_prog, skip_opt_set=set(train_fetch_list)) + + def save_model(postfix, main_prog): + model_path = os.path.join(args.save_model_path, postfix) + if os.path.isdir(model_path): + shutil.rmtree(model_path) + fluid.io.save_persistables(exe, model_path, main_program=main_prog) + + def test(epoch_id): + test_fetch_list = [prob, acc_1, acc_5] + #objs = utils.AvgrageMeter() + #prob = [] + top1 = utils.AvgrageMeter() + top5 = utils.AvgrageMeter() + test_py_reader.start() + test_start_time = time.time() + step_id = 0 + try: + while True: + prev_test_start_time = test_start_time + test_start_time = time.time() + prob_v, acc_1_v, acc_5_v = exe.run( + test_prog, fetch_list=test_fetch_list) + top1.update(np.array(acc_1_v), np.array(prob_v).shape[0]) + top5.update(np.array(acc_5_v), np.array(prob_v).shape[0]) + if step_id % args.report_freq == 0: + print("Epoch {}, Step {}, acc_1 {}, acc_5 {}, time {}". 
+ format(epoch_id, step_id, + np.array(acc_1_v), + np.array(acc_5_v), test_start_time - + prev_test_start_time)) + step_id += 1 + except fluid.core.EOFException: + test_py_reader.reset() + print("Epoch {0}, top1 {1}, top5 {2}".format(epoch_id, top1.avg, + top5.avg)) + + epoch_start_time = time.time() + for epoch_id in range(args.epochs): + model.drop_path_prob = args.drop_path_prob * epoch_id / args.epochs + train_py_reader.start() + epoch_end_time = time.time() + if epoch_id > 0: + print("Epoch {}, total time {}".format(epoch_id - 1, epoch_end_time + - epoch_start_time)) + epoch_start_time = epoch_end_time + epoch_end_time + start_time = time.time() + step_id = 0 + try: + while True: + prev_start_time = start_time + start_time = time.time() + loss_v, = train_exe.run( + fetch_list=[v.name for v in train_fetch_list]) + print("Epoch {}, Step {}, loss {}, time {}".format(epoch_id, step_id, \ + np.array(loss_v).mean(), start_time-prev_start_time)) + step_id += 1 + sys.stdout.flush() + os._exit(1) + except fluid.core.EOFException: + train_py_reader.reset() + if epoch_id % 50 == 0 or epoch_id == args.epochs - 1: + save_model(str(epoch_id), train_prog) + test(epoch_id) + + +if __name__ == '__main__': + main() diff --git a/LRC/train_mixup.py b/LRC/train_mixup.py index de752c8..1e67c6a 100644 --- a/LRC/train_mixup.py +++ b/LRC/train_mixup.py @@ -26,7 +26,7 @@ import numpy as np import argparse from model import NetworkCIFAR as Network -import reader +import reader_cifar as reader import sys import os import time @@ -35,7 +35,7 @@ import paddle.fluid as fluid import shutil import utils -import cPickle as cp +import math parser = argparse.ArgumentParser("cifar") parser.add_argument( @@ -44,6 +44,9 @@ default='./dataset/cifar/cifar-10-batches-py/', help='location of the data corpus') parser.add_argument('--batch_size', type=int, default=96, help='batch size') +parser.add_argument( + '--pretrained_model', type=str, default='/save_models/599', help='pretrained model to load') +parser.add_argument('--model_id', type=int, help='model id') parser.add_argument( '--learning_rate', type=float, default=0.025, help='init learning rate') parser.add_argument('--momentum', type=float, default=0.9, help='momentum') @@ -58,7 +61,7 @@ parser.add_argument( '--layers', type=int, default=20, help='total number of layers') parser.add_argument( - '--model_path', + '--save_model_path', type=str, default='saved_models', help='path to save the model') @@ -78,7 +81,6 @@ '--cutout_length', type=int, default=16, help='cutout length') parser.add_argument( '--drop_path_prob', type=float, default=0.2, help='drop path probability') -parser.add_argument('--save', type=str, default='EXP', help='experiment name') parser.add_argument( '--arch', type=str, default='DARTS', help='which architecture to use') parser.add_argument( @@ -100,9 +102,9 @@ args = parser.parse_args() CIFAR_CLASSES = 10 -dataset_train_size = 50000 +dataset_train_size = 50000. 
image_size = 32 - +genotypes.DARTS = genotypes.MY_DARTS_list[args.model_id] def main(): image_shape = [3, image_size, image_size] @@ -112,7 +114,8 @@ def main(): genotype = eval("genotypes.%s" % args.arch) model = Network(args.init_channels, CIFAR_CLASSES, args.layers, args.auxiliary, genotype) - steps_one_epoch = dataset_train_size / (devices_num * args.batch_size) + + steps_one_epoch = math.ceil(dataset_train_size / (devices_num * args.batch_size)) train(model, args, image_shape, steps_one_epoch) @@ -120,12 +123,12 @@ def build_program(main_prog, startup_prog, args, is_train, model, im_shape, steps_one_epoch): out = [] with fluid.program_guard(main_prog, startup_prog): - py_reader = model.build_input(im_shape, args.batch_size, is_train) + py_reader = model.build_input(im_shape, is_train) if is_train: with fluid.unique_name.guard(): loss = model.train_model(py_reader, args.init_channels, args.auxiliary, args.auxiliary_weight, - args.batch_size, args.lrc_loss_lambda) + args.lrc_loss_lambda) optimizer = fluid.optimizer.Momentum( learning_rate=cosine_decay(args.learning_rate, \ args.epochs, steps_one_epoch), @@ -136,9 +139,9 @@ def build_program(main_prog, startup_prog, args, is_train, model, im_shape, out = [py_reader, loss] else: with fluid.unique_name.guard(): - loss, acc_1, acc_5 = model.test_model(py_reader, + prob, acc_1, acc_5 = model.test_model(py_reader, args.init_channels) - out = [py_reader, loss, acc_1, acc_5] + out = [py_reader, prob, acc_1, acc_5] return out @@ -152,7 +155,7 @@ def train(model, args, im_shape, steps_one_epoch): args, True, model, im_shape, steps_one_epoch) - test_py_reader, loss_test, acc_1, acc_5 = build_program( + test_py_reader, prob, acc_1, acc_5 = build_program( test_prog, test_startup_prog, args, False, model, im_shape, steps_one_epoch) @@ -163,6 +166,13 @@ def train(model, args, im_shape, steps_one_epoch): exe.run(train_startup_prog) exe.run(test_startup_prog) + #if args.pretrained_model: + + # def if_exist(var): + # return os.path.exists(os.path.join(args.pretrained_model, var.name)) + + # fluid.io.load_vars(exe, args.pretrained_model, main_program=train_prog, predicate=if_exist) + exec_strategy = fluid.ExecutionStrategy() exec_strategy.num_threads = 1 train_exe = fluid.ParallelExecutor( @@ -170,23 +180,27 @@ def train(model, args, im_shape, steps_one_epoch): use_cuda=True, loss_name=loss_train.name, exec_strategy=exec_strategy) + + train_reader = reader.train10(args) test_reader = reader.test10(args) train_py_reader.decorate_paddle_reader(train_reader) test_py_reader.decorate_paddle_reader(test_reader) - fluid.clip.set_gradient_clip(fluid.clip.GradientClipByNorm(args.grad_clip)) - fluid.memory_optimize(fluid.default_main_program()) + fluid.clip.set_gradient_clip(fluid.clip.GradientClipByGlobalNorm(args.grad_clip), program=train_prog) + train_fetch_list = [loss_train] + fluid.memory_optimize(train_prog, skip_opt_set=set(train_fetch_list)) def save_model(postfix, main_prog): - model_path = os.path.join(args.model_path, postfix) + model_path = os.path.join(args.save_model_path, postfix) if os.path.isdir(model_path): shutil.rmtree(model_path) fluid.io.save_persistables(exe, model_path, main_program=main_prog) def test(epoch_id): - test_fetch_list = [loss_test, acc_1, acc_5] - objs = utils.AvgrageMeter() + test_fetch_list = [prob, acc_1, acc_5] + #objs = utils.AvgrageMeter() + #prob = [] top1 = utils.AvgrageMeter() top5 = utils.AvgrageMeter() test_py_reader.start() @@ -196,11 +210,10 @@ def test(epoch_id): while True: prev_test_start_time = test_start_time 
test_start_time = time.time() - loss_test_v, acc_1_v, acc_5_v = exe.run( + prob_v, acc_1_v, acc_5_v = exe.run( test_prog, fetch_list=test_fetch_list) - objs.update(np.array(loss_test_v), args.batch_size) - top1.update(np.array(acc_1_v), args.batch_size) - top5.update(np.array(acc_5_v), args.batch_size) + top1.update(np.array(acc_1_v), np.array(prob_v).shape[0]) + top5.update(np.array(acc_5_v), np.array(prob_v).shape[0]) if step_id % args.report_freq == 0: print("Epoch {}, Step {}, acc_1 {}, acc_5 {}, time {}". format(epoch_id, step_id, @@ -213,7 +226,6 @@ def test(epoch_id): print("Epoch {0}, top1 {1}, top5 {2}".format(epoch_id, top1.avg, top5.avg)) - train_fetch_list = [loss_train] epoch_start_time = time.time() for epoch_id in range(args.epochs): model.drop_path_prob = args.drop_path_prob * epoch_id / args.epochs diff --git a/LRC/utils.py b/LRC/utils.py index 4002b57..1896e1d 100644 --- a/LRC/utils.py +++ b/LRC/utils.py @@ -34,6 +34,10 @@ def mixup_data(x, y, batch_size, alpha=1.0): lam = 1. index = np.random.permutation(batch_size) + # + #lam = 0.5 + #index = np.arange(batch_size-1, -1, -1) + # mixed_x = lam * x + (1 - lam) * x[index, :] y_a, y_b = y, y[index] return mixed_x.astype('float32'), y_a.astype('int64'),\ diff --git a/LRC/voting.py b/LRC/voting.py new file mode 100644 index 0000000..ad43c56 --- /dev/null +++ b/LRC/voting.py @@ -0,0 +1,22 @@ +import numpy as np +import cPickle as cp +import sys, os + +#model_path = 'final_paddle-results' +model_path = 'paddle-results' +fl = os.listdir(model_path) +labels = np.load('labels.npz')['arr_0'] +pred = np.zeros((10000, 10)) +fl.sort() +i = 0 +weight=1 +for f in fl: + print(f) + if i == 1: weight=1.2 + if i == 2: weight=0.8 + if i == 3: weight=1.3 + if i == 4: weight=1.1 + if i == 5: weight=0.9 + pred += weight* cp.load(open(os.path.join(model_path, f))) + print(np.mean(np.argmax(pred, axis=1) == labels)) + i += 1 From bca39465b18d8b218e0209ef8d6f80f48db8c84a Mon Sep 17 00:00:00 2001 From: jerrywgz Date: Tue, 9 Apr 2019 02:49:47 +0000 Subject: [PATCH 2/4] add imagenet model --- .../{download.sh => download_cifar.sh} | 0 LRC/dataset/download_imagenet.sh | 40 +++++++++++++++++++ LRC/model.py | 14 ++++--- LRC/reader_imagenet.py | 2 +- LRC/train_imagenet.py | 20 ++++++---- 5 files changed, 61 insertions(+), 15 deletions(-) rename LRC/dataset/{download.sh => download_cifar.sh} (100%) create mode 100644 LRC/dataset/download_imagenet.sh diff --git a/LRC/dataset/download.sh b/LRC/dataset/download_cifar.sh similarity index 100% rename from LRC/dataset/download.sh rename to LRC/dataset/download_cifar.sh diff --git a/LRC/dataset/download_imagenet.sh b/LRC/dataset/download_imagenet.sh new file mode 100644 index 0000000..947b890 --- /dev/null +++ b/LRC/dataset/download_imagenet.sh @@ -0,0 +1,40 @@ +set -e +if [ "x${IMAGENET_USERNAME}" == x -o "x${IMAGENET_ACCESS_KEY}" == x ];then + echo "Please create an account on image-net.org." + echo "It will provide you a pair of username and accesskey to download imagenet data." + read -p "Username: " IMAGENET_USERNAME + read -p "Accesskey: " IMAGENET_ACCESS_KEY +fi + +root_url=http://www.image-net.org/challenges/LSVRC/2012/nnoupb +valid_tar=ILSVRC2012_img_val.tar +train_tar=ILSVRC2012_img_train.tar +train_folder=train/ +valid_folder=val/ + +echo "Download imagenet training data..." 
+mkdir -p ${train_folder} +wget -nd -c ${root_url}/${train_tar} +tar xf ${train_tar} -C ${train_folder} + +cd ${train_folder} +for x in `ls *.tar` +do + filename=`basename $x .tar` + mkdir -p $filename + tar -xf $x -C $filename + rm -rf $x +done +cd - + +echo "Download imagenet validation data..." +mkdir -p ${valid_folder} +wget -nd -c ${root_url}/${valid_tar} +tar xf ${valid_tar} -C ${valid_folder} + +echo "Download imagenet label file: val_list.txt & train_list.txt" +label_file=ImageNet_label.tgz +label_url=http://imagenet-data.bj.bcebos.com/${label_file} +wget -nd -c ${label_url} +tar zxf ${label_file} + diff --git a/LRC/model.py b/LRC/model.py index b98def2..ca79be0 100644 --- a/LRC/model.py +++ b/LRC/model.py @@ -93,7 +93,8 @@ def forward(self, s0, s1, drop_prob, is_train, name): dropout_implementation='upscale_in_train') s = h3 + h4 out += [s] - return fluid.layers.concat([out[i] for i in self._concat], axis=1) + concat_ = fluid.layers.concat([out[i] for i in self._concat], axis=1, name=name+'concat') + return concat_ def AuxiliaryHeadCIFAR(input, num_classes, aux_name='auxiliary_head'): @@ -337,7 +338,7 @@ def lrc_loss(self): return lrc_loss_mean def AuxiliaryHeadImageNet(input, num_classes, aux_name='auxiliary_head'): - relu_a = fluid.layers.relu(input, inplace=True) + relu_a = fluid.layers.relu(input, inplace=False) #relu_a.persistable = True #print(relu_a) pool_a = fluid.layers.pool2d(relu_a, 5, 'avg', pool_stride=2) @@ -405,10 +406,11 @@ def Stem0Conv(input, C_out): bias_attr=ParamAttr( initializer=Constant(0.), name='stem0.1.bias'), moving_mean_name='stem0.1.running_mean', - moving_variance_name='stem0.1.running_var') - relu_a = fluid.layers.relu(bn_a, inplace=True) + moving_variance_name='stem0.1.running_var', + act='relu') + #relu_a = fluid.layers.relu(bn_a,inplace=True) conv_b = fluid.layers.conv2d( - relu_a, + bn_a, C_out, 3, padding=1, @@ -428,7 +430,7 @@ def Stem0Conv(input, C_out): return bn_b def Stem1Conv(input, C_out): - relu_a = fluid.layers.relu(input, inplace=True) + relu_a = fluid.layers.relu(input,inplace=False) conv_a = fluid.layers.conv2d( relu_a, C_out, diff --git a/LRC/reader_imagenet.py b/LRC/reader_imagenet.py index 6577869..b8d28aa 100644 --- a/LRC/reader_imagenet.py +++ b/LRC/reader_imagenet.py @@ -140,7 +140,7 @@ def train(args): :rtype: callable """ - return reader_creator_filepath(args.data, 'train.txt', True) + return reader_creator_filepath(args.data, 'debug.txt', True) def test(args): diff --git a/LRC/train_imagenet.py b/LRC/train_imagenet.py index 688160e..17e5486 100644 --- a/LRC/train_imagenet.py +++ b/LRC/train_imagenet.py @@ -163,14 +163,19 @@ def train(model, args, im_shape, steps_one_epoch, num_gpu): # return os.path.exists(os.path.join(args.pretrained_model, var.name)) # fluid.io.load_vars(exe, args.pretrained_model, main_program=train_prog, predicate=if_exist) - + #build_strategy = fluid.BuildStrategy() + #build_strategy.enable_inplace = False + #build_strategy.memory_optimize = False + train_fetch_list = [loss_train] + + fluid.memory_optimize(train_prog, skip_opt_set=set(train_fetch_list)) exec_strategy = fluid.ExecutionStrategy() - exec_strategy.num_threads = 1 + #exec_strategy.num_threads = 1 train_exe = fluid.ParallelExecutor( - main_program=train_prog, - use_cuda=True, - loss_name=loss_train.name, - exec_strategy=exec_strategy) + main_program=train_prog, + use_cuda=True, + loss_name=loss_train.name, + exec_strategy=exec_strategy) train_batch_size = args.batch_size test_batch_size = 256 @@ -182,8 +187,7 @@ def train(model, args, 
im_shape, steps_one_epoch, num_gpu): test_py_reader.decorate_paddle_reader(test_reader) fluid.clip.set_gradient_clip(fluid.clip.GradientClipByGlobalNorm(args.grad_clip), program=train_prog) - train_fetch_list = [loss_train] - fluid.memory_optimize(train_prog, skip_opt_set=set(train_fetch_list)) + print(train_prog.to_string(True)) def save_model(postfix, main_prog): model_path = os.path.join(args.save_model_path, postfix) From 378f568b040531ae98fa2428bd4cc85de734406a Mon Sep 17 00:00:00 2001 From: jerrywgz Date: Tue, 30 Jul 2019 08:25:27 +0000 Subject: [PATCH 3/4] clean code --- LRC/model.py | 65 +++++------------- LRC/reader_imagenet.py | 137 ++++++++++++++++++++++++++++++++++---- LRC/train_imagenet.py | 33 ++++----- LRC/train_run_imagenet.sh | 5 ++ 4 files changed, 157 insertions(+), 83 deletions(-) create mode 100644 LRC/train_run_imagenet.sh diff --git a/LRC/model.py b/LRC/model.py index ca79be0..8aa9fe4 100644 --- a/LRC/model.py +++ b/LRC/model.py @@ -98,9 +98,7 @@ def forward(self, s0, s1, drop_prob, is_train, name): def AuxiliaryHeadCIFAR(input, num_classes, aux_name='auxiliary_head'): - relu_a = fluid.layers.relu(input, inplace=True) - #relu_a.persistable = True - #print(relu_a) + relu_a = fluid.layers.relu(input) pool_a = fluid.layers.pool2d(relu_a, 5, 'avg', 3) conv2d_a = fluid.layers.conv2d( pool_a, @@ -144,8 +142,6 @@ def AuxiliaryHeadCIFAR(input, num_classes, aux_name='auxiliary_head'): initializer=Constant(0.), name=bn_b_name + '.bias'), moving_mean_name=bn_b_name + '.running_mean', moving_variance_name=bn_b_name + '.running_var') - #bn_b.persistable = True - #print(bn_b) fc_name = aux_name + '.classifier' fc = fluid.layers.fc(bn_b, num_classes, @@ -236,25 +232,14 @@ def forward(self, init_channel, is_train): self.logits_aux = None num_channel = init_channel * 3 s0 = s1 = StemConv(self.image, num_channel, kernel_size=3, padding=1) - #s0.persistable = True - #print(s0) - print(s0) for i, cell in enumerate(self.cells): - #s1.persistable = True - #print(s1) name = 'cells.' + str(i) + '.' 
s0, s1 = s1, cell.forward(s0, s1, self.drop_path_prob, is_train, name) if i == int(2 * self._layers // 3): if self._auxiliary and self.training: - #s1.persistable = True - #print(s1) self.logits_aux = AuxiliaryHeadCIFAR(s1, self.class_num) - #self.logits_aux.persistable = True - #print(self.logits_aux) out = fluid.layers.adaptive_pool2d(s1, (1, 1), "avg") - #out.persistable = True - #print(out) self.logits = fluid.layers.fc(out, size=self.class_num, param_attr=ParamAttr( @@ -263,9 +248,6 @@ def forward(self, init_channel, is_train): bias_attr=ParamAttr( initializer=Constant(0,), name='classifier.bias')) - #self.logits.persistable = True - #print(self.logits) - #print(self.logits_aux) return self.logits, self.logits_aux def train_model(self, py_reader, init_channels, aux, aux_w, loss_lambda): @@ -302,7 +284,6 @@ def mixup_loss(self, auxiliary, auxiliary_weight): loss_b_aux_mean = fluid.layers.reduce_mean(loss_b_aux) loss_aux = self.lam * loss_a_aux_mean + (1 - self.lam ) * loss_b_aux_mean - #print(loss_aux) return loss + auxiliary_weight * loss_aux def lrc_loss(self): @@ -338,10 +319,8 @@ def lrc_loss(self): return lrc_loss_mean def AuxiliaryHeadImageNet(input, num_classes, aux_name='auxiliary_head'): - relu_a = fluid.layers.relu(input, inplace=False) - #relu_a.persistable = True - #print(relu_a) - pool_a = fluid.layers.pool2d(relu_a, 5, 'avg', pool_stride=2) + relu_a = fluid.layers.relu(input) + pool_a = fluid.layers.pool2d(relu_a, 5, 'avg', pool_stride=3) conv2d_a = fluid.layers.conv2d( pool_a, 128, @@ -374,8 +353,6 @@ def AuxiliaryHeadImageNet(input, num_classes, aux_name='auxiliary_head'): uniform=False, fan_in=0), name=aux_name + '.features.5.weight'), bias_attr=False) - #bn_b.persistable = True - #print(bn_b) fc_name = aux_name + '.classifier' fc = fluid.layers.fc(conv2d_b, num_classes, @@ -413,6 +390,7 @@ def Stem0Conv(input, C_out): bn_a, C_out, 3, + stride=2, padding=1, param_attr=ParamAttr( initializer=Xavier( @@ -430,7 +408,7 @@ def Stem0Conv(input, C_out): return bn_b def Stem1Conv(input, C_out): - relu_a = fluid.layers.relu(input,inplace=False) + relu_a = fluid.layers.relu(input) conv_a = fluid.layers.conv2d( relu_a, C_out, @@ -452,10 +430,9 @@ def Stem1Conv(input, C_out): return bn_a class NetworkImageNet(object): - def __init__(self, C, class_num, layers, auxiliary, genotype): + def __init__(self, C, class_num, layers, genotype): self.class_num = class_num self._layers = layers - self._auxiliary = auxiliary self.drop_path_prob = 0 @@ -499,28 +476,19 @@ def build_input(self, image_shape, is_train): return py_reader - def forward(self, init_channel, is_train): + def forward(self, is_train): self.training = is_train self.logits_aux = None - num_channel = init_channel * 3 s0 = self.stem0(self.image) s1 = self.stem1(s0) for i, cell in enumerate(self.cells): - #s1.persistable = True - #print(s1) name = 'cells.' + str(i) + '.' 
             s0, s1 = s1, cell.forward(s0, s1, self.drop_path_prob, is_train, name)
             if i == int(2 * self._layers // 3):
-                if self._auxiliary and self.training:
-                    #s1.persistable = True
-                    #print(s1)
+                if self.training:
                     self.logits_aux = AuxiliaryHeadImageNet(s1, self.class_num)
-        #self.logits_aux.persistable = True
-        #print(self.logits_aux)
-        out = fluid.layers.pool2d(s1, 7, "avg")
-        #out.persistable = True
-        #print(out)
+        out = fluid.layers.pool2d(s1, 7, "avg", pool_stride=7)
         self.logits = fluid.layers.fc(out,
                                       size=self.class_num,
                                       param_attr=ParamAttr(
@@ -529,12 +497,9 @@ def forward(self, init_channel, is_train):
                                       bias_attr=ParamAttr(
                                           initializer=Constant(0,),
                                           name='classifier.bias'))
-        #self.logits.persistable = True
-        #print(self.logits)
-        #print(self.logits_aux)
         return self.logits, self.logits_aux

-    def calc_loss(self, auxiliary, auxiliary_weight):
+    def calc_loss(self, auxiliary_weight):
         prob = fluid.layers.softmax(self.logits, use_cudnn=False)
         loss = fluid.layers.cross_entropy(prob, self.label)
@@ -548,15 +513,15 @@ def calc_loss(self, auxiliary, auxiliary_weight):
         loss_aux_mean = fluid.layers.reduce_mean(loss_aux)
         return loss_mean + auxiliary_weight * loss_aux_mean

-    def train_model(self, py_reader, init_channels, aux, aux_w):
+    def train_model(self, py_reader, aux_w):
         self.image, self.label = fluid.layers.read_file(py_reader)
-        self.logits, self.logits_aux = self.forward(init_channels, True)
-        self.loss = self.calc_loss(aux, aux_w)
+        self.logits, self.logits_aux = self.forward(True)
+        self.loss = self.calc_loss(aux_w)
         return self.loss

-    def test_model(self, py_reader, init_channels):
+    def test_model(self, py_reader):
         self.image, self.label = fluid.layers.read_file(py_reader)
-        self.logits, _ = self.forward(init_channels, False)
+        self.logits, _ = self.forward(False)
         prob = fluid.layers.softmax(self.logits, use_cudnn=False)
         loss = fluid.layers.cross_entropy(prob, self.label)
         acc_1 = fluid.layers.accuracy(self.logits, self.label, k=1)
diff --git a/LRC/reader_imagenet.py b/LRC/reader_imagenet.py
index b8d28aa..0701952 100644
--- a/LRC/reader_imagenet.py
+++ b/LRC/reader_imagenet.py
@@ -22,7 +22,6 @@
 from PIL import Image
 from PIL import ImageOps
 import numpy as np
-
 try:
     import cPickle as pickle
 except:
@@ -35,38 +34,64 @@
 import functools
 import paddle.reader
 import math
+import cv2

 __all__ = ['train10', 'test10']

-train_image_size = 224
+train_image_size = 320
 test_image_size = 256

 CIFAR_MEAN = [0.485, 0.456, 0.406]
 CIFAR_STD = [0.229, 0.224, 0.225]

+def _parse_kv(r):
+    """Parse a (key, value) record from an ImageNet sequence file."""
+    k, v = r
+    obj = pickle.loads(v)
+    return obj['image'], obj['label']
+
 def random_crop(img, size, scale=[0.08, 1.0], ratio=[3. / 4., 4. / 3.]):
     aspect_ratio = math.sqrt(np.random.uniform(*ratio))
     w = 1. * aspect_ratio
     h = 1. / aspect_ratio
-    bound = min((float(img.size[0]) / img.size[1]) / (w**2),
-                (float(img.size[1]) / img.size[0]) / (h**2))
+    # PIL
+    #bound = min((float(img.size[0]) / img.size[1]) / (w**2),
+    #            (float(img.size[1]) / img.size[0]) / (h**2))
+    # cv2
+    bound = min((float(img.shape[1]) / img.shape[0]) / (w**2),
+                (float(img.shape[0]) / img.shape[1]) / (h**2))
     scale_max = min(scale[1], bound)
     scale_min = min(scale[0], bound)

-    target_area = img.size[0] * img.size[1] * np.random.uniform(scale_min,
-                                                                scale_max)
+    # PIL
+    #target_area = img.size[0] * img.size[1] * np.random.uniform(scale_min,
+    #                                                            scale_max)
+    # cv2
+    target_area = img.shape[0] * img.shape[1] * np.random.uniform(scale_min,
+                                                                  scale_max)
+
     target_size = math.sqrt(target_area)
     w = int(target_size * w)
     h = int(target_size * h)
-    i = np.random.randint(0, img.size[0] - w + 1)
-    j = np.random.randint(0, img.size[1] - h + 1)
+    # PIL
+    #i = np.random.randint(0, img.size[0] - w + 1)
+    #j = np.random.randint(0, img.size[1] - h + 1)

-    img = img.crop((i, j, i + w, j + h))
-    img = img.resize((size, size), Image.BILINEAR)
+    #img = img.crop((i, j, i + w, j + h))
+    #img = img.resize((size, size), Image.BILINEAR)
+    # cv2
+    i = np.random.randint(0, img.shape[0] - h + 1)
+    j = np.random.randint(0, img.shape[1] - w + 1)
+    img = img[i:i + h, j:j + w, :]
+    img = cv2.resize(img, (size, size), interpolation=cv2.INTER_LINEAR)
     return img

+# PIL
+"""
 def crop_image(img, target_size, center=True):
     width, height = img.size
     size = target_size
@@ -80,7 +105,24 @@ def crop_image(img, target_size, center=True):
     h_end = h_start + size
     img = img.crop((w_start, h_start, w_end, h_end))
     return img
-
+"""
+# cv2
+def crop_image(img, target_size, center=True):
+    height, width = img.shape[:2]
+    size = target_size
+    if center == True:
+        w_start = (width - size) // 2
+        h_start = (height - size) // 2
+    else:
+        w_start = np.random.randint(0, width - size + 1)
+        h_start = np.random.randint(0, height - size + 1)
+    w_end = w_start + size
+    h_end = h_start + size
+    img = img[h_start:h_end, w_start:w_end, :]
+    return img
+
+# PIL
+"""
 def preprocess(img_path, is_training):

     img = Image.open(img_path)
@@ -108,14 +150,34 @@ def preprocess(img_path, is_training):
     img = np.transpose(img, (2, 0, 1))

     return img
+"""
+# cv2
+def preprocess(img_path, is_training):
+
+    img = cv2.imread(img_path)
+    if is_training:
+        # random resize crop
+        img = random_crop(img, train_image_size)
+        # random horizontal flip
+        if np.random.randint(2):
+            img = cv2.flip(img, 1)
+    else:
+        # resize
+        img = cv2.resize(img, (test_image_size, test_image_size), interpolation=cv2.INTER_LINEAR)
+        # center crop
+        img = crop_image(img, train_image_size)
+    img_float = img[:, :, ::-1].astype('float32') / 255
+    img = (img_float - CIFAR_MEAN) / CIFAR_STD
+    img = np.transpose(img, (2, 0, 1))
+
+    return img

 def reader_creator_filepath(data_dir, sub_name, is_training):
     file_list = os.path.join(data_dir, sub_name)
     image_file = 'train' if is_training else 'val'
     dataset_path = os.path.join(data_dir, image_file)
-    print(dataset_path)
     def reader():
         with open(file_list) as flist:
             lines = [line.strip() for line in flist]
@@ -130,6 +192,49 @@ def reader():
     return reader

+
+def _reader_creator(data_dir, file_list, is_training):
+    def multiprocess_reader():
+        full_lines = [line.strip() for line in file_list]
+        # NOTE: shuffling here may be redundant; mul_reader_creator_filepath
+        # already shuffles the full list before splitting it across workers.
+        if is_training:
+            np.random.shuffle(full_lines)
+        for line in full_lines:
+            img_path, label = line.split()
+            img_path = os.path.join(data_dir, img_path)
+            img = preprocess(img_path, is_training)
+            yield img, int(label)
+#    multiprocess_reader()
+    return multiprocess_reader
+
+def mul_reader_creator_filepath(data_dir, sub_name, is_training):
+
+    file_list = os.path.join(data_dir, sub_name)
+    image_file = 'train' if is_training else 'val'
+    dataset_path = os.path.join(data_dir, image_file)
+
+    with open(file_list, 'r') as f_dir:
+        lines = f_dir.readlines()
+
+    num_workers = 16
+
+    n = int(math.ceil(len(lines) / float(num_workers)))
+
+    # global shuffle of the whole file list (no per-epoch "pass seed" strategy
+    # as used in the image classification models)
+    if is_training:
+        np.random.shuffle(lines)
+    split_lists = [lines[i:i + n] for i in range(0, len(lines), n)]
+    readers = []
+    for item in split_lists:
+        readers.append(
+            _reader_creator(
+                dataset_path,
+                item,
+                is_training
+            )
+        )
+    return paddle.reader.multiprocess_reader(readers, False)
+
+
 def train(args):
     """
@@ -140,7 +245,10 @@ def train(args):
     :rtype: callable
     """

-    return reader_creator_filepath(args.data, 'debug.txt', True)
+    # return reader_creator_filepath(args.data, 'train.txt', True)
+    return mul_reader_creator_filepath('./dataset/imagenet', 'train.txt', True)
+
+

 def test(args):
@@ -151,4 +259,5 @@
     :return: Test reader creator.
     :rtype: callable
     """
-    return reader_creator_filepath(args.data, 'val.txt', False)
+    return mul_reader_creator_filepath('./dataset/imagenet', 'val.txt', False)
+    # return reader_creator_filepath(args.data, 'val.txt', False)
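
A quick way to sanity-check the new cv2-based multiprocess reader is to pull a few samples from it directly. This is an editorial sketch, not part of the patch; it assumes the ./dataset/imagenet layout (train.txt plus a train/ image directory) that mul_reader_creator_filepath expects and that PaddlePaddle and OpenCV are importable:

import reader_imagenet

class Args(object):
    data = './dataset/imagenet'   # ignored by the patched train(), kept for the signature

reader = reader_imagenet.train(Args())       # callable built by paddle.reader.multiprocess_reader
for step, (img, label) in enumerate(reader()):
    print(img.shape, label)                  # (3, 320, 320) CHW array and an integer label
    if step == 3:
        break
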
diff --git a/LRC/train_imagenet.py b/LRC/train_imagenet.py
index 17e5486..273b1f2 100644
--- a/LRC/train_imagenet.py
+++ b/LRC/train_imagenet.py
@@ -66,11 +66,6 @@
     type=str,
     default='save_models',
     help='path to save the model')
-parser.add_argument(
-    '--auxiliary',
-    action='store_true',
-    default=False,
-    help='use auxiliary tower')
 parser.add_argument(
     '--auxiliary_weight',
     type=float,
@@ -94,7 +89,7 @@
 ImageNet_CLASSES = 1000
 dataset_train_size = 1281167
-image_size = 224
+image_size = 320
 genotypes.DARTS = genotypes.MY_DARTS_list[args.model_id]

 def main():
@@ -104,7 +99,7 @@ def main():
     logging.info("args = %s", args)
     genotype = eval("genotypes.%s" % args.arch)
     model = Network(args.init_channels, ImageNet_CLASSES, args.layers,
-                    args.auxiliary, genotype)
+                    genotype)
     steps_one_epoch = math.ceil(dataset_train_size / (devices_num * args.batch_size))
     train(model, args, image_shape, steps_one_epoch, devices_num)

@@ -117,8 +112,8 @@ def build_program(main_prog, startup_prog, args, is_train, model, im_shape,
     py_reader = model.build_input(im_shape, is_train)
     if is_train:
         with fluid.unique_name.guard():
-            loss = model.train_model(py_reader, args.init_channels,
-                                     args.auxiliary, args.auxiliary_weight)
+            loss = model.train_model(py_reader,
+                                     args.auxiliary_weight)
             optimizer = fluid.optimizer.Momentum(
                 learning_rate=cosine_with_warmup_decay(\
                     args.learning_rate, args.lr_min, steps_one_epoch,\
@@ -130,8 +125,7 @@ def build_program(main_prog, startup_prog, args, is_train, model, im_shape,
             out = [py_reader, loss]
     else:
         with fluid.unique_name.guard():
-            prob, acc_1, acc_5 = model.test_model(py_reader,
-                                                  args.init_channels)
+            prob, acc_1, acc_5 = model.test_model(py_reader)
             out = [py_reader, prob, acc_1, acc_5]
     return out

@@ -163,19 +157,20 @@ def train(model, args, im_shape, steps_one_epoch, num_gpu):
 #        return os.path.exists(os.path.join(args.pretrained_model, var.name))
 #    fluid.io.load_vars(exe, args.pretrained_model, main_program=train_prog, predicate=if_exist)

-    #build_strategy = fluid.BuildStrategy()
-    #build_strategy.enable_inplace = False
-    #build_strategy.memory_optimize = False
+    build_strategy = fluid.BuildStrategy()
+    build_strategy.enable_inplace = True
+    build_strategy.memory_optimize = False
     train_fetch_list = [loss_train]
-    fluid.memory_optimize(train_prog, skip_opt_set=set(train_fetch_list))
+    #fluid.memory_optimize(train_prog, skip_opt_set=set(train_fetch_list))
     exec_strategy = fluid.ExecutionStrategy()
-    #exec_strategy.num_threads = 1
+    exec_strategy.num_threads = 1
     train_exe = fluid.ParallelExecutor(
         main_program=train_prog,
         use_cuda=True,
         loss_name=loss_train.name,
-        exec_strategy=exec_strategy)
+        exec_strategy=exec_strategy,
+        build_strategy=build_strategy)

     train_batch_size = args.batch_size
     test_batch_size = 256
@@ -187,7 +182,7 @@ def train(model, args, im_shape, steps_one_epoch, num_gpu):
     test_py_reader.decorate_paddle_reader(test_reader)
     fluid.clip.set_gradient_clip(fluid.clip.GradientClipByGlobalNorm(args.grad_clip), program=train_prog)
-    print(train_prog.to_string(True))
+    #print(train_prog.to_string(True))

     def save_model(postfix, main_prog):
         model_path = os.path.join(args.save_model_path, postfix)
@@ -246,7 +241,7 @@ def test(epoch_id):
                     np.array(loss_v).mean(), start_time-prev_start_time))
                 step_id += 1
                 sys.stdout.flush()
-                os._exit(1)
+                #os._exit(1)
         except fluid.core.EOFException:
             train_py_reader.reset()
         if epoch_id % 50 == 0 or epoch_id == args.epochs - 1:
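
For the eight-GPU, --batch_size=64 launch in train_run_imagenet.sh below, the steps_one_epoch value that drives the warmup and cosine schedule can be checked by hand. A worked example under those assumed launch settings, not part of the patch:

import math

dataset_train_size = 1281167   # ImageNet-1k training images, as in train_imagenet.py
devices_num = 8                # CUDA_VISIBLE_DEVICES=0,...,7
batch_size = 64                # per-device batch size from the launch script
steps_one_epoch = int(math.ceil(dataset_train_size / float(devices_num * batch_size)))
print(steps_one_epoch)         # 2503 iterations per epoch
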
diff --git a/LRC/train_run_imagenet.sh b/LRC/train_run_imagenet.sh
new file mode 100644
index 0000000..fc472ba
--- /dev/null
+++ b/LRC/train_run_imagenet.sh
@@ -0,0 +1,5 @@
+export FLAGS_eager_delete_tensor_gb=0.0
+export FLAGS_fast_eager_deletion_mode=1
+export FLAGS_fraction_of_gpu_memory_to_use=1.
+nohup env CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7 python -u train_imagenet.py --batch_size=64 > imagenet.log 2>&1 &
+
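
Both training entry points select one of the searched architectures from genotypes.MY_DARTS_list through the --model_id flag; run from the LRC/ directory, the selection reduces to the following illustrative snippet (not part of the patch):

import genotypes

model_id = 1                                   # value passed as --model_id
genotypes.DARTS = genotypes.MY_DARTS_list[model_id]
print(genotypes.DARTS)                         # the Genotype the scripts resolve via args.arch
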
From e3d910f460edd3c6ce2282c476ae1bbe71e32d29 Mon Sep 17 00:00:00 2001
From: jerrywgz
Date: Tue, 30 Jul 2019 09:00:16 +0000
Subject: [PATCH 4/4] clean code

---
 LRC/learning_rate.py |  1 -
 LRC/model.py         |  1 -
 LRC/reader_cifar.py  | 13 +++++--------
 LRC/train_mixup.py   | 11 +++--------
 LRC/utils.py         |  4 ----
 5 files changed, 8 insertions(+), 22 deletions(-)

diff --git a/LRC/learning_rate.py b/LRC/learning_rate.py
index 2a19e0e..6658b3e 100644
--- a/LRC/learning_rate.py
+++ b/LRC/learning_rate.py
@@ -76,4 +76,3 @@ def cosine_with_warmup_decay(learning_rate, lr_min, steps_one_epoch,
     fluid.layers.assign(cosine_lr, lr)
     return lr
-
diff --git a/LRC/model.py b/LRC/model.py
index 8df2c76..48953da 100644
--- a/LRC/model.py
+++ b/LRC/model.py
@@ -175,7 +175,6 @@ def StemConv(input, C_out, kernel_size, padding):
     return bn_a


-
 class NetworkCIFAR(object):
     def __init__(self, C, class_num, layers, auxiliary, genotype):
         self._layers = layers
diff --git a/LRC/reader_cifar.py b/LRC/reader_cifar.py
index f3793ec..d773a44 100644
--- a/LRC/reader_cifar.py
+++ b/LRC/reader_cifar.py
@@ -52,6 +52,7 @@
 CIFAR_MEAN = [0.49139968, 0.48215827, 0.44653124]
 CIFAR_STD = [0.24703233, 0.24348505, 0.26158768]

+
 def generate_reshape_label(label, batch_size, CIFAR_CLASSES=10):
     reshape_label = np.zeros((batch_size, 1), dtype='int32')
     reshape_non_label = np.zeros(
@@ -88,7 +89,7 @@ def preprocess(sample, is_training, args):
     image_array = sample.reshape(3, image_size, image_size)
     rgb_array = np.transpose(image_array, (1, 2, 0))
     img = Image.fromarray(rgb_array, 'RGB')
-    
+
     if is_training:
         # pad and ramdom crop
         img = ImageOps.expand(img, (4, 4, 4, 4), fill=0)  # pad to 40 * 40 * 3
@@ -97,13 +98,13 @@
                         left_top[1] + image_size))
         if np.random.randint(2):
             img = img.transpose(Image.FLIP_LEFT_RIGHT)
-    
+
     img = np.array(img).astype(np.float32)
     # per_image_standardization
     img_float = img / 255.0
     img = (img_float - CIFAR_MEAN) / CIFAR_STD
-    
+
     if is_training and args.cutout:
         center = np.random.randint(image_size, size=2)
         offset_width = max(0, center[0] - half_length)
@@ -114,7 +115,7 @@
         for i in range(offset_height, target_height):
             for j in range(offset_width, target_width):
                 img[i][j][:] = 0.0
-    
+
     img = np.transpose(img, (2, 0, 1))
     return img
@@ -153,10 +154,6 @@ def reader():
             if len(batch_data) == args.batch_size:
                 batch_data = np.array(batch_data, dtype='float32')
                 batch_label = np.array(batch_label, dtype='int64')
-#
-#                batch_data = pickle.load(open('input.pkl'))
-#                batch_label = pickle.load(open('target.pkl')).reshape(-1,1)
-#
                 if is_training:
                     flatten_label, flatten_non_label = \
                         generate_reshape_label(batch_label, args.batch_size)
diff --git a/LRC/train_mixup.py b/LRC/train_mixup.py
index de3a1be..8f88744 100644
--- a/LRC/train_mixup.py
+++ b/LRC/train_mixup.py
@@ -70,6 +70,7 @@
 image_size = 32
 genotypes.DARTS = genotypes.MY_DARTS_list[args.model_id]

+
 def main():
     image_shape = [3, image_size, image_size]
     devices = os.getenv("CUDA_VISIBLE_DEVICES") or ""
@@ -79,7 +80,8 @@ def main():
     model = Network(args.init_channels, CIFAR_CLASSES, args.layers,
                     args.auxiliary, genotype)

-    steps_one_epoch = math.ceil(dataset_train_size / (devices_num * args.batch_size))
+    steps_one_epoch = math.ceil(dataset_train_size /
+                                (devices_num * args.batch_size))
     train(model, args, image_shape, steps_one_epoch)

@@ -136,13 +138,6 @@ def if_exist(var):
                           main_program=train_prog, predicate=if_exist)

-    #if args.pretrained_model:
-
-    #    def if_exist(var):
-    #        return os.path.exists(os.path.join(args.pretrained_model, var.name))
-
-    #    fluid.io.load_vars(exe, args.pretrained_model, main_program=train_prog, predicate=if_exist)
-
     exec_strategy = fluid.ExecutionStrategy()
     exec_strategy.num_threads = 1
     build_strategy = fluid.BuildStrategy()
diff --git a/LRC/utils.py b/LRC/utils.py
index 1896e1d..4002b57 100644
--- a/LRC/utils.py
+++ b/LRC/utils.py
@@ -34,10 +34,6 @@ def mixup_data(x, y, batch_size, alpha=1.0):
         lam = 1.
     index = np.random.permutation(batch_size)
-    #
-    #lam = 0.5
-    #index = np.arange(batch_size-1, -1, -1)
-    #
     mixed_x = lam * x + (1 - lam) * x[index, :]
     y_a, y_b = y, y[index]
     return mixed_x.astype('float32'), y_a.astype('int64'),\