From a2f4ad9985e8f072a60b5f6d613050076ccf9ebc Mon Sep 17 00:00:00 2001 From: root Date: Thu, 4 Apr 2019 08:06:58 +0000 Subject: [PATCH 1/4] add_more_config_for_lrc --- LRC/genotypes.py | 8 +- LRC/learning_rate.py | 37 +++- LRC/model.py | 326 +++++++++++++++++++++++++---- LRC/operations.py | 2 +- LRC/{reader.py => reader_cifar.py} | 45 +++- LRC/reader_imagenet.py | 154 ++++++++++++++ LRC/run.sh | 8 - LRC/train_imagenet.py | 254 ++++++++++++++++++++++ LRC/train_mixup.py | 56 +++-- LRC/utils.py | 4 + LRC/voting.py | 22 ++ 11 files changed, 836 insertions(+), 80 deletions(-) rename LRC/{reader.py => reader_cifar.py} (83%) create mode 100644 LRC/reader_imagenet.py delete mode 100644 LRC/run.sh create mode 100644 LRC/train_imagenet.py create mode 100644 LRC/voting.py diff --git a/LRC/genotypes.py b/LRC/genotypes.py index 349fbd2..8a9ee42 100644 --- a/LRC/genotypes.py +++ b/LRC/genotypes.py @@ -113,4 +113,10 @@ ('skip_connect', 2), ('skip_connect', 3)], reduce_concat=range(2, 6)) -DARTS = MY_DARTS +MY_DARTS_list = [ + Genotype(normal=[('sep_conv_3x3', 0), ('skip_connect', 1), ('sep_conv_3x3', 0), ('sep_conv_3x3', 1), ('sep_conv_3x3', 0), ('sep_conv_3x3', 1), ('skip_connect', 0), ('sep_conv_3x3', 2)],normal_concat=range(2, 6), reduce=[('max_pool_3x3', 0), ('max_pool_3x3', 1), ('skip_connect', 2),('max_pool_3x3', 0), ('skip_connect', 3), ('avg_pool_3x3', 1), ('skip_connect', 2), ('skip_connect', 3)], reduce_concat=range(2, 6)), + Genotype(normal=[('sep_conv_3x3', 0), ('sep_conv_3x3', 1), ('skip_connect', 0), ('dil_conv_3x3', 2), ('skip_connect', 0), ('sep_conv_3x3', 1), ('skip_connect', 0), ('skip_connect', 1)],normal_concat=range(2, 6), reduce=[('max_pool_3x3', 0), ('max_pool_3x3', 1), ('skip_connect', 2),('dil_conv_3x3', 0), ('skip_connect', 3), ('skip_connect', 2), ('skip_connect', 3), ('skip_connect',2)], reduce_concat=range(2, 6)), + Genotype(normal=[('sep_conv_3x3', 0), ('skip_connect', 1), ('skip_connect', 0), ('dil_conv_5x5', 1), ('skip_connect', 0), ('sep_conv_3x3', 1), ('skip_connect', 0), ('sep_conv_3x3', 1)], normal_concat=range(2, 6), reduce=[('max_pool_3x3', 0), ('max_pool_3x3', 1), ('max_pool_3x3', 0), ('skip_connect', 2), ('max_pool_3x3', 0), ('skip_connect', 2), ('skip_connect', 2), ('skip_connect', 3)], reduce_concat=range(2, 6)) +] + +DARTS = MY_DARTS_list[0] diff --git a/LRC/learning_rate.py b/LRC/learning_rate.py index 3965171..d60a395 100644 --- a/LRC/learning_rate.py +++ b/LRC/learning_rate.py @@ -38,6 +38,41 @@ def cosine_decay(learning_rate, num_epoch, steps_one_epoch): with init_on_cpu(): decayed_lr = learning_rate * \ - (ops.cos((global_step / steps_one_epoch) \ + (ops.cos(fluid.layers.floor(global_step / steps_one_epoch) \ * math.pi / num_epoch) + 1)/2 return decayed_lr + + +def cosine_with_warmup_decay(learning_rate, lr_min, steps_one_epoch, + warmup_epochs, total_epoch, num_gpu): + global_step = _decay_step_counter() + epoch_idx = fluid.layers.floor(global_step / steps_one_epoch) + + lr = fluid.layers.create_global_var( + shape=[1], + value=0.0, + dtype='float32', + persistable=True, + name="learning_rate") + + warmup_epoch_var = fluid.layers.fill_constant( + shape=[1], dtype='float32', value=float(warmup_epochs), force_cpu=True) + num_gpu_var = fluid.layers.fill_constant( + shape=[1], dtype='float32', value=float(num_gpu), force_cpu=True) + batch_idx = global_step - steps_one_epoch * epoch_idx + + with fluid.layers.control_flow.Switch() as switch: + with switch.case(epoch_idx < warmup_epoch_var): + epoch_ = (batch_idx + 1) / steps_one_epoch + factor = 1 / num_gpu_var 
* (epoch_ * (num_gpu_var - 1) / warmup_epoch_var + 1) + decayed_lr = learning_rate * factor * num_gpu_var + fluid.layers.assign(decayed_lr, lr) + epoch_ = (batch_idx + 1) / steps_one_epoch + m = epoch_ / total_epoch + frac = (1 + ops.cos(math.pi * m)) / 2 + cosine_lr = (lr_min + (learning_rate - lr_min) * frac) * num_gpu_var + with switch.default(): + fluid.layers.assign(cosine_lr, lr) + + return lr + diff --git a/LRC/model.py b/LRC/model.py index 45a4034..b98def2 100644 --- a/LRC/model.py +++ b/LRC/model.py @@ -97,7 +97,9 @@ def forward(self, s0, s1, drop_prob, is_train, name): def AuxiliaryHeadCIFAR(input, num_classes, aux_name='auxiliary_head'): - relu_a = fluid.layers.relu(input) + relu_a = fluid.layers.relu(input, inplace=True) + #relu_a.persistable = True + #print(relu_a) pool_a = fluid.layers.pool2d(relu_a, 5, 'avg', 3) conv2d_a = fluid.layers.conv2d( pool_a, @@ -141,6 +143,8 @@ def AuxiliaryHeadCIFAR(input, num_classes, aux_name='auxiliary_head'): initializer=Constant(0.), name=bn_b_name + '.bias'), moving_mean_name=bn_b_name + '.running_mean', moving_variance_name=bn_b_name + '.running_var') + #bn_b.persistable = True + #print(bn_b) fc_name = aux_name + '.classifier' fc = fluid.layers.fc(bn_b, num_classes, @@ -174,11 +178,12 @@ def StemConv(input, C_out, kernel_size, padding): return bn_a + class NetworkCIFAR(object): def __init__(self, C, class_num, layers, auxiliary, genotype): - self.class_num = class_num self._layers = layers self._auxiliary = auxiliary + self.class_num = class_num stem_multiplier = 3 self.drop_path_prob = 0 @@ -201,36 +206,12 @@ def __init__(self, C, class_num, layers, auxiliary, genotype): if i == 2 * layers // 3: C_to_auxiliary = C_prev - def forward(self, init_channel, is_train): - self.training = is_train - self.logits_aux = None - num_channel = init_channel * 3 - s0 = StemConv(self.image, num_channel, kernel_size=3, padding=1) - s1 = s0 - for i, cell in enumerate(self.cells): - name = 'cells.' + str(i) + '.' - s0, s1 = s1, cell.forward(s0, s1, self.drop_path_prob, is_train, - name) - if i == int(2 * self._layers // 3): - if self._auxiliary and self.training: - self.logits_aux = AuxiliaryHeadCIFAR(s1, self.class_num) - out = fluid.layers.adaptive_pool2d(s1, (1, 1), "avg") - self.logits = fluid.layers.fc(out, - size=self.class_num, - param_attr=ParamAttr( - initializer=Normal(scale=1e-3), - name='classifier.weight'), - bias_attr=ParamAttr( - initializer=Constant(0.), - name='classifier.bias')) - return self.logits, self.logits_aux - - def build_input(self, image_shape, batch_size, is_train): + def build_input(self, image_shape, is_train): if is_train: py_reader = fluid.layers.py_reader( capacity=64, shapes=[[-1] + image_shape, [-1, 1], [-1, 1], [-1, 1], [-1, 1], - [-1, 1], [-1, batch_size, self.class_num - 1]], + [-1, 1], [50, -1, self.class_num - 1]], lod_levels=[0, 0, 0, 0, 0, 0, 0], dtypes=[ "float32", "int64", "int64", "float32", "int32", "int32", @@ -248,14 +229,52 @@ def build_input(self, image_shape, batch_size, is_train): name='test_reader') return py_reader - def train_model(self, py_reader, init_channels, aux, aux_w, batch_size, - loss_lambda): + + def forward(self, init_channel, is_train): + self.training = is_train + self.logits_aux = None + num_channel = init_channel * 3 + s0 = s1 = StemConv(self.image, num_channel, kernel_size=3, padding=1) + #s0.persistable = True + #print(s0) + print(s0) + for i, cell in enumerate(self.cells): + #s1.persistable = True + #print(s1) + name = 'cells.' + str(i) + '.' 
+ s0, s1 = s1, cell.forward(s0, s1, self.drop_path_prob, is_train, + name) + if i == int(2 * self._layers // 3): + if self._auxiliary and self.training: + #s1.persistable = True + #print(s1) + self.logits_aux = AuxiliaryHeadCIFAR(s1, self.class_num) + #self.logits_aux.persistable = True + #print(self.logits_aux) + out = fluid.layers.adaptive_pool2d(s1, (1, 1), "avg") + #out.persistable = True + #print(out) + self.logits = fluid.layers.fc(out, + size=self.class_num, + param_attr=ParamAttr( + initializer=Normal(scale=1e-3), + name='classifier.weight'), + bias_attr=ParamAttr( + initializer=Constant(0,), + name='classifier.bias')) + #self.logits.persistable = True + #print(self.logits) + #print(self.logits_aux) + return self.logits, self.logits_aux + + def train_model(self, py_reader, init_channels, aux, aux_w, loss_lambda): self.image, self.ya, self.yb, self.lam, self.label_reshape,\ self.non_label_reshape, self.rad_var = fluid.layers.read_file(py_reader) self.logits, self.logits_aux = self.forward(init_channels, True) self.mixup_loss = self.mixup_loss(aux, aux_w) - self.lrc_loss = self.lrc_loss(batch_size) - return self.mixup_loss + loss_lambda * self.lrc_loss + #self.lrc_loss = self.lrc_loss() + #return self.mixup_loss + loss_lambda * self.lrc_loss + return self.mixup_loss def test_model(self, py_reader, init_channels): self.image, self.ya = fluid.layers.read_file(py_reader) @@ -264,12 +283,13 @@ def test_model(self, py_reader, init_channels): loss = fluid.layers.cross_entropy(prob, self.ya) acc_1 = fluid.layers.accuracy(self.logits, self.ya, k=1) acc_5 = fluid.layers.accuracy(self.logits, self.ya, k=5) - return loss, acc_1, acc_5 + return prob, acc_1, acc_5 def mixup_loss(self, auxiliary, auxiliary_weight): prob = fluid.layers.softmax(self.logits, use_cudnn=False) loss_a = fluid.layers.cross_entropy(prob, self.ya) loss_b = fluid.layers.cross_entropy(prob, self.yb) + loss_a_mean = fluid.layers.reduce_mean(loss_a) loss_b_mean = fluid.layers.reduce_mean(loss_b) loss = self.lam * loss_a_mean + (1 - self.lam) * loss_b_mean @@ -281,9 +301,10 @@ def mixup_loss(self, auxiliary, auxiliary_weight): loss_b_aux_mean = fluid.layers.reduce_mean(loss_b_aux) loss_aux = self.lam * loss_a_aux_mean + (1 - self.lam ) * loss_b_aux_mean + #print(loss_aux) return loss + auxiliary_weight * loss_aux - def lrc_loss(self, batch_size): + def lrc_loss(self): y_diff_reshape = fluid.layers.reshape(self.logits, shape=(-1, 1)) label_reshape = fluid.layers.squeeze(self.label_reshape, axes=[1]) non_label_reshape = fluid.layers.squeeze( @@ -296,18 +317,247 @@ def lrc_loss(self, batch_size): y_diff_non_label_reshape = fluid.layers.gather(y_diff_reshape, non_label_reshape) y_diff_label = fluid.layers.reshape( - y_diff_label_reshape, shape=(-1, batch_size, 1)) + y_diff_label_reshape, shape=(1, -1, 1)) y_diff_non_label = fluid.layers.reshape( y_diff_non_label_reshape, - shape=(-1, batch_size, self.class_num - 1)) + shape=(1, -1, self.class_num - 1)) y_diff_ = y_diff_non_label - y_diff_label y_diff_ = fluid.layers.transpose(y_diff_, perm=[1, 2, 0]) rad_var_trans = fluid.layers.transpose(self.rad_var, perm=[1, 2, 0]) rad_y_diff_trans = rad_var_trans * y_diff_ lrc_loss_sum = fluid.layers.reduce_sum(rad_y_diff_trans, dim=[0, 1]) - lrc_loss_ = fluid.layers.abs(lrc_loss_sum) / (batch_size * - (self.class_num - 1)) + shape_nbc = fluid.layers.shape(rad_y_diff_trans) + shape_nb = fluid.layers.slice(shape_nbc, axes=[0], starts=[0], ends=[2]) + num = fluid.layers.reduce_prod(shape_nb) + num.stop_gradient = True + lrc_loss_ = 
fluid.layers.abs(lrc_loss_sum) / num lrc_loss_mean = fluid.layers.reduce_mean(lrc_loss_) return lrc_loss_mean + +def AuxiliaryHeadImageNet(input, num_classes, aux_name='auxiliary_head'): + relu_a = fluid.layers.relu(input, inplace=True) + #relu_a.persistable = True + #print(relu_a) + pool_a = fluid.layers.pool2d(relu_a, 5, 'avg', pool_stride=2) + conv2d_a = fluid.layers.conv2d( + pool_a, + 128, + 1, + name=aux_name + '.features.2', + param_attr=ParamAttr( + initializer=Xavier( + uniform=False, fan_in=0), + name=aux_name + '.features.2.weight'), + bias_attr=False) + bn_a_name = aux_name + '.features.3' + bn_a = fluid.layers.batch_norm( + conv2d_a, + act='relu', + name=bn_a_name, + param_attr=ParamAttr( + initializer=Constant(1.), name=bn_a_name + '.weight'), + bias_attr=ParamAttr( + initializer=Constant(0.), name=bn_a_name + '.bias'), + moving_mean_name=bn_a_name + '.running_mean', + moving_variance_name=bn_a_name + '.running_var') + conv2d_b = fluid.layers.conv2d( + bn_a, + 768, + 2, + act='relu', + name=aux_name + '.features.5', + param_attr=ParamAttr( + initializer=Xavier( + uniform=False, fan_in=0), + name=aux_name + '.features.5.weight'), + bias_attr=False) + #bn_b.persistable = True + #print(bn_b) + fc_name = aux_name + '.classifier' + fc = fluid.layers.fc(conv2d_b, + num_classes, + name=fc_name, + param_attr=ParamAttr( + initializer=Normal(scale=1e-3), + name=fc_name + '.weight'), + bias_attr=ParamAttr( + initializer=Constant(0.), name=fc_name + '.bias')) + return fc + + +def Stem0Conv(input, C_out): + conv_a = fluid.layers.conv2d( + input, + C_out // 2, + 3, + stride=2, + padding=1, + param_attr=ParamAttr( + initializer=Xavier( + uniform=False, fan_in=0), name='stem0.0.weight'), + bias_attr=False) + bn_a = fluid.layers.batch_norm( + conv_a, + param_attr=ParamAttr( + initializer=Constant(1.), name='stem0.1.weight'), + bias_attr=ParamAttr( + initializer=Constant(0.), name='stem0.1.bias'), + moving_mean_name='stem0.1.running_mean', + moving_variance_name='stem0.1.running_var') + relu_a = fluid.layers.relu(bn_a, inplace=True) + conv_b = fluid.layers.conv2d( + relu_a, + C_out, + 3, + padding=1, + param_attr=ParamAttr( + initializer=Xavier( + uniform=False, fan_in=0), name='stem0.3.weight'), + bias_attr=False) + bn_b = fluid.layers.batch_norm( + conv_b, + param_attr=ParamAttr( + initializer=Constant(1.), name='stem0.4.weight'), + bias_attr=ParamAttr( + initializer=Constant(0.), name='stem0.4.bias'), + moving_mean_name='stem0.4.running_mean', + moving_variance_name='stem0.4.running_var') + + return bn_b + +def Stem1Conv(input, C_out): + relu_a = fluid.layers.relu(input, inplace=True) + conv_a = fluid.layers.conv2d( + relu_a, + C_out, + 3, + stride=2, + padding=1, + param_attr=ParamAttr( + initializer=Xavier( + uniform=False, fan_in=0), name='stem1.1.weight'), + bias_attr=False) + bn_a = fluid.layers.batch_norm( + conv_a, + param_attr=ParamAttr( + initializer=Constant(1.), name='stem1.2.weight'), + bias_attr=ParamAttr( + initializer=Constant(0.), name='stem1.2.bias'), + moving_mean_name='stem1.2.running_mean', + moving_variance_name='stem1.2.running_var') + return bn_a + +class NetworkImageNet(object): + def __init__(self, C, class_num, layers, auxiliary, genotype): + self.class_num = class_num + self._layers = layers + self._auxiliary = auxiliary + + self.drop_path_prob = 0 + + C_prev_prev, C_prev, C_curr = C, C, C + self.cells = [] + reduction_prev = True + for i in range(layers): + if i in [layers // 3, 2 * layers // 3]: + C_curr *= 2 + reduction = True + else: + reduction = False + cell 
= Cell(genotype, C_prev_prev, C_prev, C_curr, reduction, + reduction_prev) + reduction_prev = reduction + self.cells += [cell] + C_prev_prev, C_prev = C_prev, cell.multiplier * C_curr + if i == 2 * layers // 3: + C_to_auxiliary = C_prev + self.stem0 = functools.partial(Stem0Conv, C_out=C) + self.stem1 = functools.partial(Stem1Conv, C_out=C) + + def build_input(self, image_shape, is_train): + if is_train: + py_reader = fluid.layers.py_reader( + capacity=64, + shapes=[[-1] + image_shape, [-1, 1]], + lod_levels=[0, 0], + dtypes=[ + "float32", "int64"], + use_double_buffer=True, + name='train_reader') + else: + py_reader = fluid.layers.py_reader( + capacity=64, + shapes=[[-1] + image_shape, [-1, 1]], + lod_levels=[0, 0], + dtypes=["float32", "int64"], + use_double_buffer=True, + name='test_reader') + return py_reader + + + def forward(self, init_channel, is_train): + self.training = is_train + self.logits_aux = None + num_channel = init_channel * 3 + s0 = self.stem0(self.image) + s1 = self.stem1(s0) + for i, cell in enumerate(self.cells): + #s1.persistable = True + #print(s1) + name = 'cells.' + str(i) + '.' + s0, s1 = s1, cell.forward(s0, s1, self.drop_path_prob, is_train, + name) + if i == int(2 * self._layers // 3): + if self._auxiliary and self.training: + #s1.persistable = True + #print(s1) + self.logits_aux = AuxiliaryHeadImageNet(s1, self.class_num) + #self.logits_aux.persistable = True + #print(self.logits_aux) + out = fluid.layers.pool2d(s1, 7, "avg") + #out.persistable = True + #print(out) + self.logits = fluid.layers.fc(out, + size=self.class_num, + param_attr=ParamAttr( + initializer=Normal(scale=1e-3), + name='classifier.weight'), + bias_attr=ParamAttr( + initializer=Constant(0,), + name='classifier.bias')) + #self.logits.persistable = True + #print(self.logits) + #print(self.logits_aux) + return self.logits, self.logits_aux + + def calc_loss(self, auxiliary, auxiliary_weight): + prob = fluid.layers.softmax(self.logits, use_cudnn=False) + loss = fluid.layers.cross_entropy(prob, self.label) + + loss_mean = fluid.layers.reduce_mean(loss) + #if auxiliary: + # prob_aux = fluid.layers.softmax(self.logits_aux, use_cudnn=False) + # loss_aux = fluid.layers.cross_entropy(prob_aux, self.label) + # loss_aux_mean = fluid.layers.reduce_mean(loss_aux) + prob_aux = fluid.layers.softmax(self.logits_aux, use_cudnn=False) + loss_aux = fluid.layers.cross_entropy(prob_aux, self.label) + loss_aux_mean = fluid.layers.reduce_mean(loss_aux) + return loss_mean + auxiliary_weight * loss_aux_mean + + def train_model(self, py_reader, init_channels, aux, aux_w): + self.image, self.label = fluid.layers.read_file(py_reader) + self.logits, self.logits_aux = self.forward(init_channels, True) + self.loss = self.calc_loss(aux, aux_w) + return self.loss + + def test_model(self, py_reader, init_channels): + self.image, self.label = fluid.layers.read_file(py_reader) + self.logits, _ = self.forward(init_channels, False) + prob = fluid.layers.softmax(self.logits, use_cudnn=False) + loss = fluid.layers.cross_entropy(prob, self.label) + acc_1 = fluid.layers.accuracy(self.logits, self.label, k=1) + acc_5 = fluid.layers.accuracy(self.logits, self.label, k=5) + return prob, acc_1, acc_5 + diff --git a/LRC/operations.py b/LRC/operations.py index b015722..be88a7f 100644 --- a/LRC/operations.py +++ b/LRC/operations.py @@ -312,7 +312,7 @@ def FactorizedReduce(input, C_out, name='', affine=True): bias_attr=False) h_end = relu_a.shape[2] w_end = relu_a.shape[3] - slice_a = fluid.layers.slice(relu_a, [2, 3], [1, 1], [h_end, w_end]) 
+ slice_a = fluid.layers.slice(input=relu_a, axes=[2, 3], starts=[1, 1], ends=[h_end, w_end]) conv2d_b = fluid.layers.conv2d( slice_a, C_out // 2, diff --git a/LRC/reader.py b/LRC/reader_cifar.py similarity index 83% rename from LRC/reader.py rename to LRC/reader_cifar.py index 20b32b5..3de35f3 100644 --- a/LRC/reader.py +++ b/LRC/reader_cifar.py @@ -31,7 +31,10 @@ from PIL import ImageOps import numpy as np -import cPickle +try: + import cPickle as pickle +except: + import pickle import random import utils import paddle.fluid as fluid @@ -46,10 +49,9 @@ image_depth = 3 half_length = 8 -CIFAR_MEAN = [0.4914, 0.4822, 0.4465] +CIFAR_MEAN = [0.49139968, 0.48215827, 0.44653124] CIFAR_STD = [0.24703233, 0.24348505, 0.26158768] - def generate_reshape_label(label, batch_size, CIFAR_CLASSES=10): reshape_label = np.zeros((batch_size, 1), dtype='int32') reshape_non_label = np.zeros( @@ -82,10 +84,11 @@ def generate_bernoulli_number(batch_size, CIFAR_CLASSES=10): def preprocess(sample, is_training, args): + image_array = sample.reshape(3, image_size, image_size) rgb_array = np.transpose(image_array, (1, 2, 0)) img = Image.fromarray(rgb_array, 'RGB') - + if is_training: # pad and ramdom crop img = ImageOps.expand(img, (4, 4, 4, 4), fill=0) # pad to 40 * 40 * 3 @@ -94,13 +97,13 @@ def preprocess(sample, is_training, args): left_top[1] + image_size)) if np.random.randint(2): img = img.transpose(Image.FLIP_LEFT_RIGHT) - + img = np.array(img).astype(np.float32) # per_image_standardization img_float = img / 255.0 img = (img_float - CIFAR_MEAN) / CIFAR_STD - + if is_training and args.cutout: center = np.random.randint(image_size, size=2) offset_width = max(0, center[0] - half_length) @@ -111,7 +114,7 @@ def preprocess(sample, is_training, args): for i in range(offset_height, target_height): for j in range(offset_width, target_width): img[i][j][:] = 0.0 - + img = np.transpose(img, (2, 0, 1)) return img @@ -123,13 +126,15 @@ def reader_creator_filepath(filename, sub_name, is_training, args): datasets = [] for name in names: print("Reading file " + name) - batch = cPickle.load(open(filename + name, 'rb')) + batch = pickle.load(open(filename + name, 'rb')) data = batch['data'] labels = batch.get('labels', batch.get('fine_labels', None)) assert labels is not None dataset = zip(data, labels) datasets.extend(dataset) - random.shuffle(datasets) + + if is_training: + random.shuffle(datasets) def read_batch(datasets, args): for sample, label in datasets: @@ -145,6 +150,10 @@ def reader(): if len(batch_data) == args.batch_size: batch_data = np.array(batch_data, dtype='float32') batch_label = np.array(batch_label, dtype='int64') +# +# batch_data = pickle.load(open('input.pkl')) +# batch_label = pickle.load(open('target.pkl')).reshape(-1,1) +# if is_training: flatten_label, flatten_non_label = \ generate_reshape_label(batch_label, args.batch_size) @@ -160,6 +169,24 @@ def reader(): yield batch_out batch_data = [] batch_label = [] + if len(batch_data) != 0: + batch_data = np.array(batch_data, dtype='float32') + batch_label = np.array(batch_label, dtype='int64') + if is_training: + flatten_label, flatten_non_label = \ + generate_reshape_label(batch_label, len(batch_data)) + rad_var = generate_bernoulli_number(len(batch_data)) + mixed_x, y_a, y_b, lam = utils.mixup_data( + batch_data, batch_label, len(batch_data), + args.mix_alpha) + batch_out = [[mixed_x, y_a, y_b, lam, flatten_label, \ + flatten_non_label, rad_var]] + yield batch_out + else: + batch_out = [[batch_data, batch_label]] + yield batch_out + batch_data = [] + 
batch_label = []
return reader
diff --git a/LRC/reader_imagenet.py b/LRC/reader_imagenet.py
new file mode 100644
index 0000000..6577869
--- /dev/null
+++ b/LRC/reader_imagenet.py
@@ -0,0 +1,154 @@
+# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+# Based on:
+# --------------------------------------------------------
+# DARTS
+# Copyright (c) 2018, Hanxiao Liu.
+# Licensed under the Apache License, Version 2.0;
+# --------------------------------------------------------
+
+from PIL import Image
+from PIL import ImageOps
+import numpy as np
+
+try:
+ import cPickle as pickle
+except:
+ import pickle
+import random
+import utils
+import paddle.fluid as fluid
+import time
+import os
+import functools
+import paddle.reader
+import math
+
+__all__ = ['train10', 'test10']
+
+train_image_size = 224
+test_image_size = 256
+
+CIFAR_MEAN = [0.485, 0.456, 0.406]
+CIFAR_STD = [0.229, 0.224, 0.225]
+
+def random_crop(img, size, scale=[0.08, 1.0], ratio=[3. / 4., 4. / 3.]):
+ aspect_ratio = math.sqrt(np.random.uniform(*ratio))
+ w = 1. * aspect_ratio
+ h = 1. / aspect_ratio
+
+ bound = min((float(img.size[0]) / img.size[1]) / (w**2),
+ (float(img.size[1]) / img.size[0]) / (h**2))
+ scale_max = min(scale[1], bound)
+ scale_min = min(scale[0], bound)
+
+ target_area = img.size[0] * img.size[1] * np.random.uniform(scale_min,
+ scale_max)
+ target_size = math.sqrt(target_area)
+ w = int(target_size * w)
+ h = int(target_size * h)
+
+ i = np.random.randint(0, img.size[0] - w + 1)
+ j = np.random.randint(0, img.size[1] - h + 1)
+
+ img = img.crop((i, j, i + w, j + h))
+ img = img.resize((size, size), Image.BILINEAR)
+ return img
+
+def crop_image(img, target_size, center=True):
+ width, height = img.size
+ size = target_size
+ if center == True:
+ w_start = (width - size) / 2
+ h_start = (height - size) / 2
+ else:
+ w_start = np.random.randint(0, width - size + 1)
+ h_start = np.random.randint(0, height - size + 1)
+ w_end = w_start + size
+ h_end = h_start + size
+ img = img.crop((w_start, h_start, w_end, h_end))
+ return img
+
+def preprocess(img_path, is_training):
+
+ img = Image.open(img_path)
+
+ if is_training:
+ # ramdom resized crop
+ img = random_crop(img, train_image_size)
+ # random horizontal flip
+ if np.random.randint(2):
+ img = img.transpose(Image.FLIP_LEFT_RIGHT)
+ else:
+ # resize
+ img = img.resize((test_image_size, test_image_size), Image.BILINEAR)
+ # center crop
+ img = crop_image(img, train_image_size)
+
+ if img.mode != 'RGB':
+ img = img.convert('RGB')
+
+ img = np.array(img).astype(np.float32)
+
+ # per_image_standardization
+ img_float = img / 255.0
+ img = (img_float - CIFAR_MEAN) / CIFAR_STD
+
+ img = np.transpose(img, (2, 0, 1))
+ return img
+
+
+def reader_creator_filepath(data_dir, sub_name, is_training):
+
+ file_list = os.path.join(data_dir, sub_name)
+ image_file = 'train' if is_training else 'val'
+ dataset_path = os.path.join(data_dir, image_file)
+ print(dataset_path)
+ def reader():
+ with open(file_list)
as flist: + lines = [line.strip() for line in flist] + if is_training: + np.random.shuffle(lines) + for line in lines: + img_path, label = line.split() + #img_path = img_path.replace("JPEG", "jpeg") + img_path_ = os.path.join(dataset_path, img_path) + img = preprocess(img_path_, is_training) + yield img, int(label) + + return reader + + +def train(args): + """ + CIFAR-10 training set creator. + It returns a reader creator, each sample in the reader is image pixels in + [0, 1] and label in [0, 9]. + :return: Training reader creator + :rtype: callable + """ + + return reader_creator_filepath(args.data, 'train.txt', True) + + +def test(args): + """ + CIFAR-10 test set creator. + It returns a reader creator, each sample in the reader is image pixels in + [0, 1] and label in [0, 9]. + :return: Test reader creator. + :rtype: callable + """ + return reader_creator_filepath(args.data, 'val.txt', False) diff --git a/LRC/run.sh b/LRC/run.sh deleted file mode 100644 index 9f1a045..0000000 --- a/LRC/run.sh +++ /dev/null @@ -1,8 +0,0 @@ -CUDA_VISIBLE_DEVICES=0 python -u train_mixup.py \ ---batch_size=80 \ ---auxiliary \ ---weight_decay=0.0003 \ ---learning_rate=0.025 \ ---lrc_loss_lambda=0.7 \ ---cutout - diff --git a/LRC/train_imagenet.py b/LRC/train_imagenet.py new file mode 100644 index 0000000..688160e --- /dev/null +++ b/LRC/train_imagenet.py @@ -0,0 +1,254 @@ +# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserve. +# +#Licensed under the Apache License, Version 2.0 (the "License"); +#you may not use this file except in compliance with the License. +#You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +#Unless required by applicable law or agreed to in writing, software +#distributed under the License is distributed on an "AS IS" BASIS, +#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +#See the License for the specific language governing permissions and +#limitations under the License. +# +# Based on: +# -------------------------------------------------------- +# DARTS +# Copyright (c) 2018, Hanxiao Liu. 
+# Licensed under the Apache License, Version 2.0; +# -------------------------------------------------------- + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function +from learning_rate import cosine_with_warmup_decay +import numpy as np +import argparse +from model import NetworkImageNet as Network +import reader_imagenet as reader +import sys +import os +import time +import logging +import genotypes +import paddle +import paddle.fluid as fluid +import shutil +import utils +import math + +parser = argparse.ArgumentParser("imagenet") +parser.add_argument( + '--data', + type=str, + default='./dataset/imagenet/', + help='location of the data corpus') +parser.add_argument('--batch_size', type=int, default=64, help='batch size') +parser.add_argument( + '--pretrained_model', type=str, default='/save_models/599', help='pretrained model to load') +parser.add_argument('--model_id', type=int, default=2, help='model id') +parser.add_argument( + '--learning_rate', type=float, default=0.025, help='init learning rate') +parser.add_argument('--momentum', type=float, default=0.9, help='momentum') +parser.add_argument( + '--weight_decay', type=float, default=4e-5, help='weight decay') +parser.add_argument( + '--report_freq', type=float, default=10, help='report frequency') +parser.add_argument( + '--epochs', type=int, default=90, help='num of training epochs') +parser.add_argument( + '--init_channels', type=int, default=96, help='num of init channels') +parser.add_argument( + '--layers', type=int, default=20, help='total number of layers') +parser.add_argument( + '--save_model_path', + type=str, + default='save_models', + help='path to save the model') +parser.add_argument( + '--auxiliary', + action='store_true', + default=False, + help='use auxiliary tower') +parser.add_argument( + '--auxiliary_weight', + type=float, + default=0.4, + help='weight for auxiliary loss') +parser.add_argument( + '--drop_path_prob', type=float, default=0.4, help='drop path probability') +parser.add_argument( + '--arch', type=str, default='DARTS', help='which architecture to use') +parser.add_argument( + '--grad_clip', type=float, default=5, help='gradient clipping') +parser.add_argument( + '--warmup_epochs', + default=5, + type=float, + help='warm up to learning rate') +parser.add_argument('--lr_min', type=float, default=0.0001, + help='minimum learning rate for a single GPU') + +args = parser.parse_args() + +ImageNet_CLASSES = 1000 +dataset_train_size = 1281167 +image_size = 224 +genotypes.DARTS = genotypes.MY_DARTS_list[args.model_id] + +def main(): + image_shape = [3, image_size, image_size] + devices = os.getenv("CUDA_VISIBLE_DEVICES") or "" + devices_num = len(devices.split(",")) + logging.info("args = %s", args) + genotype = eval("genotypes.%s" % args.arch) + model = Network(args.init_channels, ImageNet_CLASSES, args.layers, + args.auxiliary, genotype) + + steps_one_epoch = math.ceil(dataset_train_size / (devices_num * args.batch_size)) + train(model, args, image_shape, steps_one_epoch, devices_num) + + +def build_program(main_prog, startup_prog, args, is_train, model, im_shape, + steps_one_epoch, num_gpu): + out = [] + with fluid.program_guard(main_prog, startup_prog): + py_reader = model.build_input(im_shape, is_train) + if is_train: + with fluid.unique_name.guard(): + loss = model.train_model(py_reader, args.init_channels, + args.auxiliary, args.auxiliary_weight) + optimizer = fluid.optimizer.Momentum( + learning_rate=cosine_with_warmup_decay(\ + 
args.learning_rate, args.lr_min, steps_one_epoch,\ + args.warmup_epochs, args.epochs, num_gpu), + regularization=fluid.regularizer.L2Decay(\ + args.weight_decay), + momentum=args.momentum) + optimizer.minimize(loss) + out = [py_reader, loss] + else: + with fluid.unique_name.guard(): + prob, acc_1, acc_5 = model.test_model(py_reader, + args.init_channels) + out = [py_reader, prob, acc_1, acc_5] + return out + + +def train(model, args, im_shape, steps_one_epoch, num_gpu): + train_startup_prog = fluid.Program() + test_startup_prog = fluid.Program() + train_prog = fluid.Program() + test_prog = fluid.Program() + + train_py_reader, loss_train = build_program(train_prog, train_startup_prog, + args, True, model, im_shape, + steps_one_epoch, num_gpu) + + test_py_reader, prob, acc_1, acc_5 = build_program( + test_prog, test_startup_prog, args, False, model, im_shape, + steps_one_epoch, num_gpu) + + test_prog = test_prog.clone(for_test=True) + + place = fluid.CUDAPlace(0) + exe = fluid.Executor(place) + exe.run(train_startup_prog) + exe.run(test_startup_prog) + + #if args.pretrained_model: + + # def if_exist(var): + # return os.path.exists(os.path.join(args.pretrained_model, var.name)) + + # fluid.io.load_vars(exe, args.pretrained_model, main_program=train_prog, predicate=if_exist) + + exec_strategy = fluid.ExecutionStrategy() + exec_strategy.num_threads = 1 + train_exe = fluid.ParallelExecutor( + main_program=train_prog, + use_cuda=True, + loss_name=loss_train.name, + exec_strategy=exec_strategy) + + train_batch_size = args.batch_size + test_batch_size = 256 + train_reader = paddle.batch( + reader.train(args), batch_size=train_batch_size, drop_last=True) + test_reader = paddle.batch(reader.test(args), batch_size=test_batch_size) + + train_py_reader.decorate_paddle_reader(train_reader) + test_py_reader.decorate_paddle_reader(test_reader) + + fluid.clip.set_gradient_clip(fluid.clip.GradientClipByGlobalNorm(args.grad_clip), program=train_prog) + train_fetch_list = [loss_train] + fluid.memory_optimize(train_prog, skip_opt_set=set(train_fetch_list)) + + def save_model(postfix, main_prog): + model_path = os.path.join(args.save_model_path, postfix) + if os.path.isdir(model_path): + shutil.rmtree(model_path) + fluid.io.save_persistables(exe, model_path, main_program=main_prog) + + def test(epoch_id): + test_fetch_list = [prob, acc_1, acc_5] + #objs = utils.AvgrageMeter() + #prob = [] + top1 = utils.AvgrageMeter() + top5 = utils.AvgrageMeter() + test_py_reader.start() + test_start_time = time.time() + step_id = 0 + try: + while True: + prev_test_start_time = test_start_time + test_start_time = time.time() + prob_v, acc_1_v, acc_5_v = exe.run( + test_prog, fetch_list=test_fetch_list) + top1.update(np.array(acc_1_v), np.array(prob_v).shape[0]) + top5.update(np.array(acc_5_v), np.array(prob_v).shape[0]) + if step_id % args.report_freq == 0: + print("Epoch {}, Step {}, acc_1 {}, acc_5 {}, time {}". 
+ format(epoch_id, step_id, + np.array(acc_1_v), + np.array(acc_5_v), test_start_time - + prev_test_start_time)) + step_id += 1 + except fluid.core.EOFException: + test_py_reader.reset() + print("Epoch {0}, top1 {1}, top5 {2}".format(epoch_id, top1.avg, + top5.avg)) + + epoch_start_time = time.time() + for epoch_id in range(args.epochs): + model.drop_path_prob = args.drop_path_prob * epoch_id / args.epochs + train_py_reader.start() + epoch_end_time = time.time() + if epoch_id > 0: + print("Epoch {}, total time {}".format(epoch_id - 1, epoch_end_time + - epoch_start_time)) + epoch_start_time = epoch_end_time + epoch_end_time + start_time = time.time() + step_id = 0 + try: + while True: + prev_start_time = start_time + start_time = time.time() + loss_v, = train_exe.run( + fetch_list=[v.name for v in train_fetch_list]) + print("Epoch {}, Step {}, loss {}, time {}".format(epoch_id, step_id, \ + np.array(loss_v).mean(), start_time-prev_start_time)) + step_id += 1 + sys.stdout.flush() + os._exit(1) + except fluid.core.EOFException: + train_py_reader.reset() + if epoch_id % 50 == 0 or epoch_id == args.epochs - 1: + save_model(str(epoch_id), train_prog) + test(epoch_id) + + +if __name__ == '__main__': + main() diff --git a/LRC/train_mixup.py b/LRC/train_mixup.py index de752c8..1e67c6a 100644 --- a/LRC/train_mixup.py +++ b/LRC/train_mixup.py @@ -26,7 +26,7 @@ import numpy as np import argparse from model import NetworkCIFAR as Network -import reader +import reader_cifar as reader import sys import os import time @@ -35,7 +35,7 @@ import paddle.fluid as fluid import shutil import utils -import cPickle as cp +import math parser = argparse.ArgumentParser("cifar") parser.add_argument( @@ -44,6 +44,9 @@ default='./dataset/cifar/cifar-10-batches-py/', help='location of the data corpus') parser.add_argument('--batch_size', type=int, default=96, help='batch size') +parser.add_argument( + '--pretrained_model', type=str, default='/save_models/599', help='pretrained model to load') +parser.add_argument('--model_id', type=int, help='model id') parser.add_argument( '--learning_rate', type=float, default=0.025, help='init learning rate') parser.add_argument('--momentum', type=float, default=0.9, help='momentum') @@ -58,7 +61,7 @@ parser.add_argument( '--layers', type=int, default=20, help='total number of layers') parser.add_argument( - '--model_path', + '--save_model_path', type=str, default='saved_models', help='path to save the model') @@ -78,7 +81,6 @@ '--cutout_length', type=int, default=16, help='cutout length') parser.add_argument( '--drop_path_prob', type=float, default=0.2, help='drop path probability') -parser.add_argument('--save', type=str, default='EXP', help='experiment name') parser.add_argument( '--arch', type=str, default='DARTS', help='which architecture to use') parser.add_argument( @@ -100,9 +102,9 @@ args = parser.parse_args() CIFAR_CLASSES = 10 -dataset_train_size = 50000 +dataset_train_size = 50000. 
image_size = 32 - +genotypes.DARTS = genotypes.MY_DARTS_list[args.model_id] def main(): image_shape = [3, image_size, image_size] @@ -112,7 +114,8 @@ def main(): genotype = eval("genotypes.%s" % args.arch) model = Network(args.init_channels, CIFAR_CLASSES, args.layers, args.auxiliary, genotype) - steps_one_epoch = dataset_train_size / (devices_num * args.batch_size) + + steps_one_epoch = math.ceil(dataset_train_size / (devices_num * args.batch_size)) train(model, args, image_shape, steps_one_epoch) @@ -120,12 +123,12 @@ def build_program(main_prog, startup_prog, args, is_train, model, im_shape, steps_one_epoch): out = [] with fluid.program_guard(main_prog, startup_prog): - py_reader = model.build_input(im_shape, args.batch_size, is_train) + py_reader = model.build_input(im_shape, is_train) if is_train: with fluid.unique_name.guard(): loss = model.train_model(py_reader, args.init_channels, args.auxiliary, args.auxiliary_weight, - args.batch_size, args.lrc_loss_lambda) + args.lrc_loss_lambda) optimizer = fluid.optimizer.Momentum( learning_rate=cosine_decay(args.learning_rate, \ args.epochs, steps_one_epoch), @@ -136,9 +139,9 @@ def build_program(main_prog, startup_prog, args, is_train, model, im_shape, out = [py_reader, loss] else: with fluid.unique_name.guard(): - loss, acc_1, acc_5 = model.test_model(py_reader, + prob, acc_1, acc_5 = model.test_model(py_reader, args.init_channels) - out = [py_reader, loss, acc_1, acc_5] + out = [py_reader, prob, acc_1, acc_5] return out @@ -152,7 +155,7 @@ def train(model, args, im_shape, steps_one_epoch): args, True, model, im_shape, steps_one_epoch) - test_py_reader, loss_test, acc_1, acc_5 = build_program( + test_py_reader, prob, acc_1, acc_5 = build_program( test_prog, test_startup_prog, args, False, model, im_shape, steps_one_epoch) @@ -163,6 +166,13 @@ def train(model, args, im_shape, steps_one_epoch): exe.run(train_startup_prog) exe.run(test_startup_prog) + #if args.pretrained_model: + + # def if_exist(var): + # return os.path.exists(os.path.join(args.pretrained_model, var.name)) + + # fluid.io.load_vars(exe, args.pretrained_model, main_program=train_prog, predicate=if_exist) + exec_strategy = fluid.ExecutionStrategy() exec_strategy.num_threads = 1 train_exe = fluid.ParallelExecutor( @@ -170,23 +180,27 @@ def train(model, args, im_shape, steps_one_epoch): use_cuda=True, loss_name=loss_train.name, exec_strategy=exec_strategy) + + train_reader = reader.train10(args) test_reader = reader.test10(args) train_py_reader.decorate_paddle_reader(train_reader) test_py_reader.decorate_paddle_reader(test_reader) - fluid.clip.set_gradient_clip(fluid.clip.GradientClipByNorm(args.grad_clip)) - fluid.memory_optimize(fluid.default_main_program()) + fluid.clip.set_gradient_clip(fluid.clip.GradientClipByGlobalNorm(args.grad_clip), program=train_prog) + train_fetch_list = [loss_train] + fluid.memory_optimize(train_prog, skip_opt_set=set(train_fetch_list)) def save_model(postfix, main_prog): - model_path = os.path.join(args.model_path, postfix) + model_path = os.path.join(args.save_model_path, postfix) if os.path.isdir(model_path): shutil.rmtree(model_path) fluid.io.save_persistables(exe, model_path, main_program=main_prog) def test(epoch_id): - test_fetch_list = [loss_test, acc_1, acc_5] - objs = utils.AvgrageMeter() + test_fetch_list = [prob, acc_1, acc_5] + #objs = utils.AvgrageMeter() + #prob = [] top1 = utils.AvgrageMeter() top5 = utils.AvgrageMeter() test_py_reader.start() @@ -196,11 +210,10 @@ def test(epoch_id): while True: prev_test_start_time = test_start_time 
test_start_time = time.time() - loss_test_v, acc_1_v, acc_5_v = exe.run( + prob_v, acc_1_v, acc_5_v = exe.run( test_prog, fetch_list=test_fetch_list) - objs.update(np.array(loss_test_v), args.batch_size) - top1.update(np.array(acc_1_v), args.batch_size) - top5.update(np.array(acc_5_v), args.batch_size) + top1.update(np.array(acc_1_v), np.array(prob_v).shape[0]) + top5.update(np.array(acc_5_v), np.array(prob_v).shape[0]) if step_id % args.report_freq == 0: print("Epoch {}, Step {}, acc_1 {}, acc_5 {}, time {}". format(epoch_id, step_id, @@ -213,7 +226,6 @@ def test(epoch_id): print("Epoch {0}, top1 {1}, top5 {2}".format(epoch_id, top1.avg, top5.avg)) - train_fetch_list = [loss_train] epoch_start_time = time.time() for epoch_id in range(args.epochs): model.drop_path_prob = args.drop_path_prob * epoch_id / args.epochs diff --git a/LRC/utils.py b/LRC/utils.py index 4002b57..1896e1d 100644 --- a/LRC/utils.py +++ b/LRC/utils.py @@ -34,6 +34,10 @@ def mixup_data(x, y, batch_size, alpha=1.0): lam = 1. index = np.random.permutation(batch_size) + # + #lam = 0.5 + #index = np.arange(batch_size-1, -1, -1) + # mixed_x = lam * x + (1 - lam) * x[index, :] y_a, y_b = y, y[index] return mixed_x.astype('float32'), y_a.astype('int64'),\ diff --git a/LRC/voting.py b/LRC/voting.py new file mode 100644 index 0000000..ad43c56 --- /dev/null +++ b/LRC/voting.py @@ -0,0 +1,22 @@ +import numpy as np +import cPickle as cp +import sys, os + +#model_path = 'final_paddle-results' +model_path = 'paddle-results' +fl = os.listdir(model_path) +labels = np.load('labels.npz')['arr_0'] +pred = np.zeros((10000, 10)) +fl.sort() +i = 0 +weight=1 +for f in fl: + print(f) + if i == 1: weight=1.2 + if i == 2: weight=0.8 + if i == 3: weight=1.3 + if i == 4: weight=1.1 + if i == 5: weight=0.9 + pred += weight* cp.load(open(os.path.join(model_path, f))) + print(np.mean(np.argmax(pred, axis=1) == labels)) + i += 1 From bca39465b18d8b218e0209ef8d6f80f48db8c84a Mon Sep 17 00:00:00 2001 From: jerrywgz Date: Tue, 9 Apr 2019 02:49:47 +0000 Subject: [PATCH 2/4] add imagenet model --- .../{download.sh => download_cifar.sh} | 0 LRC/dataset/download_imagenet.sh | 40 +++++++++++++++++++ LRC/model.py | 14 ++++--- LRC/reader_imagenet.py | 2 +- LRC/train_imagenet.py | 20 ++++++---- 5 files changed, 61 insertions(+), 15 deletions(-) rename LRC/dataset/{download.sh => download_cifar.sh} (100%) create mode 100644 LRC/dataset/download_imagenet.sh diff --git a/LRC/dataset/download.sh b/LRC/dataset/download_cifar.sh similarity index 100% rename from LRC/dataset/download.sh rename to LRC/dataset/download_cifar.sh diff --git a/LRC/dataset/download_imagenet.sh b/LRC/dataset/download_imagenet.sh new file mode 100644 index 0000000..947b890 --- /dev/null +++ b/LRC/dataset/download_imagenet.sh @@ -0,0 +1,40 @@ +set -e +if [ "x${IMAGENET_USERNAME}" == x -o "x${IMAGENET_ACCESS_KEY}" == x ];then + echo "Please create an account on image-net.org." + echo "It will provide you a pair of username and accesskey to download imagenet data." + read -p "Username: " IMAGENET_USERNAME + read -p "Accesskey: " IMAGENET_ACCESS_KEY +fi + +root_url=http://www.image-net.org/challenges/LSVRC/2012/nnoupb +valid_tar=ILSVRC2012_img_val.tar +train_tar=ILSVRC2012_img_train.tar +train_folder=train/ +valid_folder=val/ + +echo "Download imagenet training data..." 
+mkdir -p ${train_folder} +wget -nd -c ${root_url}/${train_tar} +tar xf ${train_tar} -C ${train_folder} + +cd ${train_folder} +for x in `ls *.tar` +do + filename=`basename $x .tar` + mkdir -p $filename + tar -xf $x -C $filename + rm -rf $x +done +cd - + +echo "Download imagenet validation data..." +mkdir -p ${valid_folder} +wget -nd -c ${root_url}/${valid_tar} +tar xf ${valid_tar} -C ${valid_folder} + +echo "Download imagenet label file: val_list.txt & train_list.txt" +label_file=ImageNet_label.tgz +label_url=http://imagenet-data.bj.bcebos.com/${label_file} +wget -nd -c ${label_url} +tar zxf ${label_file} + diff --git a/LRC/model.py b/LRC/model.py index b98def2..ca79be0 100644 --- a/LRC/model.py +++ b/LRC/model.py @@ -93,7 +93,8 @@ def forward(self, s0, s1, drop_prob, is_train, name): dropout_implementation='upscale_in_train') s = h3 + h4 out += [s] - return fluid.layers.concat([out[i] for i in self._concat], axis=1) + concat_ = fluid.layers.concat([out[i] for i in self._concat], axis=1, name=name+'concat') + return concat_ def AuxiliaryHeadCIFAR(input, num_classes, aux_name='auxiliary_head'): @@ -337,7 +338,7 @@ def lrc_loss(self): return lrc_loss_mean def AuxiliaryHeadImageNet(input, num_classes, aux_name='auxiliary_head'): - relu_a = fluid.layers.relu(input, inplace=True) + relu_a = fluid.layers.relu(input, inplace=False) #relu_a.persistable = True #print(relu_a) pool_a = fluid.layers.pool2d(relu_a, 5, 'avg', pool_stride=2) @@ -405,10 +406,11 @@ def Stem0Conv(input, C_out): bias_attr=ParamAttr( initializer=Constant(0.), name='stem0.1.bias'), moving_mean_name='stem0.1.running_mean', - moving_variance_name='stem0.1.running_var') - relu_a = fluid.layers.relu(bn_a, inplace=True) + moving_variance_name='stem0.1.running_var', + act='relu') + #relu_a = fluid.layers.relu(bn_a,inplace=True) conv_b = fluid.layers.conv2d( - relu_a, + bn_a, C_out, 3, padding=1, @@ -428,7 +430,7 @@ def Stem0Conv(input, C_out): return bn_b def Stem1Conv(input, C_out): - relu_a = fluid.layers.relu(input, inplace=True) + relu_a = fluid.layers.relu(input,inplace=False) conv_a = fluid.layers.conv2d( relu_a, C_out, diff --git a/LRC/reader_imagenet.py b/LRC/reader_imagenet.py index 6577869..b8d28aa 100644 --- a/LRC/reader_imagenet.py +++ b/LRC/reader_imagenet.py @@ -140,7 +140,7 @@ def train(args): :rtype: callable """ - return reader_creator_filepath(args.data, 'train.txt', True) + return reader_creator_filepath(args.data, 'debug.txt', True) def test(args): diff --git a/LRC/train_imagenet.py b/LRC/train_imagenet.py index 688160e..17e5486 100644 --- a/LRC/train_imagenet.py +++ b/LRC/train_imagenet.py @@ -163,14 +163,19 @@ def train(model, args, im_shape, steps_one_epoch, num_gpu): # return os.path.exists(os.path.join(args.pretrained_model, var.name)) # fluid.io.load_vars(exe, args.pretrained_model, main_program=train_prog, predicate=if_exist) - + #build_strategy = fluid.BuildStrategy() + #build_strategy.enable_inplace = False + #build_strategy.memory_optimize = False + train_fetch_list = [loss_train] + + fluid.memory_optimize(train_prog, skip_opt_set=set(train_fetch_list)) exec_strategy = fluid.ExecutionStrategy() - exec_strategy.num_threads = 1 + #exec_strategy.num_threads = 1 train_exe = fluid.ParallelExecutor( - main_program=train_prog, - use_cuda=True, - loss_name=loss_train.name, - exec_strategy=exec_strategy) + main_program=train_prog, + use_cuda=True, + loss_name=loss_train.name, + exec_strategy=exec_strategy) train_batch_size = args.batch_size test_batch_size = 256 @@ -182,8 +187,7 @@ def train(model, args, 
im_shape, steps_one_epoch, num_gpu): test_py_reader.decorate_paddle_reader(test_reader) fluid.clip.set_gradient_clip(fluid.clip.GradientClipByGlobalNorm(args.grad_clip), program=train_prog) - train_fetch_list = [loss_train] - fluid.memory_optimize(train_prog, skip_opt_set=set(train_fetch_list)) + print(train_prog.to_string(True)) def save_model(postfix, main_prog): model_path = os.path.join(args.save_model_path, postfix) From 378f568b040531ae98fa2428bd4cc85de734406a Mon Sep 17 00:00:00 2001 From: jerrywgz Date: Tue, 30 Jul 2019 08:25:27 +0000 Subject: [PATCH 3/4] clean code --- LRC/model.py | 65 +++++------------- LRC/reader_imagenet.py | 137 ++++++++++++++++++++++++++++++++++---- LRC/train_imagenet.py | 33 ++++----- LRC/train_run_imagenet.sh | 5 ++ 4 files changed, 157 insertions(+), 83 deletions(-) create mode 100644 LRC/train_run_imagenet.sh diff --git a/LRC/model.py b/LRC/model.py index ca79be0..8aa9fe4 100644 --- a/LRC/model.py +++ b/LRC/model.py @@ -98,9 +98,7 @@ def forward(self, s0, s1, drop_prob, is_train, name): def AuxiliaryHeadCIFAR(input, num_classes, aux_name='auxiliary_head'): - relu_a = fluid.layers.relu(input, inplace=True) - #relu_a.persistable = True - #print(relu_a) + relu_a = fluid.layers.relu(input) pool_a = fluid.layers.pool2d(relu_a, 5, 'avg', 3) conv2d_a = fluid.layers.conv2d( pool_a, @@ -144,8 +142,6 @@ def AuxiliaryHeadCIFAR(input, num_classes, aux_name='auxiliary_head'): initializer=Constant(0.), name=bn_b_name + '.bias'), moving_mean_name=bn_b_name + '.running_mean', moving_variance_name=bn_b_name + '.running_var') - #bn_b.persistable = True - #print(bn_b) fc_name = aux_name + '.classifier' fc = fluid.layers.fc(bn_b, num_classes, @@ -236,25 +232,14 @@ def forward(self, init_channel, is_train): self.logits_aux = None num_channel = init_channel * 3 s0 = s1 = StemConv(self.image, num_channel, kernel_size=3, padding=1) - #s0.persistable = True - #print(s0) - print(s0) for i, cell in enumerate(self.cells): - #s1.persistable = True - #print(s1) name = 'cells.' + str(i) + '.' 
s0, s1 = s1, cell.forward(s0, s1, self.drop_path_prob, is_train, name) if i == int(2 * self._layers // 3): if self._auxiliary and self.training: - #s1.persistable = True - #print(s1) self.logits_aux = AuxiliaryHeadCIFAR(s1, self.class_num) - #self.logits_aux.persistable = True - #print(self.logits_aux) out = fluid.layers.adaptive_pool2d(s1, (1, 1), "avg") - #out.persistable = True - #print(out) self.logits = fluid.layers.fc(out, size=self.class_num, param_attr=ParamAttr( @@ -263,9 +248,6 @@ def forward(self, init_channel, is_train): bias_attr=ParamAttr( initializer=Constant(0,), name='classifier.bias')) - #self.logits.persistable = True - #print(self.logits) - #print(self.logits_aux) return self.logits, self.logits_aux def train_model(self, py_reader, init_channels, aux, aux_w, loss_lambda): @@ -302,7 +284,6 @@ def mixup_loss(self, auxiliary, auxiliary_weight): loss_b_aux_mean = fluid.layers.reduce_mean(loss_b_aux) loss_aux = self.lam * loss_a_aux_mean + (1 - self.lam ) * loss_b_aux_mean - #print(loss_aux) return loss + auxiliary_weight * loss_aux def lrc_loss(self): @@ -338,10 +319,8 @@ def lrc_loss(self): return lrc_loss_mean def AuxiliaryHeadImageNet(input, num_classes, aux_name='auxiliary_head'): - relu_a = fluid.layers.relu(input, inplace=False) - #relu_a.persistable = True - #print(relu_a) - pool_a = fluid.layers.pool2d(relu_a, 5, 'avg', pool_stride=2) + relu_a = fluid.layers.relu(input) + pool_a = fluid.layers.pool2d(relu_a, 5, 'avg', pool_stride=3) conv2d_a = fluid.layers.conv2d( pool_a, 128, @@ -374,8 +353,6 @@ def AuxiliaryHeadImageNet(input, num_classes, aux_name='auxiliary_head'): uniform=False, fan_in=0), name=aux_name + '.features.5.weight'), bias_attr=False) - #bn_b.persistable = True - #print(bn_b) fc_name = aux_name + '.classifier' fc = fluid.layers.fc(conv2d_b, num_classes, @@ -413,6 +390,7 @@ def Stem0Conv(input, C_out): bn_a, C_out, 3, + stride=2, padding=1, param_attr=ParamAttr( initializer=Xavier( @@ -430,7 +408,7 @@ def Stem0Conv(input, C_out): return bn_b def Stem1Conv(input, C_out): - relu_a = fluid.layers.relu(input,inplace=False) + relu_a = fluid.layers.relu(input) conv_a = fluid.layers.conv2d( relu_a, C_out, @@ -452,10 +430,9 @@ def Stem1Conv(input, C_out): return bn_a class NetworkImageNet(object): - def __init__(self, C, class_num, layers, auxiliary, genotype): + def __init__(self, C, class_num, layers, genotype): self.class_num = class_num self._layers = layers - self._auxiliary = auxiliary self.drop_path_prob = 0 @@ -499,28 +476,19 @@ def build_input(self, image_shape, is_train): return py_reader - def forward(self, init_channel, is_train): + def forward(self, is_train): self.training = is_train self.logits_aux = None - num_channel = init_channel * 3 s0 = self.stem0(self.image) s1 = self.stem1(s0) for i, cell in enumerate(self.cells): - #s1.persistable = True - #print(s1) name = 'cells.' + str(i) + '.' 
             s0, s1 = s1, cell.forward(s0, s1, self.drop_path_prob, is_train, name)
             if i == int(2 * self._layers // 3):
-                if self._auxiliary and self.training:
-                    #s1.persistable = True
-                    #print(s1)
+                if self.training:
                     self.logits_aux = AuxiliaryHeadImageNet(s1, self.class_num)
-        #self.logits_aux.persistable = True
-        #print(self.logits_aux)
-        out = fluid.layers.pool2d(s1, 7, "avg")
-        #out.persistable = True
-        #print(out)
+        out = fluid.layers.pool2d(s1, 7, "avg", pool_stride=7)
         self.logits = fluid.layers.fc(out,
                                       size=self.class_num,
                                       param_attr=ParamAttr(
@@ -529,12 +497,9 @@ def forward(self, init_channel, is_train):
                                       bias_attr=ParamAttr(
                                           initializer=Constant(0,),
                                           name='classifier.bias'))
-        #self.logits.persistable = True
-        #print(self.logits)
-        #print(self.logits_aux)
         return self.logits, self.logits_aux

-    def calc_loss(self, auxiliary, auxiliary_weight):
+    def calc_loss(self, auxiliary_weight):
         prob = fluid.layers.softmax(self.logits, use_cudnn=False)
         loss = fluid.layers.cross_entropy(prob, self.label)
@@ -548,15 +513,15 @@ def calc_loss(self, auxiliary, auxiliary_weight):
         loss_aux_mean = fluid.layers.reduce_mean(loss_aux)
         return loss_mean + auxiliary_weight * loss_aux_mean

-    def train_model(self, py_reader, init_channels, aux, aux_w):
+    def train_model(self, py_reader, aux_w):
         self.image, self.label = fluid.layers.read_file(py_reader)
-        self.logits, self.logits_aux = self.forward(init_channels, True)
-        self.loss = self.calc_loss(aux, aux_w)
+        self.logits, self.logits_aux = self.forward(True)
+        self.loss = self.calc_loss(aux_w)
         return self.loss

-    def test_model(self, py_reader, init_channels):
+    def test_model(self, py_reader):
         self.image, self.label = fluid.layers.read_file(py_reader)
-        self.logits, _ = self.forward(init_channels, False)
+        self.logits, _ = self.forward(False)
         prob = fluid.layers.softmax(self.logits, use_cudnn=False)
         loss = fluid.layers.cross_entropy(prob, self.label)
         acc_1 = fluid.layers.accuracy(self.logits, self.label, k=1)
diff --git a/LRC/reader_imagenet.py b/LRC/reader_imagenet.py
index b8d28aa..0701952 100644
--- a/LRC/reader_imagenet.py
+++ b/LRC/reader_imagenet.py
@@ -22,7 +22,6 @@
 from PIL import Image
 from PIL import ImageOps
 import numpy as np
-
 try:
     import cPickle as pickle
 except:
@@ -35,38 +34,64 @@
 import functools
 import paddle.reader
 import math
+import cv2

 __all__ = ['train10', 'test10']

-train_image_size = 224
+train_image_size = 320
 test_image_size = 256

 CIFAR_MEAN = [0.485, 0.456, 0.406]
 CIFAR_STD = [0.229, 0.224, 0.225]

+def _parse_kv(r):
+    """Parse a (key, value) record from an ImageNet sequence file."""
+    k, v = r
+    obj = pickle.loads(v)
+    return obj['image'], obj['label']
+
 def random_crop(img, size, scale=[0.08, 1.0], ratio=[3. / 4., 4. / 3.]):
     aspect_ratio = math.sqrt(np.random.uniform(*ratio))
     w = 1. * aspect_ratio
     h = 1. / aspect_ratio
-    bound = min((float(img.size[0]) / img.size[1]) / (w**2),
-                (float(img.size[1]) / img.size[0]) / (h**2))
+    # PIL
+    #bound = min((float(img.size[0]) / img.size[1]) / (w**2),
+    #            (float(img.size[1]) / img.size[0]) / (h**2))
+    # cv2
+    bound = min((float(img.shape[1]) / img.shape[0]) / (w**2),
+                (float(img.shape[0]) / img.shape[1]) / (h**2))
     scale_max = min(scale[1], bound)
     scale_min = min(scale[0], bound)

-    target_area = img.size[0] * img.size[1] * np.random.uniform(scale_min,
-                                                                scale_max)
+    # PIL
+    #target_area = img.size[0] * img.size[1] * np.random.uniform(scale_min,
+    #                                                            scale_max)
+    # cv2
+    target_area = img.shape[0] * img.shape[1] * np.random.uniform(scale_min,
+                                                                  scale_max)
+
     target_size = math.sqrt(target_area)
     w = int(target_size * w)
     h = int(target_size * h)
-    i = np.random.randint(0, img.size[0] - w + 1)
-    j = np.random.randint(0, img.size[1] - h + 1)
+    # PIL
+    #i = np.random.randint(0, img.size[0] - w + 1)
+    #j = np.random.randint(0, img.size[1] - h + 1)

-    img = img.crop((i, j, i + w, j + h))
-    img = img.resize((size, size), Image.BILINEAR)
+    #img = img.crop((i, j, i + w, j + h))
+    #img = img.resize((size, size), Image.BILINEAR)
+    # cv2
+    i = np.random.randint(0, img.shape[0] - h + 1)
+    j = np.random.randint(0, img.shape[1] - w + 1)
+    img = img[i:i + h, j:j + w, :]
+    img = cv2.resize(img, (size, size), interpolation=cv2.INTER_LINEAR)
     return img

+# PIL
+"""
 def crop_image(img, target_size, center=True):
     width, height = img.size
     size = target_size
@@ -80,7 +105,24 @@ def crop_image(img, target_size, center=True):
     h_end = h_start + size
     img = img.crop((w_start, h_start, w_end, h_end))
     return img
-
+"""
+# cv2
+def crop_image(img, target_size, center=True):
+    height, width = img.shape[:2]
+    size = target_size
+    if center == True:
+        w_start = (width - size) // 2
+        h_start = (height - size) // 2
+    else:
+        w_start = np.random.randint(0, width - size + 1)
+        h_start = np.random.randint(0, height - size + 1)
+    w_end = w_start + size
+    h_end = h_start + size
+    img = img[h_start:h_end, w_start:w_end, :]
+    return img
+
+# PIL
+"""
 def preprocess(img_path, is_training):

     img = Image.open(img_path)
@@ -108,14 +150,34 @@ def preprocess(img_path, is_training):
     img = np.transpose(img, (2, 0, 1))

     return img
+"""
+# cv2
+def preprocess(img_path, is_training):
+
+    img = cv2.imread(img_path)
+    if is_training:
+        # random resize crop
+        img = random_crop(img, train_image_size)
+        # random horizontal flip
+        if np.random.randint(2):
+            img = cv2.flip(img, 1)
+    else:
+        # resize
+        img = cv2.resize(img, (test_image_size, test_image_size), interpolation=cv2.INTER_LINEAR)
+        # center crop
+        img = crop_image(img, train_image_size)
+    img_float = img[:, :, ::-1].astype('float32') / 255
+    img = (img_float - CIFAR_MEAN) / CIFAR_STD
+    img = np.transpose(img, (2, 0, 1))
+
+    return img

 def reader_creator_filepath(data_dir, sub_name, is_training):
     file_list = os.path.join(data_dir, sub_name)
     image_file = 'train' if is_training else 'val'
     dataset_path = os.path.join(data_dir, image_file)
-    print(dataset_path)
     def reader():
         with open(file_list) as flist:
             lines = [line.strip() for line in flist]
@@ -130,6 +192,49 @@ def reader():
     return reader

+
+def _reader_creator(data_dir, file_list, is_training):
+    def multiprocess_reader():
+        full_lines = [line.strip() for line in file_list]
+        # NOTE: shuffling here may be redundant; mul_reader_creator_filepath
+        # already shuffles the full list before splitting it across workers.
+        if is_training:
+            np.random.shuffle(full_lines)
+        for line in full_lines:
+            img_path, label = line.split()
+            img_path = os.path.join(data_dir, img_path)
+            img = preprocess(img_path, is_training)
+            yield img, int(label)
+#    multiprocess_reader()
+    return multiprocess_reader
+
+def mul_reader_creator_filepath(data_dir, sub_name, is_training):
+
+    file_list = os.path.join(data_dir, sub_name)
+    image_file = 'train' if is_training else 'val'
+    dataset_path = os.path.join(data_dir, image_file)
+
+    with open(file_list, 'r') as f_dir:
+        lines = f_dir.readlines()
+
+    num_workers = 16
+
+    n = int(math.ceil(len(lines) / float(num_workers)))
+
+    # global shuffle of the whole file list (no per-epoch "pass seed" strategy
+    # as used in the image classification models)
+    if is_training:
+        np.random.shuffle(lines)
+    split_lists = [lines[i:i + n] for i in range(0, len(lines), n)]
+    readers = []
+    for item in split_lists:
+        readers.append(
+            _reader_creator(
+                dataset_path,
+                item,
+                is_training
+            )
+        )
+    return paddle.reader.multiprocess_reader(readers, False)
+
+
 def train(args):
     """
@@ -140,7 +245,10 @@ def train(args):
     :rtype: callable
     """

-    return reader_creator_filepath(args.data, 'debug.txt', True)
+    # return reader_creator_filepath(args.data, 'train.txt', True)
+    return mul_reader_creator_filepath('./dataset/imagenet', 'train.txt', True)
+
+

 def test(args):
@@ -151,4 +259,5 @@
     :return: Test reader creator.
     :rtype: callable
     """
-    return reader_creator_filepath(args.data, 'val.txt', False)
+    return mul_reader_creator_filepath('./dataset/imagenet', 'val.txt', False)
+    # return reader_creator_filepath(args.data, 'val.txt', False)
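
A quick way to sanity-check the new cv2-based multiprocess reader is to pull a few samples from it directly. This is an editorial sketch, not part of the patch; it assumes the ./dataset/imagenet layout (train.txt plus a train/ image directory) that mul_reader_creator_filepath expects and that PaddlePaddle and OpenCV are importable:

import reader_imagenet

class Args(object):
    data = './dataset/imagenet'   # ignored by the patched train(), kept for the signature

reader = reader_imagenet.train(Args())       # callable built by paddle.reader.multiprocess_reader
for step, (img, label) in enumerate(reader()):
    print(img.shape, label)                  # (3, 320, 320) CHW array and an integer label
    if step == 3:
        break
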
diff --git a/LRC/train_imagenet.py b/LRC/train_imagenet.py
index 17e5486..273b1f2 100644
--- a/LRC/train_imagenet.py
+++ b/LRC/train_imagenet.py
@@ -66,11 +66,6 @@
     type=str,
     default='save_models',
     help='path to save the model')
-parser.add_argument(
-    '--auxiliary',
-    action='store_true',
-    default=False,
-    help='use auxiliary tower')
 parser.add_argument(
     '--auxiliary_weight',
     type=float,
@@ -94,7 +89,7 @@
 ImageNet_CLASSES = 1000
 dataset_train_size = 1281167
-image_size = 224
+image_size = 320
 genotypes.DARTS = genotypes.MY_DARTS_list[args.model_id]

 def main():
@@ -104,7 +99,7 @@ def main():
     logging.info("args = %s", args)
     genotype = eval("genotypes.%s" % args.arch)
     model = Network(args.init_channels, ImageNet_CLASSES, args.layers,
-                    args.auxiliary, genotype)
+                    genotype)
     steps_one_epoch = math.ceil(dataset_train_size / (devices_num * args.batch_size))
     train(model, args, image_shape, steps_one_epoch, devices_num)

@@ -117,8 +112,8 @@ def build_program(main_prog, startup_prog, args, is_train, model, im_shape,
     py_reader = model.build_input(im_shape, is_train)
     if is_train:
         with fluid.unique_name.guard():
-            loss = model.train_model(py_reader, args.init_channels,
-                                     args.auxiliary, args.auxiliary_weight)
+            loss = model.train_model(py_reader,
+                                     args.auxiliary_weight)
             optimizer = fluid.optimizer.Momentum(
                 learning_rate=cosine_with_warmup_decay(\
                     args.learning_rate, args.lr_min, steps_one_epoch,\
@@ -130,8 +125,7 @@ def build_program(main_prog, startup_prog, args, is_train, model, im_shape,
             out = [py_reader, loss]
     else:
         with fluid.unique_name.guard():
-            prob, acc_1, acc_5 = model.test_model(py_reader,
-                                                  args.init_channels)
+            prob, acc_1, acc_5 = model.test_model(py_reader)
             out = [py_reader, prob, acc_1, acc_5]
     return out

@@ -163,19 +157,20 @@ def train(model, args, im_shape, steps_one_epoch, num_gpu):
 #        return os.path.exists(os.path.join(args.pretrained_model, var.name))
 #    fluid.io.load_vars(exe, args.pretrained_model, main_program=train_prog, predicate=if_exist)

-    #build_strategy = fluid.BuildStrategy()
-    #build_strategy.enable_inplace = False
-    #build_strategy.memory_optimize = False
+    build_strategy = fluid.BuildStrategy()
+    build_strategy.enable_inplace = True
+    build_strategy.memory_optimize = False
     train_fetch_list = [loss_train]
-    fluid.memory_optimize(train_prog, skip_opt_set=set(train_fetch_list))
+    #fluid.memory_optimize(train_prog, skip_opt_set=set(train_fetch_list))
     exec_strategy = fluid.ExecutionStrategy()
-    #exec_strategy.num_threads = 1
+    exec_strategy.num_threads = 1
     train_exe = fluid.ParallelExecutor(
         main_program=train_prog,
         use_cuda=True,
         loss_name=loss_train.name,
-        exec_strategy=exec_strategy)
+        exec_strategy=exec_strategy,
+        build_strategy=build_strategy)

     train_batch_size = args.batch_size
     test_batch_size = 256
@@ -187,7 +182,7 @@ def train(model, args, im_shape, steps_one_epoch, num_gpu):
     test_py_reader.decorate_paddle_reader(test_reader)
     fluid.clip.set_gradient_clip(fluid.clip.GradientClipByGlobalNorm(args.grad_clip), program=train_prog)
-    print(train_prog.to_string(True))
+    #print(train_prog.to_string(True))

     def save_model(postfix, main_prog):
         model_path = os.path.join(args.save_model_path, postfix)
@@ -246,7 +241,7 @@ def test(epoch_id):
                     np.array(loss_v).mean(), start_time-prev_start_time))
                 step_id += 1
                 sys.stdout.flush()
-                os._exit(1)
+                #os._exit(1)
         except fluid.core.EOFException:
             train_py_reader.reset()
         if epoch_id % 50 == 0 or epoch_id == args.epochs - 1:
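
For the eight-GPU, --batch_size=64 launch in train_run_imagenet.sh below, the steps_one_epoch value that drives the warmup and cosine schedule can be checked by hand. A worked example under those assumed launch settings, not part of the patch:

import math

dataset_train_size = 1281167   # ImageNet-1k training images, as in train_imagenet.py
devices_num = 8                # CUDA_VISIBLE_DEVICES=0,...,7
batch_size = 64                # per-device batch size from the launch script
steps_one_epoch = int(math.ceil(dataset_train_size / float(devices_num * batch_size)))
print(steps_one_epoch)         # 2503 iterations per epoch
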
diff --git a/LRC/train_run_imagenet.sh b/LRC/train_run_imagenet.sh
new file mode 100644
index 0000000..fc472ba
--- /dev/null
+++ b/LRC/train_run_imagenet.sh
@@ -0,0 +1,5 @@
+export FLAGS_eager_delete_tensor_gb=0.0
+export FLAGS_fast_eager_deletion_mode=1
+export FLAGS_fraction_of_gpu_memory_to_use=1.
+nohup env CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7 python -u train_imagenet.py --batch_size=64 > imagenet.log 2>&1 &
+
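
Both training entry points select one of the searched architectures from genotypes.MY_DARTS_list through the --model_id flag; run from the LRC/ directory, the selection reduces to the following illustrative snippet (not part of the patch):

import genotypes

model_id = 1                                   # value passed as --model_id
genotypes.DARTS = genotypes.MY_DARTS_list[model_id]
print(genotypes.DARTS)                         # the Genotype the scripts resolve via args.arch
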
From e3d910f460edd3c6ce2282c476ae1bbe71e32d29 Mon Sep 17 00:00:00 2001
From: jerrywgz
Date: Tue, 30 Jul 2019 09:00:16 +0000
Subject: [PATCH 4/4] clean code

---
 LRC/learning_rate.py |  1 -
 LRC/model.py         |  1 -
 LRC/reader_cifar.py  | 13 +++++--------
 LRC/train_mixup.py   | 11 +++--------
 LRC/utils.py         |  4 ----
 5 files changed, 8 insertions(+), 22 deletions(-)

diff --git a/LRC/learning_rate.py b/LRC/learning_rate.py
index 2a19e0e..6658b3e 100644
--- a/LRC/learning_rate.py
+++ b/LRC/learning_rate.py
@@ -76,4 +76,3 @@ def cosine_with_warmup_decay(learning_rate, lr_min, steps_one_epoch,
     fluid.layers.assign(cosine_lr, lr)
     return lr
-
diff --git a/LRC/model.py b/LRC/model.py
index 8df2c76..48953da 100644
--- a/LRC/model.py
+++ b/LRC/model.py
@@ -175,7 +175,6 @@ def StemConv(input, C_out, kernel_size, padding):
     return bn_a


-
 class NetworkCIFAR(object):
     def __init__(self, C, class_num, layers, auxiliary, genotype):
         self._layers = layers
diff --git a/LRC/reader_cifar.py b/LRC/reader_cifar.py
index f3793ec..d773a44 100644
--- a/LRC/reader_cifar.py
+++ b/LRC/reader_cifar.py
@@ -52,6 +52,7 @@
 CIFAR_MEAN = [0.49139968, 0.48215827, 0.44653124]
 CIFAR_STD = [0.24703233, 0.24348505, 0.26158768]

+
 def generate_reshape_label(label, batch_size, CIFAR_CLASSES=10):
     reshape_label = np.zeros((batch_size, 1), dtype='int32')
     reshape_non_label = np.zeros(
@@ -88,7 +89,7 @@ def preprocess(sample, is_training, args):
     image_array = sample.reshape(3, image_size, image_size)
     rgb_array = np.transpose(image_array, (1, 2, 0))
     img = Image.fromarray(rgb_array, 'RGB')
-    
+
     if is_training:
         # pad and ramdom crop
         img = ImageOps.expand(img, (4, 4, 4, 4), fill=0)  # pad to 40 * 40 * 3
@@ -97,13 +98,13 @@
                         left_top[1] + image_size))
         if np.random.randint(2):
             img = img.transpose(Image.FLIP_LEFT_RIGHT)
-    
+
     img = np.array(img).astype(np.float32)
     # per_image_standardization
     img_float = img / 255.0
     img = (img_float - CIFAR_MEAN) / CIFAR_STD
-    
+
     if is_training and args.cutout:
         center = np.random.randint(image_size, size=2)
         offset_width = max(0, center[0] - half_length)
@@ -114,7 +115,7 @@
         for i in range(offset_height, target_height):
             for j in range(offset_width, target_width):
                 img[i][j][:] = 0.0
-    
+
     img = np.transpose(img, (2, 0, 1))
     return img
@@ -153,10 +154,6 @@ def reader():
             if len(batch_data) == args.batch_size:
                 batch_data = np.array(batch_data, dtype='float32')
                 batch_label = np.array(batch_label, dtype='int64')
-#
-#                batch_data = pickle.load(open('input.pkl'))
-#                batch_label = pickle.load(open('target.pkl')).reshape(-1,1)
-#
                 if is_training:
                     flatten_label, flatten_non_label = \
                         generate_reshape_label(batch_label, args.batch_size)
diff --git a/LRC/train_mixup.py b/LRC/train_mixup.py
index de3a1be..8f88744 100644
--- a/LRC/train_mixup.py
+++ b/LRC/train_mixup.py
@@ -70,6 +70,7 @@
 image_size = 32
 genotypes.DARTS = genotypes.MY_DARTS_list[args.model_id]

+
 def main():
     image_shape = [3, image_size, image_size]
     devices = os.getenv("CUDA_VISIBLE_DEVICES") or ""
@@ -79,7 +80,8 @@ def main():
     model = Network(args.init_channels, CIFAR_CLASSES, args.layers,
                     args.auxiliary, genotype)

-    steps_one_epoch = math.ceil(dataset_train_size / (devices_num * args.batch_size))
+    steps_one_epoch = math.ceil(dataset_train_size /
+                                (devices_num * args.batch_size))
     train(model, args, image_shape, steps_one_epoch)

@@ -136,13 +138,6 @@ def if_exist(var):
                           main_program=train_prog, predicate=if_exist)

-    #if args.pretrained_model:
-
-    #    def if_exist(var):
-    #        return os.path.exists(os.path.join(args.pretrained_model, var.name))
-
-    #    fluid.io.load_vars(exe, args.pretrained_model, main_program=train_prog, predicate=if_exist)
-
     exec_strategy = fluid.ExecutionStrategy()
     exec_strategy.num_threads = 1
     build_strategy = fluid.BuildStrategy()
diff --git a/LRC/utils.py b/LRC/utils.py
index 1896e1d..4002b57 100644
--- a/LRC/utils.py
+++ b/LRC/utils.py
@@ -34,10 +34,6 @@ def mixup_data(x, y, batch_size, alpha=1.0):
         lam = 1.
     index = np.random.permutation(batch_size)
-    #
-    #lam = 0.5
-    #index = np.arange(batch_size-1, -1, -1)
-    #
     mixed_x = lam * x + (1 - lam) * x[index, :]
     y_a, y_b = y, y[index]
     return mixed_x.astype('float32'), y_a.astype('int64'),\