diff --git a/LRC/dataset/download.sh b/LRC/dataset/download_cifar.sh similarity index 100% rename from LRC/dataset/download.sh rename to LRC/dataset/download_cifar.sh diff --git a/LRC/dataset/download_imagenet.sh b/LRC/dataset/download_imagenet.sh new file mode 100644 index 0000000..947b890 --- /dev/null +++ b/LRC/dataset/download_imagenet.sh @@ -0,0 +1,40 @@ +set -e +if [ "x${IMAGENET_USERNAME}" == x -o "x${IMAGENET_ACCESS_KEY}" == x ];then + echo "Please create an account on image-net.org." + echo "It will provide you a pair of username and accesskey to download imagenet data." + read -p "Username: " IMAGENET_USERNAME + read -p "Accesskey: " IMAGENET_ACCESS_KEY +fi + +root_url=http://www.image-net.org/challenges/LSVRC/2012/nnoupb +valid_tar=ILSVRC2012_img_val.tar +train_tar=ILSVRC2012_img_train.tar +train_folder=train/ +valid_folder=val/ + +echo "Download imagenet training data..." +mkdir -p ${train_folder} +wget -nd -c ${root_url}/${train_tar} +tar xf ${train_tar} -C ${train_folder} + +cd ${train_folder} +for x in `ls *.tar` +do + filename=`basename $x .tar` + mkdir -p $filename + tar -xf $x -C $filename + rm -rf $x +done +cd - + +echo "Download imagenet validation data..." 
+mkdir -p ${valid_folder} +wget -nd -c ${root_url}/${valid_tar} +tar xf ${valid_tar} -C ${valid_folder} + +echo "Download imagenet label file: val_list.txt & train_list.txt" +label_file=ImageNet_label.tgz +label_url=http://imagenet-data.bj.bcebos.com/${label_file} +wget -nd -c ${label_url} +tar zxf ${label_file} + diff --git a/LRC/model.py b/LRC/model.py index 4735439..48953da 100644 --- a/LRC/model.py +++ b/LRC/model.py @@ -93,7 +93,8 @@ def forward(self, s0, s1, drop_prob, is_train, name): dropout_implementation='upscale_in_train') s = h3 + h4 out += [s] - return fluid.layers.concat([out[i] for i in self._concat], axis=1) + concat_ = fluid.layers.concat([out[i] for i in self._concat], axis=1, name=name+'concat') + return concat_ def AuxiliaryHeadCIFAR(input, num_classes, aux_name='auxiliary_head'): @@ -243,7 +244,7 @@ def forward(self, init_channel, is_train): initializer=Normal(scale=1e-3), name='classifier.weight'), bias_attr=ParamAttr( - initializer=Constant(0, ), + initializer=Constant(0), name='classifier.bias')) return self.logits, self.logits_aux @@ -371,7 +372,7 @@ def Stem0Conv(input, C_out): initializer=Xavier( uniform=False, fan_in=0), name='stem0.0.weight'), bias_attr=False) - bn_a = fluid.layers.batch_norm( + relu_a = fluid.layers.batch_norm( conv_a, param_attr=ParamAttr( initializer=Constant(1.), name='stem0.1.weight'), @@ -480,7 +481,7 @@ def forward(self, is_train): s0, s1 = s1, cell.forward(s0, s1, self.drop_path_prob, is_train, name) if i == int(2 * self._layers // 3): - if self._auxiliary and self.training: + if self.training: self.logits_aux = AuxiliaryHeadImageNet(s1, self.class_num) out = fluid.layers.pool2d(s1, 7, "avg", pool_stride=7) self.logits = fluid.layers.fc(out, @@ -489,7 +490,7 @@ def forward(self, is_train): initializer=Normal(scale=1e-3), name='classifier.weight'), bias_attr=ParamAttr( - initializer=Constant(0, ), + initializer=Constant(0), name='classifier.bias')) return self.logits, self.logits_aux diff --git 
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
# Based on:
# --------------------------------------------------------
# DARTS
# Copyright (c) 2018, Hanxiao Liu.
# Licensed under the Apache License, Version 2.0;
# --------------------------------------------------------
"""ImageNet data readers (cv2 pipeline) for LRC/DARTS training.

Each sample yielded by a reader is a (CHW float32 image, int label) pair.
Images are normalized with the ImageNet per-channel mean/std.
"""

from PIL import Image
from PIL import ImageOps
import numpy as np
try:
    import cPickle as pickle  # Python 2
except ImportError:
    import pickle             # Python 3
import random
import utils
import paddle.fluid as fluid
import time
import os
import functools
import paddle.reader
import math
import cv2

# Public entry points of this module.  (The original listed the
# non-existent names 'train10'/'test10', which would make
# `from reader_imagenet import *` export nothing useful and break callers.)
__all__ = ['train', 'test']

train_image_size = 320  # side length of the crop fed to the network
test_image_size = 256   # resize target used at eval time

# ImageNet per-channel statistics (RGB order).  The CIFAR_* names are kept
# only for backward compatibility with code that may import them.
CIFAR_MEAN = [0.485, 0.456, 0.406]
CIFAR_STD = [0.229, 0.224, 0.225]


def _parse_kv(r):
    """Parse one (key, pickled-dict) record from a sequence file.

    Returns (image, label).  Uses the module-level ``pickle`` (cPickle on
    Python 2, pickle on Python 3) instead of unconditionally importing
    cPickle, which raised ImportError under Python 3.
    """
    k, v = r
    obj = pickle.loads(v)
    return obj['image'], obj['label']


def random_crop(img, size, scale=(0.08, 1.0), ratio=(3. / 4., 4. / 3.)):
    """Random resized crop of a cv2 (H, W, C) image.

    Samples an aspect ratio from ``ratio`` and an area fraction from
    ``scale`` (bounded so the crop fits in the image), crops, and resizes
    to (size, size).  Defaults are tuples, not lists, to avoid the
    mutable-default-argument pitfall.
    """
    aspect_ratio = math.sqrt(np.random.uniform(*ratio))
    w = 1. * aspect_ratio
    h = 1. / aspect_ratio

    # cv2 images are indexed (height, width, channels).
    bound = min((float(img.shape[1]) / img.shape[0]) / (w**2),
                (float(img.shape[0]) / img.shape[1]) / (h**2))
    scale_max = min(scale[1], bound)
    scale_min = min(scale[0], bound)

    target_area = img.shape[0] * img.shape[1] * np.random.uniform(scale_min,
                                                                  scale_max)
    target_size = math.sqrt(target_area)
    w = int(target_size * w)
    h = int(target_size * h)

    i = np.random.randint(0, img.shape[0] - h + 1)
    j = np.random.randint(0, img.shape[1] - w + 1)
    img = img[i:i + h, j:j + w, :]
    return cv2.resize(img, (size, size), interpolation=cv2.INTER_LINEAR)


def crop_image(img, target_size, center=True):
    """Square crop of a cv2 (H, W, C) image.

    Uses integer division (``//``) so the slice indices are ints on
    Python 3 as well (the original ``/ 2`` produced float indices there,
    raising TypeError).
    """
    height, width = img.shape[:2]
    size = target_size
    if center:
        w_start = (width - size) // 2
        h_start = (height - size) // 2
    else:
        w_start = np.random.randint(0, width - size + 1)
        h_start = np.random.randint(0, height - size + 1)
    return img[h_start:h_start + size, w_start:w_start + size, :]


def preprocess(img_path, is_training):
    """Load and preprocess one image with cv2.

    Training: random resized crop + random horizontal flip.
    Eval: resize then center crop to ``train_image_size``.
    Both paths return a normalized CHW float32 array.
    """
    img = cv2.imread(img_path)
    if is_training:
        img = random_crop(img, train_image_size)
        if np.random.randint(2):
            img = cv2.flip(img, 1)
    else:
        # NOTE(review): the original resized to test_image_size (256) and
        # then center-cropped train_image_size (320), which produced an
        # invalid negative-offset crop.  We resize to at least the crop
        # size scaled by the conventional 0.875 ratio so the center crop is
        # well defined — confirm the intended eval resolution.
        resize_size = max(test_image_size, int(train_image_size / 0.875))
        img = cv2.resize(img, (resize_size, resize_size),
                         interpolation=cv2.INTER_LINEAR)
        img = crop_image(img, train_image_size)

    # BGR -> RGB, scale to [0, 1], normalize, HWC -> CHW.
    img_float = img[:, :, ::-1].astype('float32') / 255
    img = (img_float - CIFAR_MEAN) / CIFAR_STD
    return np.transpose(img, (2, 0, 1))


def reader_creator_filepath(data_dir, sub_name, is_training):
    """Single-process reader over the list file ``<data_dir>/<sub_name>``.

    Each line of the list file is "<relative image path> <int label>".
    Training lines are shuffled each time the reader is iterated.
    """
    file_list = os.path.join(data_dir, sub_name)
    image_file = 'train' if is_training else 'val'
    dataset_path = os.path.join(data_dir, image_file)

    def reader():
        with open(file_list) as flist:
            lines = [line.strip() for line in flist]
            if is_training:
                np.random.shuffle(lines)
            for line in lines:
                img_path, label = line.split()
                img_path_ = os.path.join(dataset_path, img_path)
                img = preprocess(img_path_, is_training)
                yield img, int(label)

    return reader


def _reader_creator(data_dir, file_list, is_training):
    """Reader over an in-memory list of "<path> <label>" lines.

    Used as one worker of the multiprocess reader; shuffles its shard only
    when ``is_training`` is truthy.
    """

    def multiprocess_reader():
        full_lines = [line.strip() for line in file_list]
        # Per-worker shuffle; the caller has already shuffled globally, so
        # this is best-effort extra randomness for training shards only.
        if is_training:
            np.random.shuffle(full_lines)
        for line in full_lines:
            img_path, label = line.split()
            img_path = os.path.join(data_dir, img_path)
            img = preprocess(img_path, is_training)
            yield img, int(label)

    return multiprocess_reader


def mul_reader_creator_filepath(data_dir, sub_name, is_training):
    """Multi-process reader: shards the list file across 16 workers.

    BUG FIX: the original passed the *string* ``'True'`` as ``is_training``
    to every worker (always truthy), so validation data was shuffled and
    randomly cropped.  The actual flag is now propagated.
    """
    file_list = os.path.join(data_dir, sub_name)
    image_file = 'train' if is_training else 'val'
    dataset_path = os.path.join(data_dir, image_file)

    with open(file_list, 'r') as f_dir:
        lines = f_dir.readlines()

    num_workers = 16
    n = int(math.ceil(len(lines) / float(num_workers)))

    # Global shuffle (instead of the "pass seed" strategy used by the
    # image-classification example) before sharding across workers.
    if is_training:
        np.random.shuffle(lines)
    split_lists = [lines[i:i + n] for i in range(0, len(lines), n)]
    readers = [
        _reader_creator(dataset_path, item, is_training)
        for item in split_lists
    ]
    return paddle.reader.multiprocess_reader(readers, False)


def train(args):
    """ImageNet training set reader creator.

    Honors ``args.data`` (the original hard-coded './dataset/imagenet',
    silently ignoring the --data flag whose default is the same path).

    :return: training reader creator
    :rtype: callable
    """
    return mul_reader_creator_filepath(args.data, 'train.txt', True)


def test(args):
    """ImageNet validation set reader creator.

    :return: validation reader creator
    :rtype: callable
    """
    return mul_reader_creator_filepath(args.data, 'val.txt', False)
+# +#Licensed under the Apache License, Version 2.0 (the "License"); +#you may not use this file except in compliance with the License. +#You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +#Unless required by applicable law or agreed to in writing, software +#distributed under the License is distributed on an "AS IS" BASIS, +#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +#See the License for the specific language governing permissions and +#limitations under the License. +# +# Based on: +# -------------------------------------------------------- +# DARTS +# Copyright (c) 2018, Hanxiao Liu. +# Licensed under the Apache License, Version 2.0; +# -------------------------------------------------------- + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function +from learning_rate import cosine_with_warmup_decay +import numpy as np +import argparse +from model import NetworkImageNet as Network +import reader_imagenet as reader +import sys +import os +import time +import logging +import genotypes +import paddle +import paddle.fluid as fluid +import shutil +import utils +import math + +parser = argparse.ArgumentParser("imagenet") +parser.add_argument( + '--data', + type=str, + default='./dataset/imagenet/', + help='location of the data corpus') +parser.add_argument('--batch_size', type=int, default=64, help='batch size') +parser.add_argument( + '--pretrained_model', type=str, default='/save_models/599', help='pretrained model to load') +parser.add_argument('--model_id', type=int, default=2, help='model id') +parser.add_argument( + '--learning_rate', type=float, default=0.025, help='init learning rate') +parser.add_argument('--momentum', type=float, default=0.9, help='momentum') +parser.add_argument( + '--weight_decay', type=float, default=4e-5, help='weight decay') +parser.add_argument( + '--report_freq', type=float, default=10, help='report 
frequency') +parser.add_argument( + '--epochs', type=int, default=90, help='num of training epochs') +parser.add_argument( + '--init_channels', type=int, default=96, help='num of init channels') +parser.add_argument( + '--layers', type=int, default=20, help='total number of layers') +parser.add_argument( + '--save_model_path', + type=str, + default='save_models', + help='path to save the model') +parser.add_argument( + '--auxiliary_weight', + type=float, + default=0.4, + help='weight for auxiliary loss') +parser.add_argument( + '--drop_path_prob', type=float, default=0.4, help='drop path probability') +parser.add_argument( + '--arch', type=str, default='DARTS', help='which architecture to use') +parser.add_argument( + '--grad_clip', type=float, default=5, help='gradient clipping') +parser.add_argument( + '--warmup_epochs', + default=5, + type=float, + help='warm up to learning rate') +parser.add_argument('--lr_min', type=float, default=0.0001, + help='minimum learning rate for a single GPU') + +args = parser.parse_args() + +ImageNet_CLASSES = 1000 +dataset_train_size = 1281167 +image_size = 320 +genotypes.DARTS = genotypes.MY_DARTS_list[args.model_id] + +def main(): + image_shape = [3, image_size, image_size] + devices = os.getenv("CUDA_VISIBLE_DEVICES") or "" + devices_num = len(devices.split(",")) + logging.info("args = %s", args) + genotype = eval("genotypes.%s" % args.arch) + model = Network(args.init_channels, ImageNet_CLASSES, args.layers, + genotype) + + steps_one_epoch = math.ceil(dataset_train_size / (devices_num * args.batch_size)) + train(model, args, image_shape, steps_one_epoch, devices_num) + + +def build_program(main_prog, startup_prog, args, is_train, model, im_shape, + steps_one_epoch, num_gpu): + out = [] + with fluid.program_guard(main_prog, startup_prog): + py_reader = model.build_input(im_shape, is_train) + if is_train: + with fluid.unique_name.guard(): + loss = model.train_model(py_reader, + args.auxiliary_weight) + optimizer = 
fluid.optimizer.Momentum( + learning_rate=cosine_with_warmup_decay(\ + args.learning_rate, args.lr_min, steps_one_epoch,\ + args.warmup_epochs, args.epochs, num_gpu), + regularization=fluid.regularizer.L2Decay(\ + args.weight_decay), + momentum=args.momentum) + optimizer.minimize(loss) + out = [py_reader, loss] + else: + with fluid.unique_name.guard(): + prob, acc_1, acc_5 = model.test_model(py_reader) + out = [py_reader, prob, acc_1, acc_5] + return out + + +def train(model, args, im_shape, steps_one_epoch, num_gpu): + train_startup_prog = fluid.Program() + test_startup_prog = fluid.Program() + train_prog = fluid.Program() + test_prog = fluid.Program() + + train_py_reader, loss_train = build_program(train_prog, train_startup_prog, + args, True, model, im_shape, + steps_one_epoch, num_gpu) + + test_py_reader, prob, acc_1, acc_5 = build_program( + test_prog, test_startup_prog, args, False, model, im_shape, + steps_one_epoch, num_gpu) + + test_prog = test_prog.clone(for_test=True) + + place = fluid.CUDAPlace(0) + exe = fluid.Executor(place) + exe.run(train_startup_prog) + exe.run(test_startup_prog) + + #if args.pretrained_model: + + # def if_exist(var): + # return os.path.exists(os.path.join(args.pretrained_model, var.name)) + + # fluid.io.load_vars(exe, args.pretrained_model, main_program=train_prog, predicate=if_exist) + build_strategy = fluid.BuildStrategy() + build_strategy.enable_inplace = True + build_strategy.memory_optimize = False + train_fetch_list = [loss_train] + + #fluid.memory_optimize(train_prog, skip_opt_set=set(train_fetch_list)) + exec_strategy = fluid.ExecutionStrategy() + exec_strategy.num_threads = 1 + train_exe = fluid.ParallelExecutor( + main_program=train_prog, + use_cuda=True, + loss_name=loss_train.name, + exec_strategy=exec_strategy, + build_strategy=build_strategy) + + train_batch_size = args.batch_size + test_batch_size = 256 + train_reader = paddle.batch( + reader.train(args), batch_size=train_batch_size, drop_last=True) + test_reader = 
paddle.batch(reader.test(args), batch_size=test_batch_size) + + train_py_reader.decorate_paddle_reader(train_reader) + test_py_reader.decorate_paddle_reader(test_reader) + + fluid.clip.set_gradient_clip(fluid.clip.GradientClipByGlobalNorm(args.grad_clip), program=train_prog) + #print(train_prog.to_string(True)) + + def save_model(postfix, main_prog): + model_path = os.path.join(args.save_model_path, postfix) + if os.path.isdir(model_path): + shutil.rmtree(model_path) + fluid.io.save_persistables(exe, model_path, main_program=main_prog) + + def test(epoch_id): + test_fetch_list = [prob, acc_1, acc_5] + #objs = utils.AvgrageMeter() + #prob = [] + top1 = utils.AvgrageMeter() + top5 = utils.AvgrageMeter() + test_py_reader.start() + test_start_time = time.time() + step_id = 0 + try: + while True: + prev_test_start_time = test_start_time + test_start_time = time.time() + prob_v, acc_1_v, acc_5_v = exe.run( + test_prog, fetch_list=test_fetch_list) + top1.update(np.array(acc_1_v), np.array(prob_v).shape[0]) + top5.update(np.array(acc_5_v), np.array(prob_v).shape[0]) + if step_id % args.report_freq == 0: + print("Epoch {}, Step {}, acc_1 {}, acc_5 {}, time {}". 
+ format(epoch_id, step_id, + np.array(acc_1_v), + np.array(acc_5_v), test_start_time - + prev_test_start_time)) + step_id += 1 + except fluid.core.EOFException: + test_py_reader.reset() + print("Epoch {0}, top1 {1}, top5 {2}".format(epoch_id, top1.avg, + top5.avg)) + + epoch_start_time = time.time() + for epoch_id in range(args.epochs): + model.drop_path_prob = args.drop_path_prob * epoch_id / args.epochs + train_py_reader.start() + epoch_end_time = time.time() + if epoch_id > 0: + print("Epoch {}, total time {}".format(epoch_id - 1, epoch_end_time + - epoch_start_time)) + epoch_start_time = epoch_end_time + epoch_end_time + start_time = time.time() + step_id = 0 + try: + while True: + prev_start_time = start_time + start_time = time.time() + loss_v, = train_exe.run( + fetch_list=[v.name for v in train_fetch_list]) + print("Epoch {}, Step {}, loss {}, time {}".format(epoch_id, step_id, \ + np.array(loss_v).mean(), start_time-prev_start_time)) + step_id += 1 + sys.stdout.flush() + #os._exit(1) + except fluid.core.EOFException: + train_py_reader.reset() + if epoch_id % 50 == 0 or epoch_id == args.epochs - 1: + save_model(str(epoch_id), train_prog) + test(epoch_id) + + +if __name__ == '__main__': + main() diff --git a/LRC/train_mixup.py b/LRC/train_mixup.py index e268b1c..8f88744 100644 --- a/LRC/train_mixup.py +++ b/LRC/train_mixup.py @@ -79,8 +79,8 @@ def main(): genotype = eval("genotypes.%s" % args.arch) model = Network(args.init_channels, CIFAR_CLASSES, args.layers, args.auxiliary, genotype) - - steps_one_epoch = math.ceil(dataset_train_size / + + steps_one_epoch = math.ceil(dataset_train_size / (devices_num * args.batch_size)) train(model, args, image_shape, steps_one_epoch) diff --git a/LRC/train_run_imagenet.sh b/LRC/train_run_imagenet.sh new file mode 100644 index 0000000..fc472ba --- /dev/null +++ b/LRC/train_run_imagenet.sh @@ -0,0 +1,5 @@ +export FLAGS_eager_delete_tensor_gb=0.0 +export FLAGS_fast_eager_deletion_mode=1 +export 
#!/bin/bash
# Launch 8-GPU ImageNet training in the background; all output goes to
# imagenet.log.  The FLAGS_* variables tune PaddlePaddle GPU memory reuse.
export FLAGS_eager_delete_tensor_gb=0.0
export FLAGS_fast_eager_deletion_mode=1
export FLAGS_fraction_of_gpu_memory_to_use=1.0
nohup env CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7 python -u train_imagenet.py --batch_size=64 > imagenet.log 2>&1 &