diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..5d86846 --- /dev/null +++ b/.gitignore @@ -0,0 +1,189 @@ +/super_train/ + +# Created by https://www.gitignore.io/api/vim,macosPeepOpenython,visualstudiocode +# Edit at https://www.gitignore.io/?templates=vim,macosPeepOpenython,visualstudiocode + +### Compressed ### +*.7z +*.deb +*.gz +*.pkg +*.rar +*.rpm +*.sit +*.sitx +*.tar +*.zip +*.zipx +*.tgz + +### macOS ### +# General +.DS_Store +.AppleDouble +.LSOverride + +# Icon must end with two \r +Icon + +# Thumbnails +._* + +# Files that might appear in the root of a volume +.DocumentRevisions-V100 +.fseventsd +.Spotlight-V100 +.TemporaryItems +.Trashes +.VolumeIcon.icns +.com.apple.timemachine.donotpresent + +# Directories potentially created on remote AFP share +.AppleDB +.AppleDesktop +Network Trash Folder +Temporary Items +.apdisk + +### Python ### +# Byte-compiled / optimized / DLL files +__pycache__/ +*.py[cod] +*$py.class + +# C extensions +*.so + +# Distribution / packaging +.Python +build/ +develop-eggs/ +dist/ +downloads/ +eggs/ +.eggs/ +lib/ +lib64/ +parts/ +sdist/ +var/ +wheels/ +pip-wheel-metadata/ +share/python-wheels/ +*.egg-info/ +.installed.cfg +*.egg +MANIFEST + +# PyInstaller +# Usually these files are written by a python script from a template +# before PyInstaller builds the exe, so as to inject date/other infos into it. +*.manifest +*.spec + +# Installer logs +pip-log.txt +pip-delete-this-directory.txt + +# Unit test / coverage reports +htmlcov/ +.tox/ +.nox/ +.coverage +.coverage.* +.cache +nosetests.xml +coverage.xml +*.cover +.hypothesis/ +.pytest_cache/ + +# Translations +*.mo +*.pot + +# Scrapy stuff: +.scrapy + +# Sphinx documentation +docs/_build/ + +# PyBuilder +target/ + +# pyenv +.python-version + +# pipenv +# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. +# However, in case of collaboration, if having platform-specific dependencies or dependencies +# having no cross-platform support, pipenv may install dependencies that don't work, or not +# install all needed dependencies. 
+#Pipfile.lock
+
+# celery beat schedule file
+celerybeat-schedule
+
+# SageMath parsed files
+*.sage.py
+
+# Spyder project settings
+.spyderproject
+.spyproject
+
+# Rope project settings
+.ropeproject
+
+# Mr Developer
+.mr.developer.cfg
+.project
+.pydevproject
+
+# mkdocs documentation
+/site
+
+# mypy
+.mypy_cache/
+.dmypy.json
+dmypy.json
+
+# Pyre type checker
+.pyre/
+
+### Vim ###
+# Swap
+[._]*.s[a-v][a-z]
+[._]*.sw[a-p]
+[._]s[a-rt-v][a-z]
+[._]ss[a-gi-z]
+[._]sw[a-p]
+
+# Session
+Session.vim
+Sessionx.vim
+
+# Temporary
+.netrwhist
+*~
+
+# Auto-generated tag files
+tags
+
+# Persistent undo
+[._]*.un~
+
+# Coc configuration directory
+.vim
+
+### VisualStudioCode ###
+.vscode/*
+!.vscode/settings.json
+!.vscode/tasks.json
+!.vscode/launch.json
+!.vscode/extensions.json
+
+### VisualStudioCode Patch ###
+# Ignore all local history of files
+.history
+
+# End of https://www.gitignore.io/api/vim,macosPeepOpenython,visualstudiocode
diff --git a/README.md b/README.md
index 85e94af..cc59af6 100644
--- a/README.md
+++ b/README.md
@@ -1,3 +1,66 @@
+# Finish the MixPath
+This fork completes the MixPath code.
+
+Contact: limingyao@ainirobot.com
+
+Done:
+- NSGA-II search (using [pymoo](https://pymoo.org))
+- Plotting the search results (see the plotting sketch below)
+
+TODO:
+- Latency lookup tables for SNPE/OpenVINO
+
+## CIFAR-10
+### Run
+**Train**
+```bash
+python S1/train_search.py \
+    --exp_name experiment_name \
+    --m 4 \
+    --data_dir ~/.torch/datasets \
+    --seed 2020
+```
+**Search**
+```bash
+python S1/eval_search.py \
+    --exp_name search_cifar \
+    --m 4 \
+    --data_dir ~/.torch/datasets \
+    --model_path ./super_train/experiment_name/super_train_states.pt.tar \
+    --batch_size 500 \
+    --n_generations 40 \
+    --pop_size 40 \
+    --n_offsprings 10
+```
+
+### Results
+
+![3D plot of the search result](super_train/search_cifar/res_high_tradeoff.png)
+
+Search result: f1 = accuracy, f2 = number of parameters, f3 = GPU latency
+
+![2D plot of the search result](super_train/search_cifar/res_high_tradeoff_acc_latency.png)
+
+Search result: f1 = accuracy, f2 = GPU latency
+
+## Accuracy
+Baseline accuracies are taken from https://github.com/kuangliu/pytorch-cifar:
+
+| Model | Acc. |
+| ----------------- | ----------- |
+| [VGG16](https://arxiv.org/abs/1409.1556) | 92.64% |
+| [ResNet18](https://arxiv.org/abs/1512.03385) | 93.02% |
+| [ResNet50](https://arxiv.org/abs/1512.03385) | 93.62% |
+| [ResNet101](https://arxiv.org/abs/1512.03385) | 93.75% |
+| [MobileNetV2](https://arxiv.org/abs/1801.04381) | 94.43% |
+| [ResNeXt29(32x4d)](https://arxiv.org/abs/1611.05431) | 94.73% |
+| [ResNeXt29(2x64d)](https://arxiv.org/abs/1611.05431) | 94.82% |
+| [DenseNet121](https://arxiv.org/abs/1608.06993) | 95.04% |
+| [PreActResNet18](https://arxiv.org/abs/1603.05027) | 95.11% |
+| [DPN92](https://arxiv.org/abs/1707.01629) | 95.16% |
+| **MixPath_S1 (ours)** | **95.29%** |
+
 # MixPath: A Unified Approach for One-shot Neural Architecture Search
 
 This repo provides the supernet of S1 and our confirmatory experiments on NAS-Bench-101.
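Note (not part of the diff): the result images above are plotted from the pymoo `Result` object that `eval_search.py` saves as `res.pkl`. Below is a minimal plotting sketch; it assumes the objective layout used in `MyProblem._evaluate` (f1 = 100 - accuracy in %, f2 = -parameters in MB, f3 = GPU latency in seconds), and the output path is only an example.

```python
# Sketch (assumed, not part of this diff): plot the pymoo result saved by eval_search.py.
# Objective layout assumed from MyProblem._evaluate:
#   f1 = 100 - accuracy (%), f2 = -parameters (MB), f3 = GPU latency (s).
import torch
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D  # noqa: F401  (needed for 3D axes on older matplotlib)

res = torch.load("./super_train/search_cifar/res.pkl")  # pymoo Result object
acc = 100.0 - res.F[:, 0]          # recover accuracy (%)
params = -res.F[:, 1]              # recover parameter count (MB)
latency_ms = res.F[:, 2] * 1000.0  # seconds per batch -> milliseconds

fig = plt.figure()
ax = fig.add_subplot(111, projection="3d")
ax.scatter(acc, params, latency_ms)
ax.set_xlabel("accuracy (%)")
ax.set_ylabel("params (MB)")
ax.set_zlabel("GPU latency (ms)")
fig.savefig("./super_train/search_cifar/res_high_tradeoff.png")  # example output path
```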
diff --git a/S1/eval_search.py b/S1/eval_search.py new file mode 100644 index 0000000..d0886f8 --- /dev/null +++ b/S1/eval_search.py @@ -0,0 +1,244 @@ +import os +import numpy as np +import argparse +from utils import * +import autograd.numpy as anp +import pymoo +from pymoo.util.misc import stack +from pymoo.model.problem import Problem +from pymoo.algorithms.nsga2 import NSGA2 +#from wnsga2 import WNSGA2 +from pymoo.factory import get_sampling, get_crossover, get_mutation +from pymoo.optimize import minimize +from pymoo.factory import get_termination +from pymoo.visualization.scatter import Scatter +from model_search import SuperNetwork +import torch +import time +import math +from itertools import combinations +import collections +import torchvision.datasets as dset +import torch.backends.cudnn as cudnn +from tqdm import tqdm + + +@torch.no_grad() +def naive_latency(model, choice, size:tuple)->float: + tt = time.time() + device = next(model.parameters()).device + input = torch.rand(32, 3, *size).to(device) + for i in range(10): + model(input, choice) + return (time.time() - tt)/10 + + +def combine(n, m): + assert n >= m + f = math.factorial + return int(f(n)/f(m)/f(n-m)) + + +def get_choice_list(path_num, m, rate_num=2): + assert path_num >= m + conv_choices = [] + for i in range(1, m+1): + conv_choices.extend( + combinations(range(path_num), i)) + rate_choices = list(range(rate_num)) + choices = [] + for i in range(rate_num): + for conv_choice in conv_choices: + choices.append( + dict( conv=conv_choice, rate=i ) + ) + return choices + + +class MyProblem(Problem): + def __init__(self, model, valid_queue, device, choices, layers): + self.model = model + self.valid_queue = valid_queue + self.device = device + self.choices = choices + self.generate = 0 + super().__init__(n_var=layers, + n_obj=3, + n_constr=0, + xl=anp.array([0 for i in range(layers)]), + xu=anp.array([len(choices)-1 for i in range(layers)])) + + def intarray2choice(self, x): + choice = collections.OrderedDict() + for i in range(len(x)): + c = self.choices[x[i]] + choice[i] = c + return choice + + def _evaluate(self, x, out, *args, **kwargs): + """ + max acc, parameters + min latency + """ + num_pop = x.shape[0] + f1 = np.zeros(num_pop) + f2 = np.zeros(num_pop) + f3 = np.zeros(num_pop) + for i in range(num_pop): + choice = self.intarray2choice(x[i]) + acc = self.get_accuracy(choice) + f1[i] = 100.0 - acc + para_amount = self.get_para_amount(choice) + f2[i] = - para_amount + latency = self.get_latency(choice) + f3[i] = latency + out["F"] = anp.column_stack([f1, f2, f3]) + self.generate += 1 + + @torch.no_grad() + def get_accuracy(self, choice): + if choice is None: + assert False + self.model.eval() + all_targets = [] + all_outputs = [] + #for step, (inputs, targets) in tqdm(enumerate(self.valid_queue), total=len(self.valid_queue)): + for step, (inputs, targets) in enumerate(self.valid_queue): + inputs, targets = inputs.to(self.device), targets.to(self.device) + all_targets.append(targets) + outputs = self.model(inputs, choice) + all_outputs.append(outputs) + all_targets = torch.cat(all_targets) + all_outputs = torch.cat(all_outputs) + prec1 = accuracy(all_outputs, all_targets, topk=(1,)) + return prec1[0].cpu().item() + + def get_para_amount(self, choice): + return count_parameters_in_MB(self.model.get_submodule(choice)) + + def get_latency(self, choice): + return naive_latency(self.model, choice, size=(32, 32)) + # TODO calculate latency based on + # latency lookup table for SNPE, OPENVINO, etc + + +def get_args(): + 
parser = argparse.ArgumentParser("Search The MixPath") + parser.add_argument('--exp_name', type=str, required=True, help='search model name') + parser.add_argument('--m', type=int, default=2, required=True, help='num of selected paths as most') + parser.add_argument('--shadow_bn', action='store_false', default=True, help='shadow bn or not, default: True') + parser.add_argument('--data_dir', type=str, default='/home/work/dataset/cifar', help='dataset dir') + parser.add_argument('--classes', type=int, default=10, help='classes') + parser.add_argument('--layers', type=int, default=12, help='num of MB_layers') + parser.add_argument('--kernels', type=list, default=[3, 5, 7, 9], help='selective kernels') + parser.add_argument('--batch_size', type=int, default=96, help='batch size') + parser.add_argument('--epochs', type=int, default=200, help='num of epochs') + parser.add_argument('--seed', type=int, default=2020, help='seed') + parser.add_argument('--search_num', type=int, default=1000, help='num of epochs') + parser.add_argument('--learning_rate', type=float, default=0.025, help='initial learning rate') + parser.add_argument('--learning_rate_min', type=float, default=1e-8, help='min learning rate') + parser.add_argument('--momentum', type=float, default=0.9, help='momentum') + parser.add_argument('--weight_decay', type=float, default=3e-4, help='weight decay') + parser.add_argument('--train_interval', type=int, default=1, help='train to print frequency') + parser.add_argument('--val_interval', type=int, default=5, help='evaluate and save frequency') + parser.add_argument('--dropout_rate', type=float, default=0.2, help='drop out rate') + parser.add_argument('--drop_path_prob', type=float, default=0.0, help='drop_path_prob') + parser.add_argument('--grad_clip', type=float, default=5, help='gradient clipping') + parser.add_argument('--gpu', type=int, default=0, help='gpu id') + parser.add_argument('--resume', type=bool, default=False, help='resume') + # ******************************* dataset *******************************# + parser.add_argument('--dataset', type=str, default='cifar10', help='[cifar10, imagenet]') + parser.add_argument('--cutout', action='store_false', default=True, help='use cutout') + parser.add_argument('--cutout_length', type=int, default=16, help='cutout length') + parser.add_argument('--colorjitter', action='store_true', default=False, help='use colorjitter') + #*************GA*****************# + parser.add_argument('--model_path' , type=str, required=True) + parser.add_argument('--pop_size', type=int, default=40) + parser.add_argument('--n_offsprings', type=int, default=10) + parser.add_argument('--n_generations', type=int, default=40) + + arguments = parser.parse_args() + + + return arguments + + +def main(): + args = get_args() + print(args) + + # prepare dir + if not os.path.exists('./super_train'): + os.mkdir('./super_train') + if not os.path.exists('./super_train/{}'.format(args.exp_name)): + save_path = './super_train/{}'.format(args.exp_name) + os.mkdir(save_path) + + # device + if not torch.cuda.is_available(): + device = torch.device('cpu') + else: + torch.cuda.set_device(args.gpu) + cudnn.benchmark = True + cudnn.enabled = True + device = torch.device("cuda") + + + model = SuperNetwork(shadow_bn=args.shadow_bn, layers=args.layers, classes=args.classes) + model = model.to(device) + print("param size of supernet = %fMB" % count_parameters_in_MB(model)) + checkpoint = torch.load(args.model_path) + model.load_state_dict(checkpoint['supernet_state']) + 
train_transform, valid_transform = data_transforms_cifar(args) + valset = dset.CIFAR10(root=args.data_dir, train=False, download=False, transform=valid_transform) + valid_queue = torch.utils.data.DataLoader(valset, batch_size=args.batch_size, + shuffle=False, pin_memory=True, num_workers=8) + + choices = get_choice_list(path_num=len(args.kernels), m=args.m) + problem = MyProblem(model, valid_queue, device, choices, args.layers) + + algorithm = NSGA2( + pop_size=args.pop_size, + n_offsprings=args.n_offsprings, + sampling=get_sampling("int_random"), + crossover=get_crossover("int_one_point"), + mutation=get_mutation("int_pm"), + eliminate_duplicates=True, + ) + termination = get_termination("n_gen", args.n_generations) + + res = minimize(problem, + algorithm, + termination, + seed=1, + pf=problem.pareto_front(use_cache=False), + save_history=True, + verbose=True) + print(res) + save_path = './super_train/{}'.format(args.exp_name) + torch.save(res, f"{save_path}/res.pkl") + # TODO plot 3D pareto_front points + ## get the pareto-set and pareto-front for plotting + #ps = problem.pareto_set(use_cache=False, flatten=False) + #pf = problem.pareto_front(use_cache=False, flatten=False) + + ## Design Space + #plot = Scatter(title = "Design Space", axis_labels="x") + #plot.add(res.X, s=30, facecolors='none', edgecolors='r') + #plot.add(ps, plot_type="line", color="black", alpha=0.7) + #plot.do() + #plot.apply(lambda ax: ax.set_xlim(-0.5, 1.5)) + #plot.apply(lambda ax: ax.set_ylim(-2, 2)) + ##plot.show() + #plot.savefig(f"{save_path}/design_space.png") + + ## Objective Space + #plot = Scatter(title = "Objective Space") + #plot.add(res.F) + #plot.add(pf, plot_type="line", color="black", alpha=0.7) + ##plot.show() + #plot.savefig(f"{save_path}/objective_space.png") + + +if __name__ == "__main__": + main() diff --git a/S1/model_search.py b/S1/model_search.py index ac96f89..266b397 100644 --- a/S1/model_search.py +++ b/S1/model_search.py @@ -50,6 +50,29 @@ def __init__(self, inplanes, outplanes, shadow_bn, stride, activation=nn.ReLU6): self.mix_bn.append(nn.BatchNorm2d(outplanes)) del bn_list + def get_submodule(self, choice) -> nn.ModuleList: + # choice: {'conv', 'rate'} + conv_ids = choice['conv'] # conv_ids, e.g. [0], [1], [2], [0, 1], [0, 2], [1, 2], [0, 1, 2] + m_ = len(conv_ids) # num of selected paths + rate_id = choice['rate'] # rate_ids, e.g. 0, 1 + assert m_ in [1, 2, 3, 4] + assert rate_id in [0, 1] + + submodule = nn.ModuleList() + submodule.append( self.pw[rate_id] ) + if m_ == 1: + submodule.append(self.mix_conv[rate_id][conv_ids[0]]) + else: + for id in conv_ids: + submodule.append(self.mix_conv[rate_id][id]) + # pw + submodule.append(self.pw_linear[rate_id]) + if self.shadow_bn: + submodule.append( self.mix_bn[rate_id][m_ - 1]) + else: + submodule.append(self.mix_bn[rate_id]) + return submodule + def forward(self, x, choice): # choice: {'conv', 'rate'} conv_ids = choice['conv'] # conv_ids, e.g. 
[0], [1], [2], [0, 1], [0, 2], [1, 2], [0, 1, 2] @@ -128,6 +151,13 @@ def _initialize_weights(self): m.weight.data.uniform_(-init_range, init_range) m.bias.data.zero_() + def get_submodule(self, choice) -> nn.ModuleList: + submodule = nn.ModuleList( + [self.stem, self.last_conv,self.global_pooling, self.classifier ]) + for i in range(self.layers): + submodule.extend( self.Inverted_Block[i].get_submodule(choice[i]) ) + return submodule + def forward(self, x, choice=None): x = self.stem(x) for i in range(self.layers): diff --git a/S1/wnsga2.py b/S1/wnsga2.py new file mode 100644 index 0000000..bd229ff --- /dev/null +++ b/S1/wnsga2.py @@ -0,0 +1,4 @@ +""" +Weighted NSGA-II mentioned by MoGA +""" +pass diff --git a/run.sh b/run.sh new file mode 100644 index 0000000..24bc097 --- /dev/null +++ b/run.sh @@ -0,0 +1,15 @@ +#python S1/train_search.py \ +# --exp_name experiment_name \ +# --m 4\ +# --data_dir ~/.torch/datasets \ +# --seed 2020 + +python S1/eval_search.py \ + --exp_name search_cifar\ + --m 4\ + --data_dir ~/.torch/datasets \ + --model_path ./super_train/experiment_name/super_train_states.pt.tar\ + --batch_size 500\ + --n_generations 40\ + --pop_size 40\ + --n_offsprings 10 diff --git a/super_train/search_cifar/res_high_tradeoff.png b/super_train/search_cifar/res_high_tradeoff.png new file mode 100644 index 0000000..9286561 Binary files /dev/null and b/super_train/search_cifar/res_high_tradeoff.png differ diff --git a/super_train/search_cifar/res_high_tradeoff_acc_latency.png b/super_train/search_cifar/res_high_tradeoff_acc_latency.png new file mode 100644 index 0000000..218d5b7 Binary files /dev/null and b/super_train/search_cifar/res_high_tradeoff_acc_latency.png differ
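Note (not part of the diff): `MyProblem.get_latency` still times the supernet on GPU via `naive_latency`; the TODO there and the README both point at latency lookup tables for SNPE/OpenVINO. The sketch below shows one possible shape for such a table. The JSON layout, key names, and the `LatencyLookupTable` class are assumptions for illustration, not an existing API of this repo, SNPE, or OpenVINO.

```python
# Hypothetical sketch: sum pre-measured per-path latencies for a sampled choice.
# The table layout and key scheme are assumptions; such a table would be built
# offline by profiling each op with the target runtime (SNPE, OpenVINO, ...).
import json


class LatencyLookupTable:
    """Estimate architecture latency (ms) from a per-op lookup table."""

    def __init__(self, table_path: str):
        # Assumed layout:
        # {"layer_0": {"rate_0": {"conv_0": 0.11, "conv_1": 0.15, ...},
        #              "rate_1": {...}},
        #  "layer_1": {...}, ...}
        with open(table_path) as f:
            self.table = json.load(f)

    def predict(self, choice) -> float:
        # choice follows MyProblem.intarray2choice:
        # OrderedDict{layer_id: {"conv": (conv_ids...), "rate": rate_id}}
        total = 0.0
        for layer_id, c in choice.items():
            rate_entry = self.table[f"layer_{layer_id}"][f"rate_{c['rate']}"]
            for conv_id in c["conv"]:
                total += rate_entry[f"conv_{conv_id}"]
        return total


# Possible use inside MyProblem (sketch, file name is hypothetical):
#   self.lut = LatencyLookupTable("snpe_latency_table.json")
#   def get_latency(self, choice):
#       return self.lut.predict(choice)
```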