diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..eb2963c --- /dev/null +++ b/.gitignore @@ -0,0 +1,6 @@ +data +datasets +*.pyc +__pycache__ +output +.vscode \ No newline at end of file diff --git a/demo-cifar.sh b/demo-cifar.sh index 2b7eb77..41952f9 100755 --- a/demo-cifar.sh +++ b/demo-cifar.sh @@ -1,13 +1,18 @@ -echo ID: CIFAR-100 -echo No Sparsity -python ood_eval.py --in-dataset CIFAR-100 -echo With Sparsity p=90 -python ood_eval.py --in-dataset CIFAR-100 --p 90 +methods=(msp odin energy mahalanobis sofl rowl atom) +for method in "${methods[@]}"; do + echo "ID: CIFAR-100" + echo "No Sparsity, method: $method" + python ood_eval.py --in-dataset CIFAR-100 --method $method + echo "With Sparsity p=90, method: $method" + python ood_eval.py --in-dataset CIFAR-100 --p 90 --method $method +done -echo ID: CIFAR-10 -echo No Sparsity -python ood_eval.py --in-dataset CIFAR-10 -echo With Sparsity p=90 -python ood_eval.py --in-dataset CIFAR-10 --p 90 +for method in "${methods[@]}"; do + echo "ID: CIFAR-10" + echo "No Sparsity, method: $method" + python ood_eval.py --in-dataset CIFAR-10 --method $method + echo "With Sparsity p=90, method: $method" + python ood_eval.py --in-dataset CIFAR-10 --p 90 --method $method +done diff --git a/ood_eval.py b/ood_eval.py index c60263a..ef15231 100644 --- a/ood_eval.py +++ b/ood_eval.py @@ -8,6 +8,7 @@ import torchvision import torchvision.transforms as transforms from sklearn.linear_model import LogisticRegressionCV +from tqdm import tqdm import models.densenet as dn import util.svhn_loader as svhn import numpy as np @@ -56,6 +57,8 @@ help='depth of resnet') parser.add_argument('--width', default=4, type=int, help='width of resnet') +parser.add_argument('--ignore_existing', default=False, type=int, + help='whether to ignore existing files and overwrite') parser.set_defaults(argument=True) @@ -77,7 +80,7 @@ def eval_ood_detector(args, mode_args): name = args.name epochs = args.epochs - in_save_dir = os.path.join(base_dir, in_dataset, method, name, 'nat') + in_save_dir = os.path.join(base_dir, in_dataset, method, name, f"p={args.p}", 'nat') if not os.path.exists(in_save_dir): os.makedirs(in_save_dir) @@ -130,49 +133,54 @@ def eval_ood_detector(args, mode_args): else: assert False, 'Not supported model arch: {}'.format(args.model_arch) - - model.eval() model.cuda() if not mode_args['out_dist_only']: t0 = time.time() - f1 = open(os.path.join(in_save_dir, "in_scores.txt"), 'w') - g1 = open(os.path.join(in_save_dir, "in_labels.txt"), 'w') + in_scores_file = os.path.join(in_save_dir, "in_scores.txt") + in_labels_file = os.path.join(in_save_dir, "in_labels.txt") - ########################################In-distribution########################################### - print("Processing in-distribution images") + if os.path.exists(in_scores_file) and os.path.exists(in_labels_file) and not args.ignore_existing: + print("In-distribution scores and labels already exist, skipping...") + else: - N = len(testloaderIn.dataset) - count = 0 - for j, data in enumerate(testloaderIn): - images, labels = data - images = images.cuda() - labels = labels.cuda() - curr_batch_size = images.shape[0] + f1 = open(os.path.join(in_save_dir, "in_scores.txt"), 'w') + g1 = open(os.path.join(in_save_dir, "in_labels.txt"), 'w') - inputs = images + ########################################In-distribution########################################### + print("Processing in-distribution images") - scores = get_score(inputs, model, method, method_args) + N = len(testloaderIn.dataset) + count = 0 + for j, data in enumerate(testloaderIn): + images, labels = data + images = images.cuda() + labels = labels.cuda() + curr_batch_size = images.shape[0] - for score in scores: - f1.write("{}\n".format(score)) + inputs = images - outputs = F.softmax(model(inputs)[:, :num_classes], dim=1) - outputs = outputs.detach().cpu().numpy() - preds = np.argmax(outputs, axis=1) - confs = np.max(outputs, axis=1) + scores = get_score(inputs, model, method, method_args) - for k in range(preds.shape[0]): - g1.write("{} {} {}\n".format(labels[k], preds[k], confs[k])) + for score in scores: + f1.write("{}\n".format(score)) - count += curr_batch_size - # print("{:4}/{:4} images processed, {:.1f} seconds used.".format(count, N, time.time()-t0)) - t0 = time.time() + outputs = F.softmax(model(inputs)[:, :num_classes], dim=1) + outputs = outputs.detach().cpu().numpy() + preds = np.argmax(outputs, axis=1) + confs = np.max(outputs, axis=1) - f1.close() - g1.close() + for k in range(preds.shape[0]): + g1.write("{} {} {}\n".format(labels[k], preds[k], confs[k])) + + count += curr_batch_size + # print("{:4}/{:4} images processed, {:.1f} seconds used.".format(count, N, time.time()-t0)) + t0 = time.time() + + f1.close() + g1.close() if mode_args['in_dist_only']: return @@ -184,7 +192,13 @@ def eval_ood_detector(args, mode_args): if not os.path.exists(out_save_dir): os.makedirs(out_save_dir) - f2 = open(os.path.join(out_save_dir, "out_scores.txt"), 'w') + out_scores_file = os.path.join(out_save_dir, "out_scores.txt") + + if os.path.exists(out_scores_file) and not args.ignore_existing: + print(f"Out-of-distribution scores for {out_dataset} already exist, skipping...") + continue + + f2 = open(out_scores_file, 'w') if not os.path.exists(out_save_dir): os.makedirs(out_save_dir) @@ -222,11 +236,11 @@ def eval_ood_detector(args, mode_args): ###################################Out-of-Distributions##################################### t0 = time.time() - print("Processing out-of-distribution images") + print(f"Processing out-of-distribution images for {out_dataset}") N = len(testloaderOut.dataset) count = 0 - for j, data in enumerate(testloaderOut): + for j, data in tqdm(enumerate(testloaderOut), total=len(testloaderOut)): images, labels = data images = images.cuda() @@ -259,8 +273,8 @@ def eval_ood_detector(args, mode_args): else: args.out_datasets = ['SVHN', 'LSUN', 'LSUN_resize', 'iSUN', 'dtd', 'places365'] - if args.method == 'energy': + if args.method == 'energy' or args.method == 'odin': args.method_args['temperature'] = 1000.0 - eval_ood_detector(args, mode_args) - compute_traditional_ood(args.base_dir, args.in_dataset, args.out_datasets, args.method, args.name) + eval_ood_detector(args, mode_args) + compute_traditional_ood(args.base_dir, args.in_dataset, args.out_datasets, args.method, args.name, p=args.p) diff --git a/precompute.py b/precompute.py index 6a0ba94..771150d 100644 --- a/precompute.py +++ b/precompute.py @@ -80,7 +80,7 @@ score_log[start_ind:end_ind] = score.data.cpu().numpy() if batch_idx % 10 == 0: print(batch_idx) - np.save(cache_name, (feat_log.T, score_log.T, label_log)) + # np.save(cache_name, (feat_log.T, score_log.T, label_log)) else: feat_log, score_log, label_log = np.load(cache_name, allow_pickle=True) feat_log, score_log = feat_log.T, score_log.T diff --git a/util/metrics.py b/util/metrics.py index 3999ad0..3527ea0 100644 --- a/util/metrics.py +++ b/util/metrics.py @@ -120,32 +120,27 @@ def print_results(results, in_dataset, out_dataset, name, method): print(' {val:6.2f}\n'.format(val=100.*results['AUOUT']), end='') print('') - # print(' {val:6.2f}'.format(val=100.*results['FPR']), end='') - # print(' {val:6.2f}'.format(val=100.*results['DTERR']), end='') - # print(' {val:6.2f}'.format(val=100.*results['AUROC']), end='') - # print(' {val:6.2f}'.format(val=100.*results['AUIN']), end='') - # print(' {val:6.2f}'.format(val=100.*results['AUOUT']), end='') -def print_all_results(results, datasets, method): +def print_all_results(results, datasets, method, file=None): mtypes = ['FPR', 'AUROC', 'AUIN'] avg_results = compute_average_results(results) - print(' OOD detection method: ' + method) - print(' ', end='') + print(' OOD detection method: ' + method, file=file) + print(' ', end='', file=file) for mtype in mtypes: - print(' {mtype:6s}'.format(mtype=mtype), end='') + print(' {mtype:6s}'.format(mtype=mtype), end='', file=file) for result, dataset in zip(results,datasets): - print('\n{dataset:12s}'.format(dataset=dataset), end='') - print(' {val:6.2f}'.format(val=100.*result['FPR']), end='') - print(' {val:6.2f}'.format(val=100.*result['AUROC']), end='') - print(' {val:6.2f}'.format(val=100.*result['AUIN']), end='') - - print('\nAVG ', end='') - print(' {val:6.2f}'.format(val=100.*avg_results['FPR']), end='') - print(' {val:6.2f}'.format(val=100.*avg_results['AUROC']), end='') - print(' {val:6.2f}'.format(val=100.*avg_results['AUIN']), end='') - print('') + print('\n{dataset:12s}'.format(dataset=dataset), end='', file=file) + print(' {val:6.2f}'.format(val=100.*result['FPR']), end='', file=file) + print(' {val:6.2f}'.format(val=100.*result['AUROC']), end='', file=file) + print(' {val:6.2f}'.format(val=100.*result['AUIN']), end='', file=file) + + print('\nAVG ', end='', file=file) + print(' {val:6.2f}'.format(val=100.*avg_results['FPR']), end='', file=file) + print(' {val:6.2f}'.format(val=100.*avg_results['AUROC']), end='', file=file) + print(' {val:6.2f}'.format(val=100.*avg_results['AUIN']), end='', file=file) + print('', file=file) def compute_average_results(all_results): mtypes = ['FPR', 'DTERR', 'AUROC', 'AUIN', 'AUOUT'] @@ -163,28 +158,19 @@ def compute_average_results(all_results): return avg_results -def compute_traditional_ood(base_dir, in_dataset, out_datasets, method, name): +def compute_traditional_ood(base_dir, in_dataset, out_datasets, method, name, p): # print('Natural OOD') # print('nat_in vs. nat_out') - known = np.loadtxt('{base_dir}/{in_dataset}/{method}/{name}/nat/in_scores.txt'.format(base_dir=base_dir, in_dataset=in_dataset, method=method, name=name), delimiter='\n') - - known_sorted = np.sort(known) - num_k = known.shape[0] - - if method == 'rowl': - threshold = -0.5 - else: - threshold = known_sorted[round(0.05 * num_k)] + known = np.loadtxt('{base_dir}/{in_dataset}/{method}/{name}/p={p}/nat/in_scores.txt'.format(base_dir=base_dir, in_dataset=in_dataset, method=method, name=name, p=p)) all_results = [] total = 0.0 for out_dataset in out_datasets: - novel = np.loadtxt('{base_dir}/{in_dataset}/{method}/{name}/nat/{out_dataset}/out_scores.txt'.format(base_dir=base_dir, in_dataset=in_dataset, method=method, name=name, out_dataset=out_dataset), delimiter='\n') + novel = np.loadtxt('{base_dir}/{in_dataset}/{method}/{name}/p={p}/nat/{out_dataset}/out_scores.txt'.format(base_dir=base_dir, in_dataset=in_dataset, method=method, name=name, p=p, out_dataset=out_dataset)) - in_cond = (novel>threshold).astype(np.float32) total += novel.shape[0] results = cal_metric(known, novel, method) @@ -192,31 +178,28 @@ def compute_traditional_ood(base_dir, in_dataset, out_datasets, method, name): all_results.append(results) print_all_results(all_results, out_datasets, method) - # avg_results = compute_average_results(all_results) - # print_results(avg_results, in_dataset, "All", name, method) + print_all_results(all_results, out_datasets, method, file=open('{base_dir}/ood_results_{in_dataset}_{method}_{name}_p={p}.txt'.format(base_dir=base_dir, in_dataset=in_dataset, method=method, name=name, p=p), 'w')) -def compute_stat(base_dir, in_dataset, out_datasets, method, name): - # print('Natural OOD') - # print('nat_in vs. nat_out') +def compute_stat(base_dir, in_dataset, out_datasets, method, name, p): - known = np.loadtxt('{base_dir}/{in_dataset}/{method}/{name}/nat/in_scores.txt'.format(base_dir=base_dir, in_dataset=in_dataset, method=method, name=name), delimiter='\n') + known = np.loadtxt('{base_dir}/{in_dataset}/{method}/{name}/p={p}/nat/in_scores.txt'.format(base_dir=base_dir, in_dataset=in_dataset, method=method, name=name, p=p)) print(f"ID mean: {known.mean()} std: {known.std()}") all_mean = [] all_std = [] for out_dataset in out_datasets: - novel = np.loadtxt('{base_dir}/{in_dataset}/{method}/{name}/nat/{out_dataset}/out_scores.txt'.format(base_dir=base_dir, in_dataset=in_dataset, method=method, name=name, out_dataset=out_dataset), delimiter='\n') + novel = np.loadtxt('{base_dir}/{in_dataset}/{method}/{name}/p={p}/nat/{out_dataset}/out_scores.txt'.format(base_dir=base_dir, in_dataset=in_dataset, method=method, name=name, p=p, out_dataset=out_dataset)) all_mean.append(novel.mean()) all_std.append(novel.std()) print(f"OOD mean: {sum(all_mean) / len(out_datasets)} std: {sum(all_std) / len(out_datasets)}") return -def compute_in(base_dir, in_dataset, method, name): +def compute_in(base_dir, in_dataset, method, name, p): - known_nat = np.loadtxt('{base_dir}/{in_dataset}/{method}/{name}/nat/in_scores.txt'.format(base_dir=base_dir, in_dataset=in_dataset, method=method, name=name), delimiter='\n') + known_nat = np.loadtxt('{base_dir}/{in_dataset}/{method}/{name}/p={p}/nat/in_scores.txt'.format(base_dir=base_dir, in_dataset=in_dataset, method=method, name=name, p=p)) known_nat_sorted = np.sort(known_nat) num_k = known_nat.shape[0] @@ -225,7 +208,7 @@ def compute_in(base_dir, in_dataset, method, name): else: threshold = known_nat_sorted[round(0.05 * num_k)] - known_nat_label = np.loadtxt('{base_dir}/{in_dataset}/{method}/{name}/nat/in_labels.txt'.format(base_dir=base_dir, in_dataset=in_dataset, method=method, name=name)) + known_nat_label = np.loadtxt('{base_dir}/{in_dataset}/{method}/{name}/p={p}/nat/in_labels.txt'.format(base_dir=base_dir, in_dataset=in_dataset, method=method, name=name, p=p)) nat_in_cond = (known_nat>threshold).astype(np.float32) nat_correct = (known_nat_label[:,0] == known_nat_label[:,1]).astype(np.float32) @@ -236,8 +219,6 @@ def compute_in(base_dir, in_dataset, method, name): known_nat_fnr = np.mean((1.0 - nat_in_cond)) known_nat_eteacc = np.mean(nat_correct * nat_in_cond) - # print('In-distribution performance:') - # print('FNR: {fnr:6.2f}, Acc: {acc:6.2f}, End-to-end Acc: {eteacc:6.2f}'.format(fnr=known_nat_fnr*100,acc=known_nat_acc*100,eteacc=known_nat_eteacc*100)) print('\t{acc:6.2f}, {eteacc:6.2f}'.format(fnr=known_nat_fnr*100,acc=known_nat_acc*100,eteacc=known_nat_eteacc*100)) return diff --git a/util/svhn_loader.py b/util/svhn_loader.py index 3f4ead4..7112a47 100644 --- a/util/svhn_loader.py +++ b/util/svhn_loader.py @@ -92,7 +92,7 @@ def __getitem__(self, index): if self.target_transform is not None: target = self.target_transform(target) - return img, target.astype(np.long) + return img, target.astype(np.int64) def __len__(self): if self.split == "test": @@ -128,3 +128,10 @@ def download(self): else: md5 = self.split_list[self.split][2] download_url(self.url, self.root, self.filename, md5) + + +if __name__ == "__main__": + dataset = SVHN(root="./data", split="train_and_extra", download=True) + print(len(dataset)) + dataset = SVHN(root="./datasets/ood_datasets/svhn", split="test", download=True) + print(len(dataset)) \ No newline at end of file