# hover_next_train/hp_search.py at main · jakebytes/hover_next_train · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
import os
import torch
from torch.utils.data import DataLoader
import numpy as np

from src.multi_head_unet import get_model, load_checkpoint
from src.inference_utils import run_inference
from src.post_proc_utils import (
    prep_regression,
    evaluate,
)
from src.constants import CLASS_NAMES, CLASS_NAMES_PANNUKE
import toml

from src.spatial_augmenter import SpatialAugmenter
from src.data_utils import SliceDataset, PANNUKE_FOLDS

from src.color_conversion import color_augmentations  # , get_normalize

import json
from tqdm.auto import tqdm
import pandas as pd
import argparse

# Global torch setup: fixed RNG seed and cuDNN benchmark mode switched on.
torch.manual_seed(42)
torch.backends.cudnn.benchmark = True

# Settings handed to SpatialAugmenter in find_hyperparameters. Only "mirror"
# and "rotate" carry a non-zero "prob"; every other transform has prob 0.0
# (presumably meaning "disabled" -- confirm against SpatialAugmenter).
aug_params_slow = dict(
    mirror=dict(prob_x=0.5, prob_y=0.5, prob=0.85),
    translate=dict(max_percent=0.05, prob=0.0),
    scale=dict(min=0.8, max=1.2, prob=0.0),
    zoom=dict(min=0.8, max=1.2, prob=0.0),
    rotate=dict(rot90=True, prob=0.85),
    shear=dict(max_percent=0.1, prob=0.0),
    elastic=dict(alpha=[120, 120], sigma=8, prob=0.0),
)


def _sweep_thresholds(candidates, label, score_fn):
    """Return, for every class, the candidate threshold with the highest score.

    Args:
        candidates: 1-D sequence of threshold values to try.
        label: prefix printed in front of each candidate as progress output.
        score_fn: callable mapping one candidate to the ``"optim"`` entry of
            the dict returned by ``evaluate``. Assumed to be an array with one
            score per class -- TODO confirm against ``evaluate``.

    Returns:
        A list of plain ``float`` thresholds, one per position along the
        score array (argmax over the candidate axis).
    """
    scores = []
    for candidate in candidates:
        print(label, candidate, flush=True)
        scores.append(score_fn(candidate))
    # Rows are candidates; argmax over axis 0 picks the best candidate for
    # each remaining position independently.
    best_idx = np.stack(scores).argmax(0)
    # Plain floats keep the result JSON-serializable regardless of the numpy
    # scalar type of `candidates`.
    return [float(candidates[i]) for i in best_idx]


def find_hyperparameters(
    ds,
    models,
    name,
    nclasses=7,
    class_names=CLASS_NAMES,
    rank=0,
    random_seed=42,
    params=None,
):
    """Search foreground and seed thresholds on ``ds`` and save them as JSON.

    Runs ``run_inference`` once over ``ds``, then, for every criterion listed
    in ``params["eval_criteria"]`` (separated by ``"|"``):

    1. sweeps the foreground threshold over ``np.linspace(0.1, 0.9, 9)`` with
       the seed threshold fixed at 0.3 and keeps the best value per class;
    2. sweeps the seed threshold over the same grid using the foreground
       thresholds found in step 1 and keeps the best value per class.

    "Best" means the highest ``evaluate(...)["optim"]`` value. The result is
    written to ``<params["experiment"]>/<name>_param_dict.json`` under the
    keys ``best_fg_<criterion>`` and ``best_seed_<criterion>``. If
    ``params["eval_criteria"]`` is empty, an empty dict is written.

    Args:
        ds: dataset passed to ``torch.utils.data.DataLoader``.
        models: list of models forwarded to ``run_inference``.
        name: prefix of the output JSON file name.
        nclasses: number of classes; also the length of the threshold lists.
        class_names: class names forwarded to ``prep_regression``/``evaluate``.
        rank: device index forwarded to ``color_augmentations`` and
            ``run_inference``.
        random_seed: seed for the ``SpatialAugmenter``.
        params: configuration dict. Keys read here: ``validation_batch_size``,
            ``num_workers``, ``tta``, ``eval_criteria``, ``experiment``; the
            whole dict is also forwarded to ``evaluate``. When ``None``, the
            module-level ``params`` is used (the historical behavior when the
            file is run as a script).

    Raises:
        NameError: if ``params`` is not given and no module-level ``params``
            exists.
    """
    if params is None:
        # Backward compatibility: this function used to read the global
        # `params` that the script entry point defines.
        try:
            params = globals()["params"]
        except KeyError:
            raise NameError(
                "find_hyperparameters needs `params`: pass it explicitly or "
                "define a module-level `params` dict"
            ) from None

    color_aug_fn = color_augmentations(False, s=0.2, rank=rank)
    aug = SpatialAugmenter(aug_params_slow, random_seed=random_seed)

    num_workers = params["num_workers"]
    loader_kwargs = {}
    if num_workers > 0:
        # DataLoader rejects an explicit prefetch_factor when loading happens
        # in the main process (num_workers == 0), so only set it with workers.
        loader_kwargs["prefetch_factor"] = 4
    data_loader = DataLoader(
        ds,
        batch_size=params["validation_batch_size"],
        shuffle=False,
        num_workers=num_workers,
        **loader_kwargs,
    )

    pred_emb_list, pred_class_list, gt_list, _ = run_inference(
        data_loader, models, aug, color_aug_fn, params["tta"], rank=rank
    )
    gt_regression = prep_regression(gt_list, nclasses=nclasses, class_names=class_names)

    def score(fg_thresh_cl, seed_thresh_cl, criterium):
        # One evaluation pass for a given pair of per-class threshold lists.
        eval_dict = evaluate(
            pred_emb_list,
            pred_class_list,
            gt_regression,
            gt_list,
            fg_thresh_cl,
            seed_thresh_cl,
            params,
            criterium,
            nclasses,
            class_names,
        )
        return eval_dict["optim"]

    out_dict = {}
    if params["eval_criteria"] != "":
        fg_threshs = np.linspace(0.1, 0.9, 9)
        seed_threshs = np.linspace(0.1, 0.9, 9)
        for criterium in params["eval_criteria"].split("|"):
            print(criterium, "| searching best fg threshold", flush=True)
            # The seed threshold is held at 0.3 while the fg threshold is swept.
            initial_seed_thresh_cl = [0.3] * nclasses
            # The lambdas are called immediately inside _sweep_thresholds, so
            # capturing loop variables here is safe.
            best_fg_thresh_cl = _sweep_thresholds(
                fg_threshs,
                "FG:",
                lambda t: score([t] * nclasses, initial_seed_thresh_cl, criterium),
            )
            out_dict[f"best_fg_{criterium}"] = best_fg_thresh_cl

            print(criterium, "| searching best seed threshold", flush=True)
            best_seed_thresh_cl = _sweep_thresholds(
                seed_threshs,
                "Seed:",
                lambda t: score(best_fg_thresh_cl, [t] * nclasses, criterium),
            )
            out_dict[f"best_seed_{criterium}"] = best_seed_thresh_cl

            print(criterium)
            print(best_fg_thresh_cl)
            print(best_seed_thresh_cl)

    with open(os.path.join(params["experiment"], name + "_param_dict.json"), "w") as f:
        json.dump(out_dict, f)


def main(nclasses, params, rank=0):
    """Load the checkpointed model, build the test set(s) and search thresholds.

    Args:
        nclasses: number of classes, forwarded to ``find_hyperparameters``.
        params: configuration dict. Keys read here: ``encoder``,
            ``out_channels_cls``, ``inst_channels``, ``experiment``,
            ``checkpoint_path``, ``dataset``, ``seed`` and, depending on the
            dataset, ``fold`` and ``data_path`` (pannuke) or ``data_path_mit``
            and ``data_path_liz`` (otherwise).
        rank: device the model is moved to and inference is run on.

    For ``params["dataset"] == "pannuke"`` a single dataset named
    ``"pannuke_test"`` is evaluated; otherwise two datasets, ``"mit_test"``
    and ``"liz_test"``. One ``<name>_param_dict.json`` is produced per
    dataset by ``find_hyperparameters``.
    """
    # load model
    model = get_model(
        enc=params["encoder"],
        out_channels_cls=params["out_channels_cls"],
        out_channels_inst=params["inst_channels"],
    ).to(rank)
    cp_path = os.path.join(params["experiment"], "train", params["checkpoint_path"])
    # Use the same device as the model above; this was hard-coded to 0, which
    # disagrees with `.to(rank)` whenever rank != 0.
    # NOTE(review): assumes load_checkpoint's `rank` selects the load device.
    model, _, _ = load_checkpoint(model, cp_path, rank=rank)
    model.eval()

    if params["dataset"] == "pannuke":
        # PANNUKE_FOLDS is indexed by the 1-based `fold` setting; the second
        # element of the entry is the test fold, shifted by one to form the
        # "fold<i>" directory name.
        _, test_f = PANNUKE_FOLDS[int(params["fold"]) - 1]
        i = test_f + 1
        # mmap_mode="r" maps the arrays read-only instead of loading them fully.
        raw_fold = np.load(
            os.path.join(params["data_path"], "images", "fold" + str(i), "images.npy"),
            mmap_mode="r",
        )
        gt_fold = np.load(
            os.path.join(params["data_path"], "masks", "fold" + str(i), "labels.npy"),
            mmap_mode="r",
        )
        ds_list = [SliceDataset(raw=raw_fold, labels=gt_fold)]
        ds_names = ["pannuke_test"]
        class_names = CLASS_NAMES_PANNUKE
    else:
        # Mitosis dataset test set (real annotations)
        x_mit_test = np.load(os.path.join(params["data_path_mit"], "test_ds/test_img.npy"))
        y_mit_test = np.load(os.path.join(params["data_path_mit"], "test_ds/test_lab.npy"))

        mit_test_ds = SliceDataset(raw=x_mit_test, labels=y_mit_test)

        # Lizard dataset test set
        x_liz_test = np.load(os.path.join(params["data_path_liz"], "test_images.npy"))
        y_liz_test = np.load(os.path.join(params["data_path_liz"], "test_labels.npy"))

        liz_test_ds = SliceDataset(raw=x_liz_test, labels=y_liz_test)
        ds_list = [mit_test_ds, liz_test_ds]
        ds_names = ["mit_test", "liz_test"]
        class_names = CLASS_NAMES

    print("evaluating for ", class_names, "on", ds_names)
    # find_hyperparameters takes the rest of its configuration from the
    # module-level `params`, which is this same dict when run as a script.
    for ds, name in zip(ds_list, ds_names):
        find_hyperparameters(
            ds,
            [model],
            name,
            nclasses=nclasses,
            class_names=class_names,
            rank=rank,
            random_seed=params["seed"],
        )
    print("done")


# Script entry point: parse CLI arguments, load the experiment's .toml config
# and run the threshold search on the current CUDA device.
if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--config",
        type=str,
        # Required: without it toml.load(None) used to fail with an opaque
        # error instead of a usage message.
        required=True,
        help="path to .toml file of experiment. e.g. lizard_exp_1/params.toml",
    )
    parser.add_argument(
        "--checkpoint",
        type=str,
        default="best_model",
        help="checkpoint to load. e.g. best_model, checkpoint_step_10000. Use this to evaluate other checkpoints",
    )
    args = parser.parse_args()
    # `params` must stay a module-level name: find_hyperparameters falls back
    # to it for its configuration.
    params = toml.load(args.config)
    print(
        "loaded config for",
        params["experiment"],
        "\n starting hyperparameter search...",
        flush=True,
    )
    # The checkpoint name from the CLI is stored in the config; main() joins
    # it onto <experiment>/train/.
    params["checkpoint_path"] = args.checkpoint
    rank = torch.cuda.current_device()
    # 5 classes for pannuke, 7 for every other dataset setting.
    nclasses = 5 if params["dataset"] == "pannuke" else 7
    main(nclasses, params, rank)