Adaptive-TML/main.py at main · simdis/Adaptive-TML · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
import copy
import random
import numpy as np
import pandas as pd
import sklearn.metrics as metrics
import sklearn.neighbors as neighbors
import sklearn.svm as svm
import torch
import torch.utils.data
import torchvision

import argparse
import os
import time

from tqdm import tqdm

import synthetic_dataset.synthetic_dataset
import synthetic_dataset.rotating_hyperplane_dataset
from audio_utils import transforms as audio_transforms
from audio_utils import spectrogram_dataloader_pytorch as audio_datasets
from audio_utils import spectrogram_dataloader_noisy_pytorch as noisy_audio_datasets
from audio_utils import loaders as audio_loaders
from pytorch_extensions import dataset as image_datasets
from pytorch_extensions import layers as custom_pytorch_layers
from pytorch_extensions import sampler
from pytorch_extensions import torch_utils
from utils import utils

from tinyknn import active_tiny_knn, hybrid_tiny_knn, incremental_knn, condensed_nearest_neighbor, condensing_in_time, \
    active_cdt_functions

import river

from typing import Any, Callable, Dict, Iterator, List, Optional, Union, Tuple


def define_and_parse_flags(parse: bool = True) -> Union[argparse.ArgumentParser, argparse.Namespace]:
    """
    Define the FLAGS of this script.

    The flags are grouped as: model / dimensionality reduction, dataset location,
    image and audio pre-processing, synthetic dataset generation, experiment
    selection, baselines, incremental / windowing parameters, NN classifier
    hyper-parameters, and concept-drift set-up.

    :param parse: whether to parse or not the defined flags. When True the command
        line (sys.argv) is parsed and ``window_length`` falls back to
        ``cit_max_samples`` if it was not provided (or is 0).
    :return: the parser (parse=False) or the parsed namespace (parse=True).
    """
    parser = argparse.ArgumentParser()

    # The default seed is drawn once, at parser construction time.
    parser.add_argument('--seed', type=int, default=int(np.random.rand() * 100))
    parser.add_argument('--num_classes', type=int, default=2)
    # NOTE: every fragment of the help text must carry its own separating space,
    # adjacent string literals are concatenated verbatim.
    parser.add_argument('--dr_policy', default="none",
                        choices=["none", "filter_selection", "filter_selection_class_distance",
                                 "class_distance", "filter_selection_plus",
                                 "filter_selection_class_distance_plus"],
                        help="Dimensionality reduction policy. The possible values are "
                             "'none' (default when a non admissible value is provided), "
                             "'filter_selection', 'filter_selection_class_distance', and "
                             "'class_distance'. 'filter_selection_plus', and "
                             "'filter_selection_class_distance_plus' are equivalent "
                             "to their base version, but with the base_cnn modified "
                             "accordingly to their action. Filter selection requires a "
                             "further array specifying the filter indices to keep named filters_to_keep. "
                             "Class distance is automatically learned by the model based on "
                             "the training features. It keeps the class_distance_filters that maximized "
                             "the mean distance among the classes."
                        )
    parser.add_argument('--filters_to_keep', type=str, default="1,2,3,4,5",
                        help="Comma separated number of filters to keep in filter selection mode(s).")
    parser.add_argument('--class_distance_filters', type=int, default=2,
                        help="Number of filters to keep in class_distance mode(s).")
    parser.add_argument('--cnn_fe', default="resnet18",
                        choices=["resnet18", "resnet34", "resnet50", "resnet101", "resnet152"],
                        help="The CNN used as feature extractor.")
    parser.add_argument('--cnn_fe_weights_path', type=str, default="",
                        help="The path to the pth file containing the weights of the CNN FE. **deprecated**")
    parser.add_argument('--feature_layer', type=str, default='maxpool',
                        help="Convolutional Layer at which the features are extracted.")

    parser.add_argument('--data_dir', type=str, required=True,
                        help="The path to the dataset")
    parser.add_argument('--second_data_dir', type=str, default=None,
                        help="The (optional) path to a second dataset. If provided, is used after the change."
                             " It is assumed that the classes are the same in both the datasets.")

    parser.add_argument('--output_dir', type=str, default='./output/')

    parser.add_argument('--image_size', type=int, default=224)
    parser.add_argument('--is_audio', action='store_true', help="Boolean flag that switches among image and audio.")
    parser.add_argument('--add_audio_noise', action='store_true',
                        help="Boolean flag that add noise to audio after change.")
    parser.add_argument('--audio_seconds', type=int, default=1, help="Length of audio waves.")
    parser.add_argument('--sample_rate', type=int, default=22050)
    parser.add_argument('--n_fft', type=int, default=512)
    parser.add_argument('--hop_length', type=int, default=-1)
    parser.add_argument('--top_db', type=int, default=80,
                        help="Cut-off of decibels (default and suggested value is 80db).")
    parser.add_argument('--add_reverb', action='store_true',
                        help="When is_audio and add_audio_noise are true, add reverberation as noise.")
    parser.add_argument('--add_echo', action='store_true',
                        help="When is_audio and add_audio_noise are true, add three echos as noise.")
    parser.add_argument('--add_noise_distortion', action='store_true',
                        help="When is_audio and add_audio_noise are true, add noise distortion.")
    parser.add_argument('--noise_distortion', type=float, default=0.80,
                        help="When is_audio and add_audio_noise are true, distort the noise according to this "
                             "percentage: 1 means no distortion, <1 slow down audio, >1 speeds up audio.")
    # Float defaults are given as floats (not strings) so that the default has the
    # declared type even when it is read straight from the parser.
    parser.add_argument('--echo_delay', type=float, default=100.0,
                        help="The number of milliseconds after which each echo is added.")
    parser.add_argument('--echo_decay', type=float, default=0.3,
                        help="The echo decay.")
    parser.add_argument('--echo_gain_in', type=float, default=0.7,
                        help="The echo gain in.")
    parser.add_argument('--echo_gain_out', type=float, default=0.8,
                        help="The echo gain out.")
    parser.add_argument('--is_synthetic', action='store_true',
                        help="Boolean flag that forces to use a synthetic dataset "
                             "(the path to dataset in this case is ignored).")
    parser.add_argument('--do_rotating', action='store_true',
                        help="When is_synthetic is provided, use the rotating hyperplane dataset.")
    parser.add_argument('--grid_size', type=int, default=7,
                        help="The size of the synthetic dataset squares.")
    parser.add_argument('--concept_drift_magnitude_mean', type=str, default="0",
                        help="The comma separated magnitude of change per each class mean or a single value for all.")
    parser.add_argument('--concept_drift_magnitude_cov', type=str, default="0",
                        help="The comma separated magnitude of change per each class cov or a single value for all.")
    parser.add_argument('--concept_drift_time', type=int, default=1,
                        help="The number of steps in which the concept drift occurs (1=abrupt, >1=gradual).")
    parser.add_argument('--synthetic_classes_mean_scale', type=str, default="1.0",
                        help="The comma separated width of possible mean values for each class.")
    parser.add_argument('--synthetic_classes_mean_min', type=str, default="0.0",
                        help="The comma separated minimum possible mean values for each class.")
    parser.add_argument('--synthetic_classes_cov_scale', type=str, default="0.5",
                        help="The comma separated width of possible cov values for each class.")
    parser.add_argument('--synthetic_classes_cov_min', type=str, default="0.1",
                        help="The comma separated minimum possible cov values for each class.")

    parser.add_argument('--do_incremental', action='store_true', help="Activate the incremental experiments.")
    parser.add_argument('--do_passive', action='store_true', help="Activate passive experiments.")
    parser.add_argument('--do_active', action='store_true', help="Activate active experiments.")
    parser.add_argument('--skip_base_exps', action='store_true', help="Skip the experiments with SVM and NN.")
    parser.add_argument('--skip_nn_exps', action='store_true', help="Skip the experiments with NN.")

    parser.add_argument('--do_knn_adwin', action='store_true', help="Test against the kNN+ADWIN method.")
    parser.add_argument('--do_knn_adwin_paw', action='store_true',
                        help="Test against the kNN+ADWIN method with the PAW.")
    parser.add_argument('--do_sam_knn', action='store_true', help="Test against the SAM-kNN method.")
    parser.add_argument('--do_soa_without_dl', action='store_true',
                        help="Apply the State of the Art algorithms without the feature extractor and the "
                             "dimensionality reduction operator.")

    parser.add_argument('--incremental_step', type=int, default=1,
                        help="The number of samples to be added at each incremental step.")
    parser.add_argument('--cit_max_samples', type=int, default=10000,
                        help="The memory bound of CIT algorithm (measured as number of samples).")
    parser.add_argument('--window_length', type=int,
                        help="The size of the history window in Active Tiny kNN or the training window "
                             "in Hybrid Tiny kNN (default: --cit_max_samples).")
    parser.add_argument('--samples_per_class_to_test', type=str, default="1,2,3,4,5,10,20,30,40,50,60,70,80,90,100",
                        help="The comma separated list of initial training set sizes to be tested. Each value "
                             "corresponds to the number of samples per each class.")
    parser.add_argument('--base_test_samples', type=int, default=500, help="Number of samples per class in testing.")
    parser.add_argument('--n_binomial', type=int, default=50,
                        help="The value of N when using binomial distribution in CUSUM CDT.")

    parser.add_argument('--nn_lr_base', type=str, default="1e-2,5e-3,1e-3,5e-4,1e-4,5e-5,1e-5",
                        help="The comma separated learning rates to test in NN-base classifier.")
    parser.add_argument('--nn_lr_incremental', type=str, default="1e-3, 5e-4, 1e-4, 5e-5, 1e-5, 5e-6, 1e-6",
                        help="The comma separated learning rates to test in NN-base classifier "
                             "during incremental updates.")
    parser.add_argument('--nn_weights_alpha', type=float, default=1e-5,
                        help="The weights L2 regularization coefficient in Adam Optimizer.")
    parser.add_argument('--nn_training_epochs', type=int, default=3,
                        help="The number of epochs the NN classifiers are trained for.")
    parser.add_argument('--nn_batch_size', type=int, default=5,
                        help="The batch size in NN classifiers training.")

    parser.add_argument('--num_readers', type=int, default=8)

    parser.add_argument('--classes_to_change', type=int, default=0,
                        help="Number of classes to change. Must be <= the number of classes.")
    parser.add_argument('--classes_to_add', type=int, default=0,
                        help="Number of classes to add after a change.")

    parser.add_argument('--revert_exps', action='store_true',
                        help="Revert the experiments, i.e., introduce the change immediately and then remove it [Only for audio].")

    # Return the parser or the parsed values according to the parameter 'parse'.
    if parse:
        args = parser.parse_args()
        # Fix default values based on other arguments.
        args.window_length = args.window_length if args.window_length else args.cit_max_samples
        return args
    return parser


def generate_classes(
        flags: argparse.Namespace
) -> Tuple[List[str], List[str]]:
    """
    Draw the classes used before and after the change.
    A pool of num_classes + classes_to_change + classes_to_add classes is sampled
    from flags.data_dir. The first num_classes of the pool are the classes before the
    change; after the change the first classes_to_change of them are replaced by the
    next classes of the pool and classes_to_add further classes are appended.
    :param flags: the namespace of argparse with the parameters.
    :return: a tuple containing the two lists of classes before and after the change.
    If there is no change, both entries are the very same list.
    """
    base_count = flags.num_classes
    swapped_count = flags.classes_to_change
    added_count = flags.classes_to_add

    # Sample every class that is going to be needed in one shot.
    pool = utils.random_subsample_classes(
        directory=flags.data_dir,
        num_classes=base_count + swapped_count + added_count
    )

    if not (swapped_count or added_count):
        # No change requested: the same list is used before and after.
        classes_before_change = classes_after_change = pool
    else:
        classes_before_change = list(pool[:base_count])
        classes_after_change = list(pool[:base_count])
        # Replace the leading classes with the ones reserved for the change.
        for slot in range(swapped_count):
            classes_after_change[slot] = pool[base_count + slot]
        # Append the brand-new classes, taken from the tail of the pool.
        first_added = base_count + swapped_count
        classes_after_change.extend(pool[first_added + offset] for offset in range(added_count))

    leading_before = ', '.join(classes_before_change[:-1])
    print('The class(es) before the change is/are {}, and {}.'.format(
        leading_before, classes_before_change[-1])
    )
    leading_after = ', '.join(classes_after_change[:-1])
    print('The class(es) after the change is/are {}, and {}.'.format(
        leading_after, classes_after_change[-1])
    )
    return classes_before_change, classes_after_change


def _parse_float_csv(text: str) -> np.ndarray:
    """Parse a comma separated list of numbers into a float32 numpy array."""
    return np.array(text.split(','), dtype=np.float32)


def _scalar_if_single(values: np.ndarray) -> Union[float, np.ndarray]:
    """Collapse a one-element array into a plain float, leave longer arrays untouched."""
    return float(values[0]) if len(values) == 1 else values


def _shape_change_magnitude(
        values: np.ndarray, num_classes: int, full_shape: Tuple[int, ...]
) -> Union[float, np.ndarray]:
    """Turn a drift magnitude into a scalar, a per-class vector, or a full_shape array (by length)."""
    if len(values) == 1:
        return float(values[0])
    if len(values) == num_classes:
        return values.reshape((num_classes,))
    return values.reshape(full_shape)


def generate_dataloader(
        flags: argparse.Namespace,
        classes_before_change: List[str],
        classes_after_change: List[str]
) -> Tuple[torch.utils.data.DataLoader, torch.utils.data.Sampler, int, List[int]]:
    """
    The part of code that deals with the creation of the dataloader.
    Depending on the flags one of four datasets is built: rotating hyperplane
    (is_synthetic and do_rotating), synthetic multivariate normal (is_synthetic),
    audio spectrograms (is_audio) or images (default). For audio and images, when
    second_data_dir is provided a second dataset is concatenated to the first one
    and the cumulative lengths of the two parts are recorded as splits.
    :param flags: the namespace of argparse with the parameters.
    :param classes_before_change: the class names used to build the first dataset.
    :param classes_after_change: the class names used to build the second dataset
        (only used when second_data_dir is provided).
    :return: a Tuple with the dataloader, the sampler, the number of samples, and the
        cumulative split lengths ([len(dataset)] when there is a single dataset).
    """
    # Define the dataloader.
    splits_length = None
    # The values must be compared as integers: comparing the raw strings is
    # lexicographic (e.g. "90" > "100") and would undersize the dataset.
    largest_training_set = max(int(value) for value in flags.samples_per_class_to_test.split(','))
    synthetic_dataset_size = (flags.base_test_samples + largest_training_set) * flags.num_classes * 2
    if flags.is_synthetic and flags.do_rotating:
        print("Creating Rotating Hyperplane Dataset Instance.")
        dataset = synthetic_dataset.rotating_hyperplane_dataset.RotatingHyperplaneGridDataset(
            grid_size=flags.grid_size,
            mag_change=0.001,  # Fixed according to related literature
            noise_percentage=0.01,  # No reference in related literature
            sigma_percentage=0.01,  # No reference in related literature
            dataset_size=synthetic_dataset_size,
            seed=flags.seed * 20,
            transform=None
        )
    elif flags.is_synthetic:
        print("Creating Synthetic Dataset Instance.")
        # A single value is forwarded as a scalar, several values as an array.
        class_mean_scale = _scalar_if_single(_parse_float_csv(flags.synthetic_classes_mean_scale))
        class_mean_min = _scalar_if_single(_parse_float_csv(flags.synthetic_classes_mean_min))
        class_cov_scale = _scalar_if_single(_parse_float_csv(flags.synthetic_classes_cov_scale))
        class_cov_min = _scalar_if_single(_parse_float_csv(flags.synthetic_classes_cov_min))
        mean_change_magnitude = _shape_change_magnitude(
            _parse_float_csv(flags.concept_drift_magnitude_mean),
            flags.num_classes,
            (flags.grid_size, flags.grid_size, flags.num_classes)
        )
        # The covariance magnitude is built from its own flag (and not from the
        # mean magnitude, which may already have been collapsed to a scalar).
        cov_change_magnitude = _shape_change_magnitude(
            _parse_float_csv(flags.concept_drift_magnitude_cov),
            flags.num_classes,
            (flags.grid_size * flags.grid_size, flags.grid_size * flags.grid_size, flags.num_classes)
        )
        dataset = synthetic_dataset.synthetic_dataset.SyntheticMultivariateNormalGridDataset(
            grid_size=flags.grid_size,
            num_classes=flags.num_classes,
            dataset_size=synthetic_dataset_size,
            mean_change_magnitude=mean_change_magnitude,
            mean_change_duration=flags.concept_drift_time,
            cov_change_magnitude=cov_change_magnitude,
            cov_change_duration=flags.concept_drift_time,
            change_beginning=synthetic_dataset_size // 2,
            mean_scale=class_mean_scale,
            mean_min=class_mean_min,
            cov_scale=class_cov_scale,
            cov_min=class_cov_min,
            seed=flags.seed * 20,
            transform=None
        )
    elif flags.is_audio:
        print("Creating Speech Command Dataset Instance.")
        # Create the audio transform (a non positive hop_length falls back to n_fft).
        audio_transform = audio_transforms.spectrogram_transforms(
            n_fft=flags.n_fft,
            hop_length=flags.hop_length if flags.hop_length > 0 else flags.n_fft,
            top_db=flags.top_db
        )
        dataset = audio_datasets.SpectrogramFolder(
            root=flags.data_dir,
            loader=audio_loaders.spectrogram_loader_librosa,
            loader_kwargs={
                "sample_rate": flags.sample_rate,
                "max_seconds": flags.audio_seconds,
            },
            extensions=(".wav", ".mp3"),
            transform=audio_transform,
            class_names=classes_before_change
        )
        if flags.second_data_dir:
            if flags.add_audio_noise:
                # NOTE(review): the noisy dataset reads from data_dir (not second_data_dir);
                # presumably the noise is applied on the same recordings -- TODO confirm.
                second_dataset = noisy_audio_datasets.SpectrogramNoisyFolder(
                    root=flags.data_dir,
                    loader=audio_loaders.spectrogram_loader_librosa,
                    loader_kwargs={
                        "sample_rate": flags.sample_rate,
                        "max_seconds": flags.audio_seconds,
                    },
                    extensions=(".wav", ".mp3"),
                    transform=audio_transform,
                    class_names=classes_after_change,
                    noise_steps=flags.concept_drift_time // flags.num_readers + 1,
                    induce_reverb=flags.add_reverb,
                    induce_speed_distortion=flags.add_noise_distortion,
                    speed_distortion=flags.noise_distortion,
                    induce_echo=flags.add_echo,
                    echo_gain_in=flags.echo_gain_in,
                    echo_gain_out=flags.echo_gain_out,
                    echo_delay=flags.echo_delay,
                    echo_decay=flags.echo_decay
                )
            else:
                # Create the second dataset and join both the datasets.
                second_dataset = audio_datasets.SpectrogramFolder(
                    root=flags.second_data_dir,
                    loader=audio_loaders.spectrogram_loader_librosa,
                    loader_kwargs={
                        "sample_rate": flags.sample_rate,
                        "max_seconds": flags.audio_seconds,
                    },
                    extensions=(".wav", ".mp3"),
                    transform=audio_transform,
                    class_names=classes_after_change
                )
            if flags.revert_exps:
                # Reverted experiments: the changed data comes first.
                splits_length = [len(second_dataset), len(dataset) + len(second_dataset)]
                dataset = torch.utils.data.ConcatDataset(
                    [second_dataset, dataset]
                )
            else:
                splits_length = [len(dataset), len(dataset) + len(second_dataset)]
                dataset = torch.utils.data.ConcatDataset(
                    [dataset, second_dataset]
                )
    else:
        print("Creating Image Dataset Instance.")
        # Image case
        image_transform = torchvision.transforms.Compose([
            torchvision.transforms.Resize(flags.image_size),
            torchvision.transforms.RandomCrop(flags.image_size),
            torchvision.transforms.ToTensor(),
            torchvision.transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                             std=[0.229, 0.224, 0.225])
        ])
        dataset = image_datasets.ImageFolderWithClassSelection(
            root=flags.data_dir,
            transform=image_transform,
            class_names=classes_before_change
        )
        if flags.second_data_dir:
            # Create the second dataset and join both the datasets.
            # NOTE(review): the second dataset is read from data_dir although the flag
            # second_data_dir is documented as its location (the audio branch uses
            # second_data_dir) -- TODO confirm whether this is intended.
            second_dataset = image_datasets.ImageFolderWithClassSelection(
                root=flags.data_dir,
                transform=image_transform,
                class_names=classes_after_change
            )
            splits_length = [len(dataset), len(dataset) + len(second_dataset)]
            dataset = torch.utils.data.ConcatDataset(
                [dataset, second_dataset]
            )

    # Create the batch sampler
    batch_sampler = sampler.SequentialSamplerWithOneShuffle(
        data_source=dataset,
        splits=splits_length
    )
    # Create the data loader. The batch size is left to the DataLoader default and
    # the ordering is fully delegated to the sampler.
    dataloader = torch.utils.data.DataLoader(
        dataset,
        sampler=batch_sampler,
        num_workers=flags.num_readers
    )
    print(f"The number of samples within the dataset is {len(dataset)}")
    return dataloader, batch_sampler, len(dataset), splits_length if splits_length else [len(dataset)]


def identity_dimred_(x: np.ndarray):
    """
    No-op dimensionality reduction operator, used when no reduction is requested.
    :param x: the input numpy array.
    :return: the very same object received as input (no copy is made).
    """
    return x


def train_class_distance_dimred(
        train_features: np.ndarray, train_labels: np.ndarray,
        num_classes: int, features_shape: Union[np.ndarray, torch.Tensor], num_filters: int
) -> Tuple[Callable, np.ndarray, np.ndarray, np.ndarray]:
    """
    Function that trains the class distance dimensionality reduction operator.
    Each filter is scored with the sum, over all the pairs of classes, of the
    Euclidean distance between the class mean activations of that filter. The
    num_filters filters with the highest score are kept.
    :param train_features: a numpy array containing the training features.
        The shape is (num_training_samples, *features_shape); the flattened
        version (num_training_samples, prod(features_shape)) is accepted as well.
    :param train_labels: a numpy array containing the training labels.
        The shape is (num_training_samples, ).
    :param num_classes: the number of classes in the considered problem.
    :param features_shape: the shape of the training features.
        The feature shape should be (num_filters, width, height).
    :param num_filters: the number of filters to keep.
    :return: a tuple containing: the callable dimensionality reduction operator, the reduced
        training features (flattened to two dimensions) and labels, the indices of the
        kept filters (int32, sorted by decreasing score).
    """
    # Normalize the shape to a tuple of ints so that it can be safely unpacked.
    shape = tuple(int(dim) for dim in features_shape)
    bs = train_features.shape[0]
    shaped_features = train_features.reshape((bs, *shape))

    # Mean activation of each class.
    mean_image = np.zeros((num_classes, *shape))
    for cc in range(num_classes):
        mean_image[cc] = np.mean(
            shaped_features[train_labels == cc], axis=0
        )

    # Score every filter with the summed distance among the class means restricted to
    # that filter. The distance must be computed per filter: a distance computed on the
    # whole flattened mean images yields one value per class, not per filter.
    per_filter_means = mean_image.reshape((num_classes, shape[0], -1))
    sum_of_distances = np.zeros(shape[0])
    for first in range(num_classes):
        for second in range(first + 1, num_classes):
            sum_of_distances += np.linalg.norm(
                per_filter_means[first] - per_filter_means[second], axis=-1
            )
    # Get the filters characterized by the highest distance among classes
    high_class_distance_filter = np.argsort(-1 * sum_of_distances)[:num_filters]
    high_class_distance_filter = high_class_distance_filter.astype(np.int32)

    # Define the dimred_ function
    def dimred_(x):
        # Reshape the flattened features
        x = x.reshape(shape)
        # Extract the filters
        x = x[high_class_distance_filter]
        # Flatten again and return
        return x.reshape((1, -1))

    # Reduce the training features
    train_features = shaped_features[:, high_class_distance_filter]
    train_features = train_features.reshape((bs, -1))

    return dimred_, train_features, train_labels, high_class_distance_filter


def learn_dimred(
        dr_args: Dict[str, Any],
        train_features: np.ndarray, train_labels: np.ndarray,
        num_classes: int, features_shape: Union[np.ndarray, torch.Tensor]
) -> Tuple[Callable, np.ndarray, np.ndarray, np.ndarray]:
    """
    Entry point for learning a dimensionality reduction operator.
    It validates the requested operator type and dispatches to its training routine.
    Only the "class_distance" type is currently supported.
    :param dr_args: the dimensionality reduction arguments. This is a dictionary with the
        key 'type' (the kind of dimensionality reduction) and, for "class_distance", the
        key 'filters' (the number of filters to keep).
    :param train_features: a numpy array containing the training features.
        The shape is (num_training_samples, *features_shape).
    :param train_labels: a numpy array containing the training labels.
        The shape is (num_training_samples, ).
    :param num_classes: the number of classes in the considered problem.
    :param features_shape: the shape of the training features.
        The feature shape should be (num_filters, width, height).
    :return: a tuple containing: the callable dimensionality reduction operator, the reduced
        training features and labels, the indices of the kept filters.
    :raises ValueError: when dr_args['type'] is not a supported type.
    """
    # Reject unsupported operators first, so that the happy path stays un-nested.
    if dr_args['type'] != "class_distance":
        raise ValueError("Invalid dr_args fields. Accepted dr_args['type'] are [class_distance].")

    filters_to_keep = dr_args['filters']
    return train_class_distance_dimred(
        train_features=train_features,
        train_labels=train_labels,
        num_classes=num_classes,
        features_shape=features_shape,
        num_filters=filters_to_keep
    )


def compute_training_features(
        num_training_samples: int, num_classes: int, features_size: int,
        iterator: Iterator, cnn_forward_fn: Callable, cnn_forward_args: Optional[Tuple[Any, ...]] = None,
        _verbose: bool = False
) -> Tuple[np.ndarray, np.ndarray, int]:
    """
    Build the initial training set by running the feature extractor on the stream.
    Samples are drawn from the iterator until num_training_samples samples of every
    class have been stored; samples of a class that is already complete are drawn
    (and counted) but discarded.
    :param num_training_samples: the number of samples per class to be computed.
    :param num_classes: the number of classes.
    :param features_size: the size of the (flattened) features.
    :param iterator: the dataloader iterator. It should provide at each iteration a tuple
        containing the sample and its label.
    :param cnn_forward_fn: the feature extractor forward function. It is called with the
        sample followed by cnn_forward_args.
    :param cnn_forward_args: the feature extractor forward function arguments.
    :param _verbose: whether print verbose messages or not.
    :return: a tuple containing the arrays of training features and labels, plus
        an integer that contains the number of samples drawn from the iterator.
    """
    extra_args = tuple() if cnn_forward_args is None else cnn_forward_args

    total_slots = num_training_samples * num_classes
    training_features = np.zeros((total_slots, features_size))
    training_labels = np.zeros(total_slots)

    # Number of samples still to be collected, per class and overall.
    class_counter = np.ones(num_classes) * num_training_samples
    missing = np.sum(class_counter)

    stored = 0  # next free row of the output arrays
    drawn = 0  # samples consumed from the iterator so far
    while missing > 0:
        sample, label = next(iterator)
        if _verbose:
            print('Iteration {} -- Label {} -- Class Counter {}'.format(
                drawn, label, class_counter))
        drawn += 1

        # The class of this sample is already complete: skip it.
        if not class_counter[label]:
            continue

        # Extract, flatten and store the features together with the label.
        features = torch.flatten(cnn_forward_fn(sample, *extra_args))
        training_features[stored] = features.data.numpy()
        training_labels[stored] = label.data.numpy()
        stored += 1
        class_counter[label] -= 1
        missing -= 1
        if _verbose:
            print('Saving! New class counter {}'.format(class_counter))

    return training_features, training_labels, drawn


def compute_training_features_nodl(
        num_training_samples: int, num_classes: int, features_size: int,
        iterator: Iterator, _verbose: bool = False
) -> Tuple[np.ndarray, np.ndarray, int]:
    """
    Build the initial training set directly from the raw samples (no feature extractor involved).
    Samples are pulled from the iterator one at a time; a sample is stored (flattened) only while
    its class still has free slots, so that every class ends up with exactly
    ``num_training_samples`` entries. If the iterator runs out before all the slots are filled,
    the ``StopIteration`` raised by ``next`` propagates to the caller.
    :param num_training_samples: the number of samples to collect for each class.
    :param num_classes: the number of classes.
    :param features_size: the length of each flattened sample.
    :param iterator: the dataloader iterator. At each iteration it should provide a tuple
        containing the sample and its label (both are used as torch tensors).
    :param _verbose: whether to print verbose messages or not.
    :return: a tuple containing the array of stored samples, of shape
        (num_training_samples * num_classes, features_size), the array of their labels, and
        the number of samples drawn from the iterator (stored or discarded).
    """
    total_slots = num_training_samples * num_classes
    training_features = np.zeros((total_slots, features_size))
    training_labels = np.zeros(total_slots)

    # Free slots left for each class, and their overall sum.
    slots_left = np.full(num_classes, float(num_training_samples))
    missing = slots_left.sum()

    drawn = 0  # Samples pulled from the iterator so far.
    stored = 0  # Next free row in the output arrays.
    while missing > 0:
        sample, label = next(iterator)
        if _verbose:
            print('Iteration {} -- Label {} -- Class Counter {}'.format(
                drawn, label, slots_left))
        drawn += 1

        # Discard the sample when its class is already complete.
        if not slots_left[label]:
            continue

        training_features[stored] = torch.flatten(sample).data.numpy()
        training_labels[stored] = label.data.numpy()
        stored += 1
        slots_left[label] -= 1
        missing -= 1
        if _verbose:
            print('Saving! New class counter {}'.format(slots_left))

    return training_features, training_labels, drawn


def extract_training_features(
        training_features: np.ndarray, training_labels: np.ndarray, num_samples_per_class: int
) -> Tuple[np.ndarray, np.ndarray]:
    """
    Select, among already extracted features, the first ``num_samples_per_class`` samples of
    every class, keeping the selected samples in their original relative order.
    A class holding fewer samples than requested contributes all the samples it has.
    :param training_features: the training features of shape (num_samples, features_size).
    :param training_labels: the training labels of shape (num_samples, ).
    :param num_samples_per_class: the number of samples to keep for each class.
    :return: a tuple with the selected features and their labels.
    """
    classes = np.unique(training_labels)
    assert num_samples_per_class * np.size(classes) <= np.size(training_labels), \
        "The number of samples per class multiplied by the number of classes should be " \
        "smaller or equal the number of samples"

    # For every class, take the positions of its first occurrences.
    per_class_positions = []
    for class_label in classes:
        positions = np.where(training_labels == class_label)[0]
        per_class_positions.append(positions[:num_samples_per_class])

    # Merge the positions and restore the original ordering of the samples.
    selected = np.sort(np.concatenate(per_class_positions))
    return training_features[selected], training_labels[selected]


def extract_next_data(
        test_iterator: Iterator, time_results_dict: dict, base_cnn: Callable, dimred_: Callable, current_idx: int,
        apply_deep_learning: bool = True
) -> Tuple[np.ndarray, torch.Tensor]:
    """
    An utility function that draws the next sample from the iterator and turns it into the features
    in input to the classifiers, along with the corresponding label for the subsequent and
    optional testing/adaptation.
    :param test_iterator: the dataset iterator. Each item is a tuple (sample, label).
    :param time_results_dict: the dictionary where to accumulate the time statistics at the keys
        "dl" (time spent drawing the sample) and "fe_dr" (feature extraction + dimensionality
        reduction). Both entries are indexed with current_idx and updated in place.
    :param base_cnn: the feature extractor callable. Not used when apply_deep_learning is False.
    :param dimred_: the dimensionality reduction callable. It receives the flattened features as
        a numpy array of shape (1, features_size). Not used when apply_deep_learning is False.
    :param current_idx: the index at where to store the time statistics within the time_results_dict.
    :param apply_deep_learning: whether to apply the feature extractor and the dimensionality
        reduction on new data. When False the sample is returned as it is (converted to numpy)
        and the "fe_dr" statistic is left untouched.
    :return: a Tuple containing the features (whatever dimred_ returns, or the raw sample as a
        numpy array) and the label exactly as provided by the iterator.
    """
    # Get the sample
    st_time = time.time()
    im_, lb_ = next(test_iterator)
    time_results_dict["dl"][current_idx] += time.time() - st_time

    if not apply_deep_learning:
        # The output is simply the input.
        return im_.data.numpy(), lb_

    # Compute feature extractor + dimensionality reduction
    st_time = time.time()
    # The features are only read, never back-propagated through: disabling autograd avoids
    # building (and keeping alive) a computational graph for every sample.
    with torch.no_grad():
        ft_ = torch.flatten(base_cnn(im_))  # FE with flattening.
    ft_ = dimred_(ft_.data.numpy().reshape(1, -1))  # DR
    time_results_dict["fe_dr"][current_idx] += time.time() - st_time

    return ft_, lb_


def generate_cnn(
        flags: argparse.Namespace,
        require_fix: bool = False
) -> torch.nn.Module:
    """
    Build the pretrained CNN to be used as feature extractor.
    Only names containing "resnet" are supported, and all of them map to the torchvision
    ResNet-18 with pretrained weights.
    :param flags: the namespace of argparse with the parameters. Only ``flags.cnn_fe`` (the name
        of the requested CNN) is read.
    :param require_fix: formerly, whether to fix the names of the weights within the downloaded
        pth. The conversion step is disabled, hence the value is currently ignored.
    :return: the pytorch based feature extractor.
    :raises ValueError: if ``flags.cnn_fe`` does not contain "resnet".
    """
    # NOTE: require_fix used to trigger resnet_mg2.convert_weights_mapping on
    # flags.cnn_fe_weights_path; that call is disabled, so the flag has no effect here.
    requested_cnn = flags.cnn_fe
    if "resnet" not in requested_cnn:
        raise ValueError(f"Unsupported CNN: {requested_cnn}\n")

    # The torchvision model is asked for directly (the former resnet_mg2.load_resnet loader,
    # which also took requires_grad=False and a custom weights path, is no longer used).
    # NOTE(review): the model is returned as built by torchvision, i.e. without any explicit
    # .eval() call or gradient freezing -- confirm whether the callers take care of it.
    # NOTE(review): names such as "resnet50" also end up here and still get a ResNet-18 -- confirm
    # this is intended.
    return torchvision.models.resnet18(pretrained=True)


def generate_dimensionality_reduction_operator(
        flags: argparse.Namespace,
        feature_extractor: torch.nn.Module
) -> Tuple[torch.nn.Sequential, bool, Optional[Dict[str, Any]]]:
    """
    Wrap the feature extractor into a sequential module according to ``flags.dr_policy``.
    The policy is matched by substring:
    - "plus": the sequential is populated by torch_utils.append_layers_with_reduced_filters,
      and nothing else is done;
    - otherwise the feature extractor is added as module "fe"; "filter_selection" additionally
      appends a FilterSelectionLayer as module "dr", and "class_selection" requests the
      training of a class-specific dimensionality reduction operator.
    :param flags: the namespace of argparse with the parameters. The fields read are dr_policy
        and, depending on it, feature_layer, filters_to_keep and class_distance_filters.
    :param feature_extractor: the feature extractor pytorch module.
    :return: a tuple containing the sequential module, a boolean telling whether a dimensionality
        reduction operator still has to be trained, and the arguments for such a training
        (None when no training is required).
    """
    pipeline = torch.nn.Sequential()

    if "plus" in flags.dr_policy:
        # Define the modified CNN: the helper receives the empty sequential as copy_cnn.
        # NOTE(review): presumably the helper fills `pipeline` in place -- confirm in torch_utils.
        torch_utils.append_layers_with_reduced_filters(
            sequential=feature_extractor, feature_layer=flags.feature_layer,
            copy_cnn=pipeline, filters_to_keep=flags.filters_to_keep.split(','))
        return pipeline, False, None

    pipeline.add_module("fe", feature_extractor)

    if "filter_selection" in flags.dr_policy:
        pipeline.add_module(
            "dr",
            custom_pytorch_layers.FilterSelectionLayer(
                filters_to_keep=flags.filters_to_keep
            )
        )

    if "class_selection" not in flags.dr_policy:
        return pipeline, False, None

    # A class specific DR operator has to be trained later on, with these arguments.
    class_distance_args = {
        "type": "class_distance",
        "filters": flags.class_distance_filters
    }
    return pipeline, True, class_distance_args


def main(flags: argparse.Namespace) -> None:
    """
    Main function.
    :type flags: argparse.Namespace
    :param flags: the namespace of argparse with the parameters.
    :return: Nothing.
    """
    print('FLAGS: ')
    for k in vars(flags):
        print('{} : {}'.format(k, vars(flags)[k]))

    # Create output directory, if it does not exist
    if not os.path.exists(flags.output_dir):
        os.makedirs(flags.output_dir)

    # Fix the seed.
    # todo: replace np.random.seed with a random_generator
    random.seed(flags.seed)
    np.random.seed(flags.seed)
    torch.manual_seed(flags.seed)
    # torch.backends.cudnn.deterministic = True
    # torch.backends.cudnn.benchmark = False

    # Define the classes.
    classes_before_change, classes_after_change = generate_classes(flags)

    # Define the dataloader
    dataloader, batch_sampler, test_images, datasets_splits = generate_dataloader(
        flags, classes_before_change, classes_after_change
    )

    # Define the CNN's feature extractor.
    base_cnn = generate_cnn(flags)

    # Define Dimensionality Reduction operator.
    base_cnn, dr_to_train, dr_args = generate_dimensionality_reduction_operator(
        flags, feature_extractor=base_cnn
    )

    # Compute features size
    if flags.is_synthetic:
        input_shape = (3, flags.grid_size, flags.grid_size)
    elif flags.is_audio:
        input_shape, _ = next(iter(dataloader))
        input_shape = input_shape.shape[1:]
    else:
        # Image
        input_shape = (3, flags.image_size, flags.image_size)

    features_size = torch_utils.infer_end_of_cnn_shape(
        shape=input_shape,
        activation=base_cnn.forward
    )
    features_shape = torch_utils.infer_end_of_cnn_shape(
        shape=input_shape,
        activation=base_cnn.forward,
        flatten=False
    )
    print(f"The extracted features have a shape {features_shape} --> (size {features_size})")

    # Compute some constants
    num_datasets = 2 if flags.is_synthetic else len(datasets_splits)
    num_test_samples = flags.base_test_samples * flags.num_classes * num_datasets
    num_test_dataloader_samples_to_skip = \
        datasets_splits[0] - (num_test_samples // num_datasets)  # Skip only on the first dataset!
    if flags.is_synthetic:
        num_test_dataloader_samples_to_skip = datasets_splits[0] - num_test_samples

    samples_per_class_to_test = np.array(flags.samples_per_class_to_test.split(','), dtype=int)
    nn_lr0_to_test = np.array(flags.nn_lr_base.split(','), dtype=float)
    nn_lr0_incremental_to_test = np.array(flags.nn_lr_incremental.split(','), dtype=float)
    nn_base_names = [f"c_{lr_:.0e}" for lr_ in nn_lr0_to_test]
    nn_inc_names = [f"i_{lr_c:.0e}_{lr_i:.0e}" for lr_c in nn_lr0_to_test for lr_i in nn_lr0_incremental_to_test]
    num_nn_grid = len(nn_base_names)
    num_nn_inc_grid = len(nn_inc_names)

    num_comparisons = np.size(samples_per_class_to_test)
    num_incremental_comparisons = np.max(samples_per_class_to_test) * flags.num_classes // flags.incremental_step

    # Define the features to be used in the following
    test_iterator = iter(dataloader)
    start_time = time.time()
    training_features_all, training_labels_all, _ = \
        compute_training_features(
            num_training_samples=np.max(samples_per_class_to_test),
            num_classes=flags.num_classes,
            features_size=int(features_size),
            iterator=test_iterator,
            cnn_forward_fn=base_cnn.forward
        )
    print(f"Features extracted in {time.time() - start_time:.3f} seconds.")

    ####################################################################################################################
    # Base exps (they can be considered as incremental exps).
    # In this experiment, the proposed solution along with other baseline classifiers is tested.
    ####################################################################################################################
    # Define base samples data-structures
    accuracy = {
        "knn": np.zeros(num_comparisons),
        "svm": np.zeros(num_comparisons),
        "nn": np.zeros((num_nn_grid, num_comparisons)),
        "nni": np.zeros((num_nn_inc_grid, num_comparisons))
    }
    predictions = {
        "labels": np.zeros(num_test_samples, dtype=int),
        "knn": np.zeros((num_comparisons, num_test_samples, flags.num_classes)),
        "svm": np.zeros((num_comparisons, num_test_samples, flags.num_classes)),
        "nn": np.zeros((num_nn_grid, num_comparisons, num_test_samples, flags.num_classes)),
        "nni": np.zeros((num_nn_inc_grid, num_comparisons, num_test_samples, flags.num_classes))
    }
    train_time = {
        "knn": np.zeros(num_comparisons),
        "svm": np.zeros(num_comparisons),
        "nn": np.zeros(num_comparisons),
        "nni": np.zeros(num_comparisons)
    }
    test_time = {
        "dl": np.zeros(num_comparisons),
        "fe_dr": np.zeros(num_comparisons),
        "knn": np.zeros(num_comparisons),
        "svm": np.zeros(num_comparisons),
        "nn": np.zeros(num_comparisons),
        "nni": np.zeros(num_comparisons)
    }
    class_distance_filter_stats = np.zeros((num_comparisons, flags.class_distance_filters))
    # Start experiments
    if not flags.skip_base_exps:
        for ii, samples_per_class in enumerate(tqdm(samples_per_class_to_test)):
            # Extract the features with the current number of classes.
            training_features, training_labels = \
                extract_training_features(
                    training_features=training_features_all,
                    training_labels=training_labels_all,
                    num_samples_per_class=samples_per_class
                )

            # Train dimensionality reduction operator, if any.
            if dr_to_train:
                dimred_, training_features, training_labels, fs_ = \
                    learn_dimred(
                        dr_args=dr_args,
                        train_features=training_features,
                        train_labels=training_labels,
                        num_classes=flags.num_classes,
                        features_shape=features_shape
                    )
                class_distance_filter_stats[ii] = fs_
                # Define reduced features size
                realtime_features_size = training_features.shape[1]
            else:
                # Define dimred as the identity.
                dimred_ = identity_dimred_
                # Set the value of realtime features size
                realtime_features_size = features_size

            # Train
            # 1) kNN
            start_time = time.time()
            n_neighbors = int(np.ceil(np.sqrt(samples_per_class * flags.num_classes)))
            knn_ = neighbors.KNeighborsClassifier(n_neighbors=n_neighbors)
            knn_.fit(training_features, training_labels)
            train_time["knn"][ii] = time.time() - start_time

            # 2) SVM
            start_time = time.time()
            svm_ = svm.SVC()
            svm_.fit(training_features, training_labels)
            train_time["svm"][ii] = time.time() - start_time

            # 3) NN/NNI
            if not flags.skip_nn_exps:
                start_time = time.time()
                nn_to_train = list()
                opt_list = list()
                loss_criterion = torch.nn.CrossEntropyLoss()
                # Create the NN-FC1 classifiers and their optimizers.
                for lr_ in nn_lr0_to_test:
                    nn_fc1 = torch.nn.Linear(
                        in_features=realtime_features_size,
                        out_features=flags.num_classes,
                    )
                    nn_to_train.append(nn_fc1)
                    opt_ = torch.optim.Adam(
                        nn_fc1.parameters(),
                        lr=lr_,
                        weight_decay=flags.nn_weights_alpha
                    )
                    opt_list.append(opt_)
                    # Define a data loader from numpy arrays
                training_features_dataset = \
                    torch.utils.data.TensorDataset(
                        torch.Tensor(training_features), torch.Tensor(training_labels).long()
                    )
                training_features_dataloader = torch.utils.data.DataLoader(
                    training_features_dataset, batch_size=flags.nn_batch_size, shuffle=True
                )
                # Train.
                for ee in range(flags.nn_training_epochs):
                    for kk, (inputs_, lbls_) in enumerate(training_features_dataloader):
                        for (nn_clf_, opt_) in zip(nn_to_train, opt_list):
                            # Zeroes gradients.
                            opt_.zero_grad()
                            # Forward
                            outputs = nn_clf_.forward(inputs_)
                            # Compute loss
                            loss = loss_criterion(outputs, lbls_)
                            # Train step
                            loss.backward()
                            opt_.step()
                # Save train time. It is the mean of all the NN classifiers.
                train_time["nn"][ii] = (time.time() - start_time) / num_nn_grid
                train_time["nni"][ii] = train_time["nn"][ii]
                # Delete useless data structures
                del training_features_dataset
                del training_features_dataloader

                # Create the NN-FC1s and the optimizers for following steps.
                nn_base = list()
                nn_incr = list()
                opt_list.clear()  # Empty the optimizer list.
                for nn_fc1 in nn_to_train:
                    # Copy "base" version
                    nn_base.append(copy.deepcopy(nn_fc1))
                    # Copy an incremental version along with its optimizer for each possible learning rate
                    for lr_i in nn_lr0_incremental_to_test:
                        nn_fc1_copy = copy.deepcopy(nn_fc1)
                        nn_incr.append(nn_fc1_copy)
                        opt_ = torch.optim.Adam(
                            nn_fc1_copy.parameters(),
                            lr=lr_i,
                            weight_decay=flags.nn_weights_alpha
                        )
                        opt_list.append(opt_)
                del nn_to_train  # Remove "originals"

            # Test
            errors_knn_ = np.zeros(num_test_samples)
            errors_svm_ = np.zeros(num_test_samples)
            errors_nn_ = np.zeros((num_nn_grid, num_test_samples))
            errors_nni_ = np.zeros((num_nn_inc_grid, num_test_samples))

            # Skip training samples by updating the sampler
            batch_sampler.update_start(start=num_test_dataloader_samples_to_skip)
            test_iterator = iter(dataloader)
            # for _ in range(num_test_dataloader_samples_to_skip):
            #     next(test_iterator)

            for jj in tqdm(range(num_test_samples)):
                # Get the sample
                ft_, lb_ = extract_next_data(
                    test_iterator=test_iterator,
                    time_results_dict=test_time,
                    base_cnn=base_cnn,
                    dimred_=dimred_,
                    current_idx=ii
                )
                # KNN Classification
                st_time = time.time()
                pred_ = knn_.predict_proba(ft_)
                predicted_label = np.argmax(pred_)
                errors_knn_[jj] = (not predicted_label == lb_.data.numpy())
                predictions["knn"][ii, jj] = pred_
                test_time["knn"][ii] += time.time() - st_time
                # SVM Classification
                st_time = time.time()
                pred_ = svm_.decision_function(ft_)
                predicted_label = svm_.predict(ft_)
                errors_svm_[jj] = (not predicted_label == lb_.data.numpy())
                if flags.num_classes > 2:
                    predictions["svm"][ii, jj] = pred_
                else:
                    predictions["svm"][ii, jj, 0] = pred_  # Save a value only for the first class.
                test_time["svm"][ii] += time.time() - st_time
                # NN Classification
                if not flags.skip_nn_exps:
                    # NN Classification
                    with torch.no_grad():
                        st_time = time.time()
                        for nn_i, nn_fc1 in enumerate(nn_base):
                            pred_ = nn_fc1.forward(torch.Tensor(ft_))
                            _, predicted_label = torch.max(pred_.data, 1)
                            errors_nn_[nn_i, jj] = (predicted_label != lb_)
                            predictions["nn"][nn_i, ii, jj] = pred_.data.numpy()
                        test_time["nn"][ii] += (time.time() - st_time) / num_nn_grid
                    # NNI Classification
                    st_time = time.time()
                    for nn_i, (nn_fc1, opt_) in enumerate(zip(nn_incr, opt_list)):