diff --git a/mart/configs/datamodule/coco_yolov3.yaml b/mart/configs/datamodule/coco_yolov3.yaml
index 27be09d1..d8221803 100644
--- a/mart/configs/datamodule/coco_yolov3.yaml
+++ b/mart/configs/datamodule/coco_yolov3.yaml
@@ -12,7 +12,7 @@ train_dataset:
         fill: 0.5
       - _target_: mart.transforms.Resize
         size: [416, 416]
-      - _target_: mart.transforms.ConvertBoxesToCXCYHW
+      - _target_: mart.transforms.ConvertBoxesToCXCYWH
       - _target_: mart.transforms.RemapLabels
       - _target_: mart.transforms.PackBoxesAndLabels
         num_classes: 80
diff --git a/mart/configs/experiment/COCO_YOLOv4.yaml b/mart/configs/experiment/COCO_YOLOv4.yaml
new file mode 100644
index 00000000..b0abd4ab
--- /dev/null
+++ b/mart/configs/experiment/COCO_YOLOv4.yaml
@@ -0,0 +1,33 @@
+# @package _global_
+
+defaults:
+  - override /datamodule: coco_yolov3
+  - override /model: yolov4
+  - override /optimization: super_convergence
+  - override /metric: average_precision
+
+task_name: "COCO_YOLOv4"
+tags: ["evaluation"]
+
+optimized_metric: "test_metrics/map"
+
+trainer:
+  # 117,266 training images, 6 epochs, batch_size=16, 43,974.75
+  max_steps: 43975
+  # FIXME: "nms_kernel" not implemented for 'BFloat16', torch.ops.torchvision.nms().
+  precision: 32
+
+datamodule:
+  num_workers: 32
+  ims_per_batch: 16
+
+  collate_fn:
+    path: mart.datamodules.coco.yolov4_collate_fn
+
+model:
+  optimizer:
+    lr: 0.001
+    momentum: 0.9
+    weight_decay: 0.0005
+
+  training_metrics: null
diff --git a/mart/configs/experiment/COCO_YOLOv4_ShapeShifter.yaml b/mart/configs/experiment/COCO_YOLOv4_ShapeShifter.yaml
new file mode 100644
index 00000000..bf2d7289
--- /dev/null
+++ b/mart/configs/experiment/COCO_YOLOv4_ShapeShifter.yaml
@@ -0,0 +1,179 @@
+# @package _global_
+
+defaults:
+  - /attack/perturber@model.modules.perturbation: default
+  - /attack/perturber/initializer@model.modules.perturbation.initializer: uniform
+  - /attack/perturber/projector@model.modules.perturbation.projector: range
+  - /attack/composer@model.modules.input_adv: warp_composite
+  - /attack/gradient_modifier@model.gradient_modifier: lp_normalizer
+  - override /optimization: super_convergence
+  - override /datamodule: coco_yolov3
+  - override /model: yolov4
+  - override /metric: average_precision
+  - override /callbacks:
+      [
+        model_checkpoint,
+        lr_monitor,
+        perturbation_visualizer,
+        gradient_monitor,
+        attack_in_eval_mode,
+        no_grad_mode,
+      ]
+
+task_name: "COCO_YOLOv4_ShapeShifter"
+tags: ["adv"]
+
+optimized_metric: "test_metrics/map"
+
+trainer:
+  # 64115 training images, batch_size=16, FLOOR(64115/16) = 4007
+  max_steps: 40070 # 10 epochs
+  # mAP can be slow to compute so limit number of images
+  limit_val_batches: 100
+  limit_test_batches: 100
+  precision: 32
+
+callbacks:
+  model_checkpoint:
+    monitor: "validation_metrics/map"
+    mode: "min"
+
+  attack_in_eval_mode:
+    module_classes:
+      - _target_: hydra.utils.get_class
+        path: torch.nn.BatchNorm2d
+
+  no_grad_mode:
+    module_names: "model.yolov4"
+
+  perturbation_visualizer:
+    perturbation: "model.perturbation.perturbation"
+    frequency: 500
+
+datamodule:
+  num_workers: 16
+  ims_per_batch: 8
+
+  train_dataset:
+    annFile: ${paths.data_dir}/coco/annotations/person_instances_train2017.json
+  val_dataset:
+    annFile: ${paths.data_dir}/coco/annotations/person_instances_val2017.json
+  test_dataset:
+    annFile: ${paths.data_dir}/coco/annotations/person_instances_val2017.json
+
+  collate_fn:
+    path: mart.datamodules.coco.yolov4_collate_fn
+
+model:
+  modules:
+    perturbation:
+      size: [3, 416, 234]
+
+      initializer:
+        min: 0.49
+        max: 0.51
+
+      projector:
+        min: 0.0
+        max: 1.0
+
+    input_adv:
+      warp:
+        _target_: torchvision.transforms.Compose
+        transforms:
+          - _target_: mart.transforms.ColorJitter
+            brightness: [0.5, 1.5]
+            contrast: [0.5, 1.5]
+            saturation: [0.5, 1.0]
+            hue: [-0.05, 0.05]
+          - _target_: torchvision.transforms.RandomAffine
+            degrees: [-5, 5]
+            translate: [0.1, 0.25]
+            scale: [0.4, 0.6]
+            shear: [-3, 3, -3, 3]
+            interpolation: 2 # BILINEAR
+      clamp: [0, 1]
+
+  optimizer:
+    lr: 0.05 # ims_per_batch / orig_ims_per_batch * orig_lr = (8 / 16) * 0.1
+    momentum: 0.9
+
+  lr_scheduler:
+    scheduler:
+      three_phase: true
+
+  gradient_modifier: null
+
+  training_sequence:
+    seq005: "perturbation"
+    seq006: "input_adv"
+    seq010:
+      yolov4:
+        x: "input_adv"
+    seq020:
+      losses:
+        perturbation: "perturbation"
+    seq030:
+      loss:
+        _call_with_args_:
+          - "losses.hide_objects_loss"
+          - "losses.total_variation"
+        weights:
+          - 1
+          - 0.0001
+    seq050:
+      output:
+        total_variation: "losses.total_variation"
+
+  training_metrics: null
+  training_step_log:
+    - loss
+    - total_loss
+    - coord_loss
+    - obj_loss
+    - noobj_loss
+    - class_loss
+    - hide_objects_loss
+    - target_class_loss
+    - hide_target_objects_loss
+    - correct_target_class_loss
+    - target_count
+    - score_count
+    - target_score_count
+    - total_variation
+
+  validation_sequence:
+    seq005: "perturbation"
+    seq006: "input_adv"
+    seq010:
+      yolov4:
+        x: "input_adv"
+    seq020:
+      losses:
+        perturbation: "perturbation"
+    seq030:
+      loss:
+        _call_with_args_:
+          - "losses.hide_objects_loss"
+          - "losses.total_variation"
+        weights:
+          - 1
+          - 0.0001
+
+  test_sequence:
+    seq005: "perturbation"
+    seq006: "input_adv"
+    seq010:
+      yolov4:
+        x: "input_adv"
+    seq020:
+      losses:
+        perturbation: "perturbation"
+    seq030:
+      loss:
+        _call_with_args_:
+          - "losses.hide_objects_loss"
+          - "losses.total_variation"
+        weights:
+          - 1
+          - 0.0001
diff --git a/mart/configs/model/yolov4.yaml b/mart/configs/model/yolov4.yaml
new file mode 100644
index 00000000..c8f16e2a
--- /dev/null
+++ b/mart/configs/model/yolov4.yaml
@@ -0,0 +1,188 @@
+defaults:
+  - modular
+
+modules:
+  yolov4:
+    # XXX: This is so sketchy...
+    _target_: torch.hub.load
+    repo_or_dir: "AlexeyAB/Yet-Another-YOLOv4-Pytorch"
+    trust_repo: False
+    model: "yolov4"
+    pretrained: True
+
+  losses:
+    _target_: mart.models.yolov3.Loss
+    image_size: 416 # FIXME: use ${training_data.transform.image_size}?
+    average: True
+
+  loss:
+    _target_: mart.nn.Sum
+
+  detections:
+    _target_: mart.models.yolov3.Detections
+    nms: true
+    conf_thres: 0.1
+    nms_thres: 0.4
+
+  output:
+    _target_: mart.nn.ReturnKwargs
+
+# training sequence does not produce preds/targets
+training_metrics: null
+
+training_sequence:
+  seq010:
+    yolov4:
+      x: "input"
+
+  seq020:
+    losses:
+      logits: "yolov4.logits"
+      target: "target"
+
+  seq030:
+    loss:
+      _call_with_args_:
+        - "losses.total_loss"
+
+  seq040:
+    detections:
+      preds: "yolov4.preds"
+      target: "target"
+
+  seq050:
+    output:
+      loss: "loss"
+      total_loss: "losses.total_loss"
+      coord_loss: "losses.coord_loss"
+      obj_loss: "losses.obj_loss"
+      noobj_loss: "losses.noobj_loss"
+      class_loss: "losses.class_loss"
+      hide_objects_loss: "losses.hide_objects_loss"
+      target_class_loss: "losses.target_class_loss"
+      hide_target_objects_loss: "losses.hide_target_objects_loss"
+      correct_target_class_loss: "losses.correct_target_class_loss"
+      target_count: "losses.target_count"
+      score_count: "losses.score_count"
+      target_score_count: "losses.target_score_count"
+
+validation_sequence:
+  seq010:
+    yolov4:
+      x: "input"
+
+  seq020:
+    losses:
+      logits: "yolov4.logits"
+      target: "target"
+
+  seq030:
+    loss:
+      _call_with_args_:
+        - "losses.total_loss"
+
+  seq040:
+    detections:
+      preds: "yolov4.preds"
+      target: "target"
+
+  seq050:
+    output:
+      preds: "detections.preds"
+      target: "detections.targets"
+      loss: "loss"
+      total_loss: "losses.total_loss"
+      coord_loss: "losses.coord_loss"
+      obj_loss: "losses.obj_loss"
+      noobj_loss: "losses.noobj_loss"
+      class_loss: "losses.class_loss"
+      hide_objects_loss: "losses.hide_objects_loss"
+      target_class_loss: "losses.target_class_loss"
+      hide_target_objects_loss: "losses.hide_target_objects_loss"
+      correct_target_class_loss: "losses.correct_target_class_loss"
+      target_count: "losses.target_count"
+      score_count: "losses.score_count"
+      target_score_count: "losses.target_score_count"
+
+test_sequence:
+  seq010:
+    yolov4:
+      x: "input"
+
+  seq020:
+    losses:
+      logits: "yolov4.logits"
+      target: "target"
+
+  seq030:
+    loss:
+      _call_with_args_:
+        - "losses.total_loss"
+
+  seq040:
+    detections:
+      preds: "yolov4.preds"
+      target: "target"
+
+  seq050:
+    output:
+      preds: "detections.preds"
+      target: "detections.targets"
+      loss: "loss"
+      total_loss: "losses.total_loss"
+      coord_loss: "losses.coord_loss"
+      obj_loss: "losses.obj_loss"
+      noobj_loss: "losses.noobj_loss"
+      class_loss: "losses.class_loss"
+      hide_objects_loss: "losses.hide_objects_loss"
+      target_class_loss: "losses.target_class_loss"
+      hide_target_objects_loss: "losses.hide_target_objects_loss"
+      correct_target_class_loss: "losses.correct_target_class_loss"
+      target_count: "losses.target_count"
+      score_count: "losses.score_count"
+      target_score_count: "losses.target_score_count"
+
+training_step_log:
+  - loss
+  - total_loss
+  - coord_loss
+  - obj_loss
+  - noobj_loss
+  - class_loss
+  - hide_objects_loss
+  - target_class_loss
+  - hide_target_objects_loss
+  - correct_target_class_loss
+  - target_count
+  - score_count
+  - target_score_count
+
+validation_step_log:
+  - loss
+  - total_loss
+  - coord_loss
+  - obj_loss
+  - noobj_loss
+  - class_loss
+  - hide_objects_loss
+  - target_class_loss
+  - hide_target_objects_loss
+  - correct_target_class_loss
+  - target_count
+  - score_count
+  - target_score_count
+
+test_step_log:
+  - loss
+  - total_loss
+  - coord_loss
+  - obj_loss
+  - noobj_loss
+  - class_loss
+  - hide_objects_loss
+  - target_class_loss
+  - hide_target_objects_loss
+  - correct_target_class_loss
+  - target_count
+  - score_count
+  - target_score_count
diff --git a/mart/datamodules/coco.py b/mart/datamodules/coco.py
index 795f492d..1e868841 100644
--- a/mart/datamodules/coco.py
+++ b/mart/datamodules/coco.py
@@ -113,7 +113,7 @@ def to_padded_tensor(tensors, dim=0, fill=0.0):
 
 
 def yolo_collate_fn(batch):
-    images, targets = tuple(zip(*batch))
+    images, targets = collate_fn(batch)
 
     images = default_collate(images)
 
@@ -131,3 +131,28 @@ def yolo_collate_fn(batch):
             target[key] = default_collate(target[key])
 
     return images, target
+
+
+# FIXME: Turn this into a class with options
+def yolov4_collate_fn(batch):
+    images, target = yolo_collate_fn(batch)
+
+    # Generate image indexes for labels along batch axis
+    packed_length = target["packed_length"]
+    indices = [i + torch.zeros((length,)) for i, length in enumerate(packed_length)]
+    indices = torch.cat(indices, dim=0)[..., None]
+
+    # Concatenate labels together along batch axis
+    packed = [target["packed"][i][:length] for i, length in enumerate(packed_length)]
+    packed = torch.cat(packed, dim=0)
+
+    # boxes, scores, labels -> labels, boxes
+    boxes = packed[:, 0:4]
+    scores = packed[:, 4:5]
+    labels = torch.argmax(packed[:, 5:], dim=-1, keepdim=True)
+
+    # concatenate in correct order
+    packed = torch.cat([indices, labels, boxes], dim=-1)
+    target["packedv4"] = packed
+
+    return images, target
diff --git a/mart/models/modular.py b/mart/models/modular.py
index 755c10ed..d746ebd9 100644
--- a/mart/models/modular.py
+++ b/mart/models/modular.py
@@ -179,11 +179,15 @@ def validation_step(self, batch, batch_idx):
         for log_name, output_key in self.validation_step_log.items():
             self.log(f"validation/{log_name}", output[output_key])
 
-        self.validation_metrics(output[self.output_preds_key], output[self.output_target_key])
+        if self.validation_metrics is not None:
+            self.validation_metrics(output[self.output_preds_key], output[self.output_target_key])
 
         return None
 
     def validation_epoch_end(self, outputs):
+        if self.validation_metrics is None:
+            return
+
         metrics = self.validation_metrics.compute()
         metrics = self.flatten_metrics(metrics)
         self.validation_metrics.reset()
@@ -201,11 +205,15 @@ def test_step(self, batch, batch_idx):
         for log_name, output_key in self.test_step_log.items():
             self.log(f"test/{log_name}", output[output_key])
 
-        self.test_metrics(output[self.output_preds_key], output[self.output_target_key])
+        if self.test_metrics is not None:
+            self.test_metrics(output[self.output_preds_key], output[self.output_target_key])
 
         return None
 
     def test_epoch_end(self, outputs):
+        if self.test_metrics is None:
+            return
+
         metrics = self.test_metrics.compute()
         metrics = self.flatten_metrics(metrics)
         self.test_metrics.reset()
diff --git a/mart/models/yolov3.py b/mart/models/yolov3.py
index 38ed9db7..8a4556b4 100644
--- a/mart/models/yolov3.py
+++ b/mart/models/yolov3.py
@@ -101,7 +101,7 @@ def __init__(self, image_size, average=True, score_thresh=0.01, target_idx=0):
         self.score_thresh = score_thresh
         self.target_idx = target_idx
 
-    def forward(self, logits, target, **kwargs):
+    def forward(self, logits, target, perturbation=None, **kwargs):
         targets = target["packed"]
         lengths = target["packed_length"]
 
@@ -149,6 +149,15 @@ def forward(self, logits, target, **kwargs):
         target_count = target_mask.sum() / logits.shape[0]
         target_score_count = (target_mask & score_mask).sum() / logits.shape[0]
 
+        total_variation = None
+        if perturbation is not None:
+            total_variation = torch.mean(
+                torch.sum(torch.square(perturbation[:, 1:, :] - perturbation[:, :-1, :]))
+                + torch.sum(  # noqa: W503
+                    torch.square(perturbation[:, :, 1:] - perturbation[:, :, :-1])
+                )
+            )
+
         return {
             "total_loss": total_loss,
             "coord_loss": coord_loss,
@@ -162,6 +171,7 @@ def forward(self, logits, target, **kwargs):
             "score_count": score_count,
             "target_count": target_count,
             "target_score_count": target_score_count,
+            "total_variation": total_variation,
         }
 
 
diff --git a/mart/nn/nn.py b/mart/nn/nn.py
index 9319a821..37512f42 100644
--- a/mart/nn/nn.py
+++ b/mart/nn/nn.py
@@ -162,7 +162,10 @@ def __call__(
 
         # Append kwargs to args using arg_keys
         try:
-            [args.append(kwargs[kwargs_key] if isinstance(kwargs_key, str) else kwargs_key) for kwargs_key in arg_keys]
+            [
+                args.append(kwargs[kwargs_key] if isinstance(kwargs_key, str) else kwargs_key)
+                for kwargs_key in arg_keys
+            ]
         except KeyError as ex:
             raise Exception(
                 f"{module_name} only received kwargs: {', '.join(kwargs.keys())}."
@@ -170,7 +173,10 @@ def __call__(
 
         # Replace kwargs with selected kwargs
         try:
-            kwargs = {name: kwargs[kwargs_key] if isinstance(kwargs_key, str) else kwargs_key for name, kwargs_key in kwarg_keys.items()}
+            kwargs = {
+                name: kwargs[kwargs_key] if isinstance(kwargs_key, str) else kwargs_key
+                for name, kwargs_key in kwarg_keys.items()
+            }
         except KeyError as ex:
             raise Exception(
                 f"{module_name} only received kwargs: {', '.join(kwargs.keys())}."
diff --git a/mart/transforms/extended.py b/mart/transforms/extended.py
index 87f13ae0..8a04932b 100644
--- a/mart/transforms/extended.py
+++ b/mart/transforms/extended.py
@@ -34,7 +34,7 @@
     "ConvertCocoPolysToMask",
     "PadToSquare",
     "Resize",
-    "ConvertBoxesToCXCYHW",
+    "ConvertBoxesToCXCYWH",
     "RemapLabels",
     "PackBoxesAndLabels",
    "CreatePerturbableMaskFromImage",
@@ -340,7 +340,12 @@ def resize_coordinates(self, coordinates, ratio):
         return coordinates
 
 
-class ConvertBoxesToCXCYHW(ExTransform):
+class ConvertBoxesToCXCYWH(ExTransform):
+    def __init__(self, normalize: bool = False):
+        super().__init__()
+
+        self.normalize = normalize
+
     def __call__(
         self,
         image: Tensor,
@@ -349,7 +354,7 @@
         # X1Y1X2Y2
         boxes = target["boxes"]
 
-        # X2Y2 -> HW
+        # X2Y2 -> WH
         boxes[:, 2] -= boxes[:, 0]
         boxes[:, 3] -= boxes[:, 1]
 
@@ -357,6 +362,14 @@
         boxes[:, 0] += boxes[:, 2] / 2
         boxes[:, 1] += boxes[:, 3] / 2
 
+        if self.normalize:
+            # image.shape = CHW
+            C, H, W = image.shape
+            boxes[:, 0] /= W  # x
+            boxes[:, 1] /= H  # y
+            boxes[:, 2] /= W  # w
+            boxes[:, 3] /= H  # h
+
         target["boxes"] = boxes
 
         return image, target
@@ -472,8 +485,13 @@
 
 
 class PackBoxesAndLabels(ExTransform):
-    def __init__(self, num_classes: int):
-        self.num_classes = num_classes
+    def __init__(
+        self,
+        num_classes: int | None,
+        order: list[str] = None,
+    ):
+        self.num_classes = num_classes or 0
+        self.order = order or ["boxes", "scores", "labels"]
 
     def __call__(
         self,
@@ -484,10 +502,17 @@
         boxes = target["boxes"]
         labels = target["labels"]
 
         scores = torch.ones_like(labels)[..., None]
-        labels = torch.nn.functional.one_hot(labels, num_classes=self.num_classes)
+        if self.num_classes > 0:
+            labels = torch.nn.functional.one_hot(labels, num_classes=self.num_classes)
+        else:
+            labels = labels[..., None]
+
+        # Concatenate/pack boxes, scores, and labels using given order.
+        values = {"boxes": boxes, "scores": scores, "labels": labels}
+        packed = torch.cat([values[key] for key in self.order], dim=-1)
 
-        target["packed"] = torch.cat([boxes, scores, labels], dim=-1)
-        target["packed_length"] = target["packed"].shape[0]
+        target["packed"] = packed
+        target["packed_length"] = packed.shape[0]
 
         return image, target