diff --git a/armory/instrument/instrument.py b/armory/instrument/instrument.py index f18e19906..c489f1470 100644 --- a/armory/instrument/instrument.py +++ b/armory/instrument/instrument.py @@ -118,59 +118,6 @@ def update(self, *preprocessing, **named_values): # Push to sink self.sink.update(name, value) - def hook(self, module, *preprocessing, input=None, output=None, mode="pytorch"): - if mode == "pytorch": - return self.hook_torch(module, *preprocessing, input=input, output=output) - elif mode == "tf": - return self.hook_tf(module, *preprocessing, input=input, output=output) - raise ValueError(f"mode {mode} not in ('pytorch', 'tf')") - - def hook_tf(self, module, *preprocessing, input=None, output=None): - raise NotImplementedError("hooking not ready for tensorflow") - # NOTE: - # https://discuss.pytorch.org/t/get-the-activations-of-the-second-to-last-layer/55629/6 - # TensorFlow hooks - # https://www.tensorflow.org/api_docs/python/tf/estimator/SessionRunHook - # https://github.com/tensorflow/tensorflow/issues/33478 - # https://github.com/tensorflow/tensorflow/issues/33129 - # https://stackoverflow.com/questions/48966281/get-intermediate-output-from-keras-tensorflow-during-prediction - # https://stackoverflow.com/questions/59493222/access-output-of-intermediate-layers-in-tensor-flow-2-0-in-eager-mode/60945216#60945216 - - def hook_torch(self, module, *preprocessing, input=None, output=None): - if not hasattr(module, "register_forward_hook"): - raise ValueError( - f"module {module} does not have method 'register_forward_hook'. Is it a torch.nn.Module?" 
- ) - if input == "" or (input is not None and not isinstance(input, str)): - raise ValueError(f"input {input} must be None or a non-empty string") - if output == "" or (output is not None and not isinstance(output, str)): - raise ValueError(f"output {output} must be None or a non-empty string") - if input is None and output is None: - raise ValueError("input and output cannot both be None") - if module in self._hooks: - raise ValueError(f"module {module} is already hooked") - - def hook_fn(hook_module, hook_input, hook_output): - del hook_module - key_values = {} - if input is not None: - key_values[input] = hook_input - if output is not None: - key_values[output] = hook_output - self.update(*preprocessing, **key_values) - - hook = module.register_forward_hook(hook_fn) - self._hooks[module] = (hook, "pytorch") - - def unhook(self, module): - hook, mode = self._hooks.pop(module) - if mode == "pytorch": - hook.remove() - elif mode == "tf": - raise NotImplementedError() - else: - raise ValueError(f"mode {mode} not in ('pytorch', 'tf')") - class MockSink: """ diff --git a/docs/instrumentation_examples.md b/docs/instrumentation_examples.md new file mode 100644 index 000000000..5488920c6 --- /dev/null +++ b/docs/instrumentation_examples.md @@ -0,0 +1,181 @@ +# Armory Instrumentation Examples: Measuring Experiment Artifacts Using Probes and Meters +For an introduction to `Probe`s and `Meter`s, please refer to [Measurement Overview](./metrics.md#instrumentation). We assume the user is capturing artifacts from the model or attack and wishes to use `Probe`s and `Meter`s to monitor certain variables within the code. + +Recall the steps for a minimal working example (in [Measurement Overview](./metrics.md#instrumentation)): +1. Create `Probe` via `get_probe(name)` +2. Place `Probe` actions +3. Create `Meter` with processing functions that take input from created `Probe` +4. 
Connect `Meter` to `Hub` via `get_hub().connect_meter(meter)` + +The examples will show how each of these steps is accomplished. + +## Example 1: Measuring a Model Layer's Output +### User Story +I am interested in the layer output from the second `relu` activation of a `forward` method located in `armory/baseline_models/pytorch/cifar.py`. +### `Probe` Example Code +The code below is an example of how to accomplish steps 1 and 2 (note the lines of code with `# added` comments at the end) for a model code that the user is modifying. +```python +""" +CNN model for 32x32x3 image classification +""" +... + +from armory.instrument import get_probe # added +probe = get_probe("my_model") # added + +class Net(nn.Module): + ... + + def forward(self, x: torch.Tensor) -> torch.Tensor: + x = x.permute(0, 3, 1, 2) # from NHWC to NCHW + x = self.conv1(x) + x = F.relu(x) + x = F.max_pool2d(x, 2) + x = self.conv2(x) + x = F.relu(x) + x_out = x.detach().cpu().numpy() # added + probe.update(layer_output=x_out) # added + x = F.max_pool2d(x, 2) + x = torch.flatten(x, 1) + x = self.fc1(x) + x = F.relu(x) + x = self.fc2(x) + output = F.log_softmax(x, dim=1) + return output + +... +``` + +#### Step 1 +After importing `get_probe`, `probe = get_probe("my_model")` creates a `Probe` object with the name `"my_model"`, which is what the user can refer to later to apply processing functions through a `Meter` object. + +#### Step 2 +`x_out = x.detach().cpu().numpy()` is taking the layer output of interest (second `relu` activation output) and converting the tensor to a `numpy` array on the CPU, which will be passed to `probe`. An updated value of `x_out` is stored in `layer_output` via `probe.update(layer_output=x_out)`. Like the `Probe` name `"my_model"`, `layer_output` can be referenced by the user later to apply additional processing functions through a `Meter` object. 
+ +### `Meter` Example Code +Now that a `Probe` instance has been created, we need to create a `Meter` object to accept any updated values from `Probe` and apply further processing that the user desires. We can create the `Meter` in a function added to a local Python script we'll name `user_init_script.py`. In [Config Setup](#config-setup) shortly below, we'll show how to ensure this code is run during scenario initialization. +```python +from armory.instrument import get_hub, Meter + +def set_up_meter(): + meter = Meter( + "my_arbitrary_meter_name", lambda x: x, "my_model.layer_output" + ) + get_hub().connect_meter(meter) +``` +#### Step 3 +In this particular example, the `Meter` accepts 3 inputs: a meter name, a metric/function for processing, and an argument name to pass to the metric/function. +- The meter name (`"my_arbitrary_meter_name"`) can be arbitrary within this context +- For the scope of this document, we only consider simple `Meter`s with the identity function as a metric, i.e., `Meter` will record variables monitored by `Probe` as-is (thus `lambda x: x`) +- The argument passed to the metric/function follows a `.`-separated format (`"my_model.layer_output"`), which needs to be consistent with `Probe` setup earlier: + - `my_model` matches input in `probe = get_probe("my_model")` + - `layer_output` matches variable name in `probe.update(layer_output=x_out)` + +#### Step 4 +For the scope of this document, we don't dwell on what `armory` is doing with `get_hub().connect_meter(meter)` other than to mention this step is necessary for establishing the connection between `meter` created in `armory/user_init_script.py` and `probe` created in the modified version of `armory/baseline_models/pytorch/cifar.py`. 
+ +### Config Setup +Last but not least, the config file passed to `armory run` needs to be updated for these changes to take effect, which is accomplished by adding the `"user_init"` block (please refer to [User Initialization](./scenarios.md#user-initialization) for more details): +```json +... + "user_init": { + "module": "user_init_script", + "name": "set_up_meter" + }, +... +``` +This will prompt armory to run `set_up_meter` in `user_init_script.py` before anything else is loaded for the scenario. + +## Example 2: Measuring Attack Artifact +### User Story +I defined a custom attack with `CARLADapricotPatch` in `armory/custom_attack.py`, and I am interested in the patch after ***every iteration***, which is generated by `CARLADapricotPatch._augment_images_with_patch` and returned as an output. +### `Probe` Example Code +```python +from armory.art_experimental.attacks.carla_obj_det_patch import CARLADapricotPatch +from armory.instrument import get_probe +probe = get_probe("my_attack") + +class CustomAttack(CARLADapricotPatch): + def _augment_images_with_patch(self, **kwargs): + return_value = super()._augment_images_with_patch(**kwargs) + x_patch, patch_target, transformations = return_value + probe.update(attack_output=x_patch) + + return return_value +``` +#### Step 1 +This step is the same as before, except that the `Probe` name is set to `"my_attack"`, which is what the user can refer to later to apply processing functions through a `Meter` object. + +#### Step 2 +The only difference between `CustomAttack` and `CARLADapricotPatch` is that `_augment_images_with_patch` has been redefined to call on `CARLADapricotPatch._augment_images_with_patch` and then have `probe` update the value for `x_patch` that results from that call. An updated value of `x_patch` is stored in `attack_output` via `probe.update(attack_output=x_patch)`. 
Like the `Probe` name `"my_attack"`, `attack_output` can be referenced by the user later to apply additional processing functions through a `Meter` object. + +### `Meter` Example Code +As in [Example 1](#meter-example-code), we need to create a `Meter` object to accept any updated values from `Probe` and apply further processing that the user desires. We can create the `Meter` in a function added to a local Python script `user_init_script.py`. In [Config Setup](#config-setup-1) shortly below, we'll show how to ensure this code is run during scenario initialization. +```python +from armory.instrument import get_hub, Meter + +def set_up_meter(): + meter = Meter( + "my_arbitrary_meter_name", lambda x: x, "my_attack.attack_output" + ) + get_hub().connect_meter(meter) +``` +#### Step 3 +As before, the `Meter` accepts 3 inputs: a meter name, a metric/function for processing, and an argument name to pass to the metric/function. +- The meter name (`"my_arbitrary_meter_name"`) can be arbitrary within this context +- Again, `Meter` will record variables monitored by `Probe` as-is (thus `lambda x: x`) +- The argument passed to the metric/function follows a `.`-separated format (`"my_attack.attack_output"`), which needs to be consistent with `Probe` setup earlier: + - `my_attack` matches input in `probe = get_probe("my_attack")` + - `attack_output` matches variable name in `probe.update(attack_output=x_patch)` + +#### Step 4 +Again, `get_hub().connect_meter(meter)` is necessary for establishing the connection between `meter` created in `armory/user_init_script.py` and `probe` created in `armory/custom_attack.py`. + +### Config Setup +Last but not least, the config file passed to `armory run` needs to be updated for these changes to take effect, which is accomplished by adding the `"user_init"` block (please refer to [User Initialization](./scenarios.md#user-initialization) for more details): +```json +... 
+ "user_init": { + "module": "user_init_script", + "name": "set_up_meter" + }, +... +``` +This will prompt armory to run `set_up_meter` in `user_init_script.py` before anything else is loaded for the scenario. + +## Saving Results +By default, outputs from `Meter`s will be saved to the output `json` file after `armory run`. Whether this suffices for the user depends on what the user is trying to measure. + +Users who have tried the examples in this document, however, may run into some of the following warning logs: +> 2022-12-16 19:34:36 30s WARNING armory.instrument.instrument:_write:856 record (name=my_arbitrary_meter_name, batch=0, result=...) size > max_record_size 1048576. Dropping. + +Outputs are saved to a `json` file because of a default `ResultWriter` class tied to the `Meter` class, which has a `max_record_size` limit for each record. Any record that exceeds `max_record_size` will not save to the `json` file. That the outputs exceed a size limit also suggests that a `json` file may not be the best file type to save to. To work around these behaviors, we can define a new `Writer` subclass (`ResultWriter` is also a `Writer` subclass) to work with our examples that does not have a size limit and will save to another filetype, such as a `png` file, since we are saving data for an image. 
Below is an updated `user_init_script.py` for Example 2 with a new `ImageWriter` class, which uses the `export` method of `ObjectDetectionExporter` to save an image, and a `set_up_meter_writer` function that will be executed with the `user_init` block: +```python +from armory.instrument import get_hub, Meter, Writer +from armory.instrument.export import ObjectDetectionExporter + +class ImageWriter(Writer): + def __init__(self, output_dir): + super().__init__() + self.output_dir = output_dir + self.iter_step = 0 + self.current_batch_index = 0 + self.exporter = ObjectDetectionExporter(self.output_dir) + + def _write(self, name, batch, result): + if batch != self.current_batch_index: + self.current_batch_index = batch # we are on a new batch + self.iter_step = 0 # restart iter_step count + basename = f"{name}_batch_{batch}_iter_{self.iter_step}" + # assume single image per batch: result[0] + self.exporter.export(x = result[0], basename = basename) + self.iter_step += 1 # increment iter_step + +def set_up_meter_writer(): + meter = Meter( + "my_attack_identity", lambda x: x, "my_attack.attack_output" + ) + writer = ImageWriter(output_dir = get_hub().export_dir) + meter.add_writer(writer) + get_hub().connect_meter(meter, use_default_writers=False) +``` \ No newline at end of file diff --git a/docs/metrics.md b/docs/metrics.md index 4f5d18729..a4d0577c3 100644 --- a/docs/metrics.md +++ b/docs/metrics.md @@ -24,7 +24,6 @@ Desired metrics and flags are placed under the key `"metric"` dictionary in the } ``` The `perturbation` and `task` fields can be null, a single string, or a list of strings. -Strings must be a valid armory metric from `armory.utils.metrics`, which are also described in the Metrics section below. The perturbation metrics measure the difference between the benign and adversarial inputs `x`. The task metrics measure the task performance on the predicted value w.r.t the true value `y`, for both benign and adversarial inputs. 
If task metrics take keyword arguments, such as `"iou_threshold"`, these can be (optionally) added a list of kwarg dicts. @@ -350,7 +349,7 @@ assert results == [7, 11] Since these all use a global Hub object, it doesn't matter which python files they are instantatied in. Probe should be instantiated in the file or class you are trying to measure. -Meters and writers can be instantiated in your initial setup, and can be connected before probes are constructed. +Meters and writers can be instantiated in your initial setup (please refer to [User Initialization](./scenarios.md#user-initialization) for more details about using the `user_init` block), and can be connected before probes are constructed. #### Direct Recording @@ -427,22 +426,6 @@ probe.update(func1, func2, func3, my_var=y) ``` will publish the value `func3(func2(func1(y)))`. -#### Hooking - -Probes can also hook models to enable capturing values without modifying the target code. -Currently, hooking is only implemented for PyTorch, but TensorFlow is on the roadmap. - -To hook a model module, you can use the `hook` function. -For instance, -```python -# probe.hook(module, *preprocessing, input=None, output=None) -probe.hook(convnet.layer1[0].conv2, lambda x: x.detach().cpu().numpy(), output="b") -``` -This essentially wraps the `probe.update` call with a hooking function. -This is intended for usage that cannot or does not modify the target codebase. - -More general hooking (e.g., for python methods) is TBD. - #### Interactive Testing An easy way to test probe outputs is to set the probe to a `MockSink` interface. 
diff --git a/tests/unit/test_instrument.py b/tests/unit/test_instrument.py index d8dfe1952..6ce2faeda 100644 --- a/tests/unit/test_instrument.py +++ b/tests/unit/test_instrument.py @@ -112,10 +112,6 @@ def not_implemented(*args, **kwargs): probe.update(not_implemented, x=1) sink._is_measuring = True - jax_model = None - with pytest.raises(ValueError): - probe.hook(jax_model, mode="jax") - def get_pytorch_model(): # Taken from https://pytorch.org/docs/stable/generated/torch.nn.Module.html @@ -135,45 +131,6 @@ def forward(self, x): return Model() -@pytest.mark.docker_required -def test_probe_pytorch_hook(): - import torch - - instrument.del_globals() - sink = HelperSink() - probe = instrument.Probe("model", sink=sink) - model = get_pytorch_model() - probe.hook(model.conv1, lambda x: x.detach().cpu().numpy(), output="b") - - key = "model.b" - assert key not in sink.probe_variables - - x1 = torch.rand((1, 1, 28, 28)) - model(x1) - b1 = sink.probe_variables["model.b"] - assert b1.shape == (1, 20, 24, 24) - x2 = torch.rand((1, 1, 28, 28)) - model(x2) - b2 = sink.probe_variables["model.b"] - assert b2.shape == (1, 20, 24, 24) - assert not (b1 == b2).all() - probe.unhook(model.conv1) - # probe is unhooked, no update should occur - model(x1) - b3 = sink.probe_variables["model.b"] - assert b3 is b2 - - -@pytest.mark.docker_required -def test_probe_tensorflow_hook(): - # Once implemented, update test - instrument.del_globals() - probe = instrument.get_probe() - with pytest.raises(NotImplementedError): - tf_model = None - probe.hook(tf_model, mode="tf") - - def test_process_meter_arg(): for arg in ("x", "scenario.x"): assert instrument.process_meter_arg(arg) == (arg, None)