Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
32 commits
Select commit Hold shift + click to select a range
601e78e
adding documentation for advanced end-to-end examples of probe and me…
ppark-twosixtech Dec 12, 2022
3f739a4
finished initial draft of Example 1
ppark-twosixtech Dec 12, 2022
7db1aa4
updated Example 1 with an extra step for connecting Probe, which need…
ppark-twosixtech Dec 13, 2022
dc772d9
started documentation for Example 3
ppark-twosixtech Dec 13, 2022
74365ed
added draft of Example 3 and additional discussion of other possible …
ppark-twosixtech Dec 14, 2022
28c3152
added updates for minimal working example of probe and method hooking…
ppark-twosixtech Dec 14, 2022
fdb42e3
Revert "added draft of Example 3 and additional discussion of other p…
ppark-twosixtech Dec 16, 2022
48234e1
added draft of Example 3 and additional discussion of other possible …
ppark-twosixtech Dec 14, 2022
0e42c43
Revert "added updates for minimal working example of probe and method…
ppark-twosixtech Dec 16, 2022
dbbd8c9
made a copy of updated probe.md (probe_hooking_notebook.md) to save a…
ppark-twosixtech Dec 16, 2022
e6e13da
completed initial draft of probe.md with new examples; need to test a…
ppark-twosixtech Dec 16, 2022
886c3d4
Merge branch 'develop' into probe-example
ppark-twosixtech Dec 16, 2022
d0c53e0
tested examples with armory run and corrected user_init block name fo…
ppark-twosixtech Dec 16, 2022
8a4b9ff
removed code and documentation referring to hook methods defined for …
ppark-twosixtech Dec 16, 2022
89d200a
resolving pr comments
ppark-twosixtech Dec 16, 2022
f402f30
added new section for saving outputs to pickle files
ppark-twosixtech Dec 17, 2022
dd462e2
added reference links to user_init block in scenarios.md
ppark-twosixtech Dec 19, 2022
1371a49
added line number references for code blocks
ppark-twosixtech Dec 19, 2022
ee52a9e
updated file name and title
ppark-twosixtech Dec 19, 2022
72577b7
updated documentation to use a new Writer class for saving to a pkl f…
ppark-twosixtech Dec 19, 2022
dd8feb7
updated documentation and tested example using a defined ImageWriter …
ppark-twosixtech Dec 20, 2022
ba3bedc
Merge branch 'develop' into probe-example
ppark-twosixtech Dec 22, 2022
c0f2780
removing unit tests involving Probe hook method
ppark-twosixtech Dec 22, 2022
c24837a
resolved some PR comments
ppark-twosixtech Dec 22, 2022
9d35fa8
shorter code snippet to resolve PR comment
ppark-twosixtech Dec 22, 2022
445a2ab
removed line references since Github Markdown does not support adding…
ppark-twosixtech Dec 22, 2022
8f57948
updated documentation for consistency between examples
ppark-twosixtech Dec 22, 2022
8cf8578
resolved PR comment on whether model and attack block in config file …
ppark-twosixtech Dec 22, 2022
f1fbb0c
resolved PR comment by changing ImageClassificationExporter to Object…
ppark-twosixtech Dec 22, 2022
30a5b92
updated variable names and added comments for clarity
ppark-twosixtech Dec 22, 2022
b282870
removing hooking example documentation for now
ppark-twosixtech Dec 22, 2022
6de62f2
missed substitution
ppark-twosixtech Dec 22, 2022
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
53 changes: 0 additions & 53 deletions armory/instrument/instrument.py
Original file line number Diff line number Diff line change
Expand Up @@ -118,59 +118,6 @@ def update(self, *preprocessing, **named_values):
# Push to sink
self.sink.update(name, value)

def hook(self, module, *preprocessing, input=None, output=None, mode="pytorch"):
    """Attach a capture hook to `module`, dispatching on the framework `mode`.

    Only "pytorch" and "tf" are recognized; any other mode raises ValueError.
    """
    if mode not in ("pytorch", "tf"):
        raise ValueError(f"mode {mode} not in ('pytorch', 'tf')")
    framework_hook = self.hook_torch if mode == "pytorch" else self.hook_tf
    return framework_hook(module, *preprocessing, input=input, output=output)

def hook_tf(self, module, *preprocessing, input=None, output=None):
    """Placeholder for TensorFlow hooking; not implemented yet.

    NOTE: references for a future implementation:
    https://discuss.pytorch.org/t/get-the-activations-of-the-second-to-last-layer/55629/6
    TensorFlow hooks:
    https://www.tensorflow.org/api_docs/python/tf/estimator/SessionRunHook
    https://github.com/tensorflow/tensorflow/issues/33478
    https://github.com/tensorflow/tensorflow/issues/33129
    https://stackoverflow.com/questions/48966281/get-intermediate-output-from-keras-tensorflow-during-prediction
    https://stackoverflow.com/questions/59493222/access-output-of-intermediate-layers-in-tensor-flow-2-0-in-eager-mode/60945216#60945216
    """
    raise NotImplementedError("hooking not ready for tensorflow")

def hook_torch(self, module, *preprocessing, input=None, output=None):
    """Register a torch forward hook on `module` that publishes the module's
    input and/or output through this probe under the given variable names.

    `input`/`output` are the probe variable names to publish under; each must
    be None (don't publish) or a non-empty string, and at least one must be
    set. Raises ValueError on invalid arguments or if `module` is already
    hooked.
    """
    if not hasattr(module, "register_forward_hook"):
        raise ValueError(
            f"module {module} does not have method 'register_forward_hook'. Is it a torch.nn.Module?"
        )
    # Validate both names with the same rule, preserving per-argument messages
    for arg_name, arg_value in (("input", input), ("output", output)):
        if arg_value == "" or (arg_value is not None and not isinstance(arg_value, str)):
            raise ValueError(f"{arg_name} {arg_value} must be None or a non-empty string")
    if input is None and output is None:
        raise ValueError("input and output cannot both be None")
    if module in self._hooks:
        raise ValueError(f"module {module} is already hooked")

    def capture(hooked_module, hooked_input, hooked_output):
        del hooked_module  # unused; variable names come from the closure
        published = {}
        if input is not None:
            published[input] = hooked_input
        if output is not None:
            published[output] = hooked_output
        self.update(*preprocessing, **published)

    handle = module.register_forward_hook(capture)
    self._hooks[module] = (handle, "pytorch")

def unhook(self, module):
    """Remove and deregister the hook previously attached to `module`.

    Raises KeyError if `module` was never hooked.
    """
    handle, framework = self._hooks.pop(module)
    if framework == "tf":
        raise NotImplementedError()
    if framework != "pytorch":
        raise ValueError(f"mode {framework} not in ('pytorch', 'tf')")
    handle.remove()


class MockSink:
"""
Expand Down
181 changes: 181 additions & 0 deletions docs/instrumentation_examples.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,181 @@
# Armory Instrumentation Examples: Measuring Experiment Artifacts Using Probes and Meters
For an introduction to `Probe`s and `Meter`s, please refer to [Measurement Overview](./metrics.md#instrumentation). We assume the user is capturing artifacts from the model or attack and wishes to use `Probe`s and `Meter`s to monitor certain variables within the code.

Recall the steps for a minimal working example (in [Measurement Overview](./metrics.md#instrumentation)):
1. Create `Probe` via `get_probe(name)`
2. Place `Probe` actions
3. Create `Meter` with processing functions that take input from created `Probe`
4. Connect `Meter` to `Hub` via `get_hub().connect_meter(meter)`

The examples below show how each of these steps is accomplished.

## Example 1: Measuring a Model Layer's Output
### User Story
I am interested in the layer output from the second `relu` activation of a `forward` method located in `armory/baseline_models/pytorch/cifar.py`.
### `Probe` Example Code
The code below is an example of how to accomplish steps 1 and 2 (note the lines of code with `# added` comments at the end) for a model code that the user is modifying.
```python
"""
CNN model for 32x32x3 image classification
"""
...

from armory.instrument import get_probe # added
probe = get_probe("my_model") # added

class Net(nn.Module):
...

def forward(self, x: torch.Tensor) -> torch.Tensor:
x = x.permute(0, 3, 1, 2) # from NHWC to NCHW
x = self.conv1(x)
x = F.relu(x)
x = F.max_pool2d(x, 2)
x = self.conv2(x)
x = F.relu(x)
x_out = x.detach().cpu().numpy() # added
probe.update(layer_output=x_out) # added
x = F.max_pool2d(x, 2)
x = torch.flatten(x, 1)
x = self.fc1(x)
x = F.relu(x)
x = self.fc2(x)
output = F.log_softmax(x, dim=1)
return output

...
```

#### Step 1
After importing `get_probe`, `probe = get_probe("my_model")` creates a `Probe` object with the name `"my_model"`, which is what the user can refer to later to apply processing functions through a `Meter` object.

#### Step 2
`x_out = x.detach().cpu().numpy()` is taking the layer output of interest (second `relu` activation output) and converting the tensor to `numpy` array on the CPU, which will be passed to `probe`. An updated value of `x_out` is stored in `layer_output` via `probe.update(layer_output=x_out)`. Like the `Probe` name `"my_model"`, `layer_output` can be referenced by the user later to apply additional processing functions through a `Meter` object.

### `Meter` Example Code
Now that a `Probe` instance has been created, we need to create a `Meter` object to accept any updated values from `Probe` and apply further processing that the user desires. We can create the `Meter` in a function added to a local Python script we'll name `user_init_script.py`. In [Config Setup](#config-setup) shortly below, we'll show how to ensure this code is run during scenario initialization.
```python
from armory.instrument import get_hub, Meter

def set_up_meter():
meter = Meter(
"my_arbitrary_meter_name", lambda x: x, "my_model.layer_output"
)
get_hub().connect_meter(meter)
```
#### Step 3
In this particular example, the `Meter` accepts 3 inputs: a meter name, a metric/function for processing, and an argument name to pass to the metric/function.
- The meter name (`"my_arbitrary_meter_name"`) can be arbitrary within this context
- For the scope of this document, we only consider simple `Meter`s with the identity function as a metric i.e. `Meter` will record variables monitored by `Probe` as-is (thus `lambda x: x`)
- The argument passed to the metric/function follows a `.`-separated format (`"my_model.layer_output"`), which needs to be consistent with `Probe` setup earlier:
- `my_model` matches input in `probe = get_probe("my_model")`
- `layer_output` matches variable name in `probe.update(layer_output=x_out)`

#### Step 4
For the scope of this document, we don't dwell on what `armory` is doing with `get_hub().connect_meter(meter)` other than to mention this step is necessary for establishing the connection between `meter` created in `armory/user_init_script.py` and `probe` created in the modified version of `armory/baseline_models/pytorch/cifar.py`.

### Config Setup
Last but not least, the config file passed to `armory run` needs to be updated for these changes to take effect, which is accomplished by adding the `"user_init"` block (please refer to [User Initialization](./scenarios.md#user-initialization) for more details):
```json
...
"user_init": {
"module": "user_init_script",
"name": "set_up_meter"
},
...
```
This will prompt armory to run `set_up_meter` in `user_init_script.py` before anything else is loaded for the scenario.

## Example 2: Measuring Attack Artifact
### User Story
I defined a custom attack with `CARLADapricotPatch` in `armory/custom_attack.py`, and I am interested in the patch after <ins>***every iteration***</ins>, which is generated by `CARLADapricotPatch._augment_images_with_patch` and returned as an output.
### `Probe` Example Code
```python
from armory.art_experimental.attacks.carla_obj_det_patch import CARLADapricotPatch
from armory.instrument import get_probe
probe = get_probe("my_attack")

class CustomAttack(CARLADapricotPatch):
def _augment_images_with_patch(self, **kwargs):
return_value = super()._augment_images_with_patch(**kwargs)
x_patch, patch_target, transformations = return_value
probe.update(attack_output=x_patch)

return return_value
```
#### Step 1
This step is the same as before, except the `Probe` name is set to `"my_attack"`, which is what the user can refer to later to apply processing functions through a `Meter` object.

#### Step 2
The only difference between `CustomAttack` and `CARLADapricotPatch` is that `_augment_images_with_patch` has been redefined to call on `CARLADapricotPatch._augment_images_with_patch` and then have `probe` update the value for `x_patch` that results from that call. An updated value of `x_patch` is stored in `attack_output` via `probe.update(attack_output=x_patch)`. Like the `Probe` name `"my_attack"`, `attack_output` can be referenced by the user later to apply additional processing functions through a `Meter` object.

### `Meter` Example Code
As in [Example 1](#meter-example-code), we need to create a `Meter` object to accept any updated values from `Probe` and apply further processing that the user desires. We can create the `Meter` in a function added to a local Python script `user_init_script.py`. In [Config Setup](#config-setup-1) shortly below, we'll show how to ensure this code is run during scenario initialization.
```python
from armory.instrument import get_hub, Meter

def set_up_meter():
meter = Meter(
"my_arbitrary_meter_name", lambda x: x, "my_attack.attack_output"
)
get_hub().connect_meter(meter)
```
#### Step 3
As before, the `Meter` accepts 3 inputs: a meter name, a metric/function for processing, and an argument name to pass to the metric/function.
- The meter name (`"my_arbitrary_meter_name"`) can be arbitrary within this context
- Again, `Meter` will record variables monitored by `Probe` as-is (thus `lambda x: x`)
- The argument passed to the metric/function follows a `.`-separated format (`"my_attack.attack_output"`), which needs to be consistent with `Probe` setup earlier:
- `my_attack` matches input in `probe = get_probe("my_attack")`
- `attack_output` matches variable name in `probe.update(attack_output=x_patch)`

#### Step 4
Again, `get_hub().connect_meter(meter)` is necessary for establishing the connection between `meter` created in `armory/user_init_script.py` and `probe` created in `armory/custom_attack.py`.

### Config Setup
Last but not least, the config file passed to `armory run` needs to be updated for these changes to take effect, which is accomplished by adding the `"user_init"` block (please refer to [User Initialization](./scenarios.md#user-initialization) for more details):
```json
...
"user_init": {
"module": "user_init_script",
"name": "set_up_meter"
},
...
```
This will prompt armory to run `set_up_meter` in `user_init_script.py` before anything else is loaded for the scenario.

## Saving Results
By default, outputs from `Meter`s will be saved to the output `json` file after `armory run`. Whether this suffices for the user depends on what the user is trying to measure.

Users who have tried the examples in this document, however, may run into some of the following warning logs:
> 2022-12-16 19:34:36 30s WARNING armory.instrument.instrument:_write:856 record (name=my_arbitrary_meter_name, batch=0, result=...) size > max_record_size 1048576. Dropping.

Outputs are saved to a `json` file because of a default `ResultWriter` class tied to the `Meter` class, which has a `max_record_size` limit for each record. Any record that exceeds `max_record_size` will not save to the `json` file. That the outputs exceed a size limit also suggests that a `json` file may not be the best file type to save to. To work around these behaviors, we can define a new `Writer` subclass (`ResultWriter` is also a `Writer` subclass) to work with our examples that does not have a size limit and will save to another filetype, such as a `png` file, since we are saving data for an image. Below is an updated `user_init_script.py` for Example 2 with a new `ImageWriter` class, which uses the `export` method of `ObjectDetectionExporter` to save an image, and a `set_up_meter_writer` function that will be executed with the `user_init` block:
```python
from armory.instrument import get_hub, Meter, Writer
from armory.instrument.export import ObjectDetectionExporter

class ImageWriter(Writer):
def __init__(self, output_dir):
super().__init__()
self.output_dir = output_dir
self.iter_step = 0
self.current_batch_index = 0
self.exporter = ObjectDetectionExporter(self.output_dir)

def _write(self, name, batch, result):
if batch != self.current_batch_index:
self.current_batch_index = batch # we are on a new batch
self.iter_step = 0 # restart iter_step count
basename = f"{name}_batch_{batch}_iter_{self.iter_step}"
# assume single image per batch: result[0]
self.exporter.export(x = result[0], basename = basename)
self.iter_step += 1 # increment iter_step

def set_up_meter_writer():
meter = Meter(
"my_attack_identity", lambda x: x, "my_attack.attack_output"
)
writer = ImageWriter(output_dir = get_hub().export_dir)
meter.add_writer(writer)
get_hub().connect_meter(meter, use_default_writers=False)
```
19 changes: 1 addition & 18 deletions docs/metrics.md
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,6 @@ Desired metrics and flags are placed under the key `"metric"` dictionary in the
}
```
The `perturbation` and `task` fields can be null, a single string, or a list of strings.
Strings must be a valid armory metric from `armory.utils.metrics`, which are also described in the Metrics section below.
The perturbation metrics measure the difference between the benign and adversarial inputs `x`.
The task metrics measure the task performance on the predicted value w.r.t the true value `y`, for both benign and adversarial inputs.
If task metrics take keyword arguments, such as `"iou_threshold"`, these can be (optionally) added as a list of kwarg dicts.
Expand Down Expand Up @@ -350,7 +349,7 @@ assert results == [7, 11]

Since these all use a global Hub object, it doesn't matter which python files they are instantiated in.
Probe should be instantiated in the file or class you are trying to measure.
Meters and writers can be instantiated in your initial setup, and can be connected before probes are constructed.
Meters and writers can be instantiated in your initial setup (please refer to [User Initialization](./scenarios.md#user-initialization) for more details about using the `user_init` block), and can be connected before probes are constructed.

#### Direct Recording

Expand Down Expand Up @@ -427,22 +426,6 @@ probe.update(func1, func2, func3, my_var=y)
```
will publish the value `func3(func2(func1(y)))`.

#### Hooking

Probes can also hook models to enable capturing values without modifying the target code.
Currently, hooking is only implemented for PyTorch, but TensorFlow is on the roadmap.

To hook a model module, you can use the `hook` function.
For instance,
```python
# probe.hook(module, *preprocessing, input=None, output=None)
probe.hook(convnet.layer1[0].conv2, lambda x: x.detach().cpu().numpy(), output="b")
```
This essentially wraps the `probe.update` call with a hooking function.
This is intended for usage that cannot or does not modify the target codebase.

More general hooking (e.g., for python methods) is TBD.

#### Interactive Testing

An easy way to test probe outputs is to set the probe to a `MockSink` interface.
Expand Down
43 changes: 0 additions & 43 deletions tests/unit/test_instrument.py
Original file line number Diff line number Diff line change
Expand Up @@ -112,10 +112,6 @@ def not_implemented(*args, **kwargs):
probe.update(not_implemented, x=1)
sink._is_measuring = True

jax_model = None
with pytest.raises(ValueError):
probe.hook(jax_model, mode="jax")


def get_pytorch_model():
# Taken from https://pytorch.org/docs/stable/generated/torch.nn.Module.html
Expand All @@ -135,45 +131,6 @@ def forward(self, x):
return Model()


@pytest.mark.docker_required
def test_probe_pytorch_hook():
    """Hooked conv layer output should appear in the sink and stop updating after unhook."""
    import torch

    instrument.del_globals()
    helper_sink = HelperSink()
    hooked_probe = instrument.Probe("model", sink=helper_sink)
    net = get_pytorch_model()
    hooked_probe.hook(net.conv1, lambda t: t.detach().cpu().numpy(), output="b")

    key = "model.b"
    assert key not in helper_sink.probe_variables

    first_input = torch.rand((1, 1, 28, 28))
    net(first_input)
    first_capture = helper_sink.probe_variables[key]
    assert first_capture.shape == (1, 20, 24, 24)

    net(torch.rand((1, 1, 28, 28)))
    second_capture = helper_sink.probe_variables[key]
    assert second_capture.shape == (1, 20, 24, 24)
    assert (first_capture != second_capture).any()

    hooked_probe.unhook(net.conv1)
    # probe is unhooked, so running the model must not record a new value
    net(first_input)
    assert helper_sink.probe_variables[key] is second_capture


@pytest.mark.docker_required
def test_probe_tensorflow_hook():
    # TensorFlow hooking is unimplemented; once implemented, update this test.
    instrument.del_globals()
    tf_probe = instrument.get_probe()
    unused_tf_model = None
    with pytest.raises(NotImplementedError):
        tf_probe.hook(unused_tf_model, mode="tf")


def test_process_meter_arg():
for arg in ("x", "scenario.x"):
assert instrument.process_meter_arg(arg) == (arg, None)
Expand Down