Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
32 commits
Select commit Hold shift + click to select a range
601e78e
adding documentation for advanced end-to-end examples of probe and me…
ppark-twosixtech Dec 12, 2022
3f739a4
finished initial draft of Example 1
ppark-twosixtech Dec 12, 2022
7db1aa4
updated Example 1 with an extra step for connecting Probe, which need…
ppark-twosixtech Dec 13, 2022
dc772d9
started documentation for Example 3
ppark-twosixtech Dec 13, 2022
74365ed
added draft of Example 3 and additional discussion of other possible …
ppark-twosixtech Dec 14, 2022
28c3152
added updates for minimal working example of probe and method hooking…
ppark-twosixtech Dec 14, 2022
fdb42e3
Revert "added draft of Example 3 and additional discussion of other p…
ppark-twosixtech Dec 16, 2022
48234e1
added draft of Example 3 and additional discussion of other possible …
ppark-twosixtech Dec 14, 2022
0e42c43
Revert "added updates for minimal working example of probe and method…
ppark-twosixtech Dec 16, 2022
dbbd8c9
made a copy of updated probe.md (probe_hooking_notebook.md) to save a…
ppark-twosixtech Dec 16, 2022
e6e13da
completed initial draft of probe.md with new examples; need to test a…
ppark-twosixtech Dec 16, 2022
886c3d4
Merge branch 'develop' into probe-example
ppark-twosixtech Dec 16, 2022
d0c53e0
tested examples with armory run and corrected user_init block name fo…
ppark-twosixtech Dec 16, 2022
8a4b9ff
removed code and documentation referring to hook methods defined for …
ppark-twosixtech Dec 16, 2022
89d200a
resolving pr comments
ppark-twosixtech Dec 16, 2022
f402f30
added new section for saving outputs to pickle files
ppark-twosixtech Dec 17, 2022
dd462e2
added reference links to user_init block in scenarios.md
ppark-twosixtech Dec 19, 2022
1371a49
added line number references for code blocks
ppark-twosixtech Dec 19, 2022
ee52a9e
updated file name and title
ppark-twosixtech Dec 19, 2022
72577b7
updated documentation to use a new Writer class for saving to a pkl f…
ppark-twosixtech Dec 19, 2022
dd8feb7
updated documentation and tested example using a defined ImageWriter …
ppark-twosixtech Dec 20, 2022
ba3bedc
Merge branch 'develop' into probe-example
ppark-twosixtech Dec 22, 2022
c0f2780
removing unit tests involving Probe hook method
ppark-twosixtech Dec 22, 2022
c24837a
resolved some PR comments
ppark-twosixtech Dec 22, 2022
9d35fa8
shorter code snippet to resolve PR comment
ppark-twosixtech Dec 22, 2022
445a2ab
removed line references since Github Markdown does not support adding…
ppark-twosixtech Dec 22, 2022
8f57948
updated documentation for consistency between examples
ppark-twosixtech Dec 22, 2022
8cf8578
resolved PR comment on whether model and attack block in config file …
ppark-twosixtech Dec 22, 2022
f1fbb0c
resolved PR comment by changing ImageClassificationExporter to Object…
ppark-twosixtech Dec 22, 2022
30a5b92
updated variable names and added comments for clarity
ppark-twosixtech Dec 22, 2022
b282870
removing hooking example documentation for now
ppark-twosixtech Dec 22, 2022
6de62f2
missed substitution
ppark-twosixtech Dec 22, 2022
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
53 changes: 0 additions & 53 deletions armory/instrument/instrument.py
Original file line number Diff line number Diff line change
Expand Up @@ -118,59 +118,6 @@ def update(self, *preprocessing, **named_values):
# Push to sink
self.sink.update(name, value)

def hook(self, module, *preprocessing, input=None, output=None, mode="pytorch"):
    """Attach a capture hook to `module`, dispatching on the framework `mode`.

    Only "pytorch" and "tf" are recognized; any other mode raises ValueError.
    """
    if mode not in ("pytorch", "tf"):
        raise ValueError(f"mode {mode} not in ('pytorch', 'tf')")
    framework_hook = self.hook_torch if mode == "pytorch" else self.hook_tf
    return framework_hook(module, *preprocessing, input=input, output=output)

def hook_tf(self, module, *preprocessing, input=None, output=None):
    """Placeholder for TensorFlow hooking; not implemented yet.

    NOTE: references for a future implementation:
    https://discuss.pytorch.org/t/get-the-activations-of-the-second-to-last-layer/55629/6
    TensorFlow hooks:
    https://www.tensorflow.org/api_docs/python/tf/estimator/SessionRunHook
    https://github.com/tensorflow/tensorflow/issues/33478
    https://github.com/tensorflow/tensorflow/issues/33129
    https://stackoverflow.com/questions/48966281/get-intermediate-output-from-keras-tensorflow-during-prediction
    https://stackoverflow.com/questions/59493222/access-output-of-intermediate-layers-in-tensor-flow-2-0-in-eager-mode/60945216#60945216
    """
    raise NotImplementedError("hooking not ready for tensorflow")

def hook_torch(self, module, *preprocessing, input=None, output=None):
    """Register a torch forward hook on `module` that publishes the module's
    input and/or output through this probe under the given variable names.

    `input`/`output` are the probe variable names to publish under; each must
    be None (don't publish) or a non-empty string, and at least one must be
    set. Raises ValueError on invalid arguments or if `module` is already
    hooked.
    """
    if not hasattr(module, "register_forward_hook"):
        raise ValueError(
            f"module {module} does not have method 'register_forward_hook'. Is it a torch.nn.Module?"
        )
    # Validate both names with the same rule, preserving per-argument messages
    for arg_name, arg_value in (("input", input), ("output", output)):
        if arg_value == "" or (arg_value is not None and not isinstance(arg_value, str)):
            raise ValueError(f"{arg_name} {arg_value} must be None or a non-empty string")
    if input is None and output is None:
        raise ValueError("input and output cannot both be None")
    if module in self._hooks:
        raise ValueError(f"module {module} is already hooked")

    def capture(hooked_module, hooked_input, hooked_output):
        del hooked_module  # unused; variable names come from the closure
        published = {}
        if input is not None:
            published[input] = hooked_input
        if output is not None:
            published[output] = hooked_output
        self.update(*preprocessing, **published)

    handle = module.register_forward_hook(capture)
    self._hooks[module] = (handle, "pytorch")

def unhook(self, module):
    """Remove and deregister the hook previously attached to `module`.

    Raises KeyError if `module` was never hooked.
    """
    handle, framework = self._hooks.pop(module)
    if framework == "tf":
        raise NotImplementedError()
    if framework != "pytorch":
        raise ValueError(f"mode {framework} not in ('pytorch', 'tf')")
    handle.remove()


class MockSink:
"""
Expand Down
181 changes: 181 additions & 0 deletions docs/instrumentation_examples.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,181 @@
# Armory Instrumentation Examples: Measuring Experiment Artifacts Using Probes and Meters
For an introduction to `Probe`s and `Meter`s, please refer to [Measurement Overview](./metrics.md#instrumentation). We assume the user is capturing artifacts from the model or attack and wishes to use `Probe`s and `Meter`s to monitor certain variables within the code.

Recall the steps for a minimal working example (in [Measurement Overview](./metrics.md#instrumentation)):
1. Create `Probe` via `get_probe(name)`
2. Place `Probe` actions
3. Create `Meter` with processing functions that take input from created `Probe`
4. Connect `Meter` to `Hub` via `get_hub().connect_meter(meter)`

The examples below show how each of these steps is accomplished.

## Example 1: Measuring a Model Layer's Output
### User Story
I am interested in the layer output from the second `relu` activation of a `forward` method located in `armory/baseline_models/pytorch/cifar.py`.
### `Probe` Example Code
The code below is an example of how to accomplish steps 1 and 2 (note the lines of code with `# added` comments at the end) for a model code that the user is modifying.
```python
"""
CNN model for 32x32x3 image classification
"""
...

from armory.instrument import get_probe # added
probe = get_probe("my_model") # added

class Net(nn.Module):
...

def forward(self, x: torch.Tensor) -> torch.Tensor:
x = x.permute(0, 3, 1, 2) # from NHWC to NCHW
x = self.conv1(x)
x = F.relu(x)
x = F.max_pool2d(x, 2)
x = self.conv2(x)
x = F.relu(x)
x_out = x.detach().cpu().numpy() # added
probe.update(layer_output=x_out) # added
x = F.max_pool2d(x, 2)
x = torch.flatten(x, 1)
x = self.fc1(x)
x = F.relu(x)
x = self.fc2(x)
output = F.log_softmax(x, dim=1)
return output

...
```

#### Step 1
After importing `get_probe`, `probe = get_probe("my_model")` creates a `Probe` object with the name `"my_model"`, which is what the user can refer to later to apply processing functions through a `Meter` object.

#### Step 2
`x_out = x.detach().cpu().numpy()` is taking the layer output of interest (second `relu` activation output) and converting the tensor to `numpy` array on the CPU, which will be passed to `probe`. An updated value of `x_out` is stored in `layer_output` via `probe.update(layer_output=x_out)`. Like the `Probe` name `"my_model"`, `layer_output` can be referenced by the user later to apply additional processing functions through a `Meter` object.

### `Meter` Example Code
Now that a `Probe` instance has been created, we need to create a `Meter` object to accept any updated values from `Probe` and apply further processing that the user desires. We can create the `Meter` in a function added to a local Python script we'll name `user_init_script.py`. In [Config Setup](#config-setup) shortly below, we'll show how to ensure this code is run during scenario initialization.
```python
from armory.instrument import get_hub, Meter

def set_up_meter():
meter = Meter(
"my_arbitrary_meter_name", lambda x: x, "my_model.layer_output"
)
get_hub().connect_meter(meter)
```
#### Step 3
In this particular example, the `Meter` accepts 3 inputs: a meter name, a metric/function for processing, and an argument name to pass to the metric/function.
- The meter name (`"my_arbitrary_meter_name"`) can be arbitrary within this context
- For the scope of this document, we only consider simple `Meter`s with the identity function as a metric i.e. `Meter` will record variables monitored by `Probe` as-is (thus `lambda x: x`)
- The argument passed to the metric/function follows a `.`-separated format (`"my_model.layer_output"`), which needs to be consistent with `Probe` setup earlier:
- `my_model` matches input in `probe = get_probe("my_model")`
- `layer_output` matches variable name in `probe.update(layer_output=x_out)`

#### Step 4
For the scope of this document, we don't dwell on what `armory` is doing with `get_hub().connect_meter(meter)` other than to mention this step is necessary for establishing the connection between `meter` created in `armory/user_init_script.py` and `probe` created in the modified version of `armory/baseline_models/pytorch/cifar.py`.

### Config Setup
Last but not least, the config file passed to `armory run` needs to be updated for these changes to take effect, which is accomplished by adding the `"user_init"` block (please refer to [User Initialization](./scenarios.md#user-initialization) for more details):
```json
...
"user_init": {
"module": "user_init_script",
"name": "set_up_meter"
},
...
```
This will prompt armory to run `set_up_meter` in `user_init_script.py` before anything else is loaded for the scenario.

## Example 2: Measuring Attack Artifact
### User Story
I defined a custom attack with `CARLADapricotPatch` in `armory/custom_attack.py`, and I am interested in the patch after <ins>***every iteration***</ins>, which is generated by `CARLADapricotPatch._augment_images_with_patch` and returned as an output.
### `Probe` Example Code
```python
from armory.art_experimental.attacks.carla_obj_det_patch import CARLADapricotPatch
from armory.instrument import get_probe
probe = get_probe("my_attack")

class CustomAttack(CARLADapricotPatch):
def _augment_images_with_patch(self, **kwargs):
return_value = super()._augment_images_with_patch(**kwargs)
x_patch, patch_target, transformations = return_value
probe.update(attack_output=x_patch)

return return_value
```
#### Step 1
This step is the same as before, except the `Probe` name is set to `"my_attack"`, which is what the user can refer to later to apply processing functions through a `Meter` object.

#### Step 2
The only difference between `CustomAttack` and `CARLADapricotPatch` is that `_augment_images_with_patch` has been redefined to call on `CARLADapricotPatch._augment_images_with_patch` and then have `probe` update the value for `x_patch` that results from that call. An updated value of `x_patch` is stored in `attack_output` via `probe.update(attack_output=x_patch)`. Like the `Probe` name `"my_attack"`, `attack_output` can be referenced by the user later to apply additional processing functions through a `Meter` object.

### `Meter` Example Code
As in [Example 1](#meter-example-code), we need to create a `Meter` object to accept any updated values from `Probe` and apply further processing that the user desires. We can create the `Meter` in a function added to a local Python script `user_init_script.py`. In [Config Setup](#config-setup-1) shortly below, we'll show how to ensure this code is run during scenario initialization.
```python
from armory.instrument import get_hub, Meter

def set_up_meter():
meter = Meter(
"my_arbitrary_meter_name", lambda x: x, "my_attack.attack_output"
)
get_hub().connect_meter(meter)
```
#### Step 3
As before, the `Meter` accepts 3 inputs: a meter name, a metric/function for processing, and an argument name to pass to the metric/function.
- The meter name (`"my_arbitrary_meter_name"`) can be arbitrary within this context
- Again, `Meter` will record variables monitored by `Probe` as-is (thus `lambda x: x`)
- The argument passed to the metric/function follows a `.`-separated format (`"my_attack.attack_output"`), which needs to be consistent with `Probe` setup earlier:
- `my_attack` matches input in `probe = get_probe("my_attack")`
- `attack_output` matches variable name in `probe.update(attack_output=x_patch)`

#### Step 4
Again, `get_hub().connect_meter(meter)` is necessary for establishing the connection between `meter` created in `armory/user_init_script.py` and `probe` created in `armory/custom_attack.py`.

### Config Setup
Last but not least, the config file passed to `armory run` needs to be updated for these changes to take effect, which is accomplished by adding the `"user_init"` block (please refer to [User Initialization](./scenarios.md#user-initialization) for more details):
```json
...
"user_init": {
"module": "user_init_script",
"name": "set_up_meter"
},
...
```
This will prompt armory to run `set_up_meter` in `user_init_script.py` before anything else is loaded for the scenario.

## Saving Results
By default, outputs from `Meter`s will be saved to the output `json` file after `armory run`. Whether this suffices for the user depends on what the user is trying to measure.

Users who have tried the examples in this document, however, may run into some of the following warning logs:
> 2022-12-16 19:34:36 30s WARNING armory.instrument.instrument:_write:856 record (name=my_arbitrary_meter_name, batch=0, result=...) size > max_record_size 1048576. Dropping.

Outputs are saved to a `json` file because of a default `ResultWriter` class tied to the `Meter` class, which has a `max_record_size` limit for each record. Any record that exceeds `max_record_size` will not save to the `json` file. That the outputs exceed a size limit also suggests that a `json` file may not be the best file type to save to. To work around these behaviors, we can define a new `Writer` subclass (`ResultWriter` is also a `Writer` subclass) to work with our examples that does not have a size limit and will save to another filetype, such as a `png` file, since we are saving data for an image. Below is an updated `user_init_script.py` for Example 2 with a new `ImageWriter` class, which uses the `export` method of `ObjectDetectionExporter` to save an image, and a `set_up_meter_writer` function that will be executed with the `user_init` block:
```python
from armory.instrument import get_hub, Meter, Writer
from armory.instrument.export import ObjectDetectionExporter

class ImageWriter(Writer):
def __init__(self, output_dir):
super().__init__()
self.output_dir = output_dir
self.iter_step = 0
self.current_batch_index = 0
self.exporter = ObjectDetectionExporter(self.output_dir)

def _write(self, name, batch, result):
if batch != self.current_batch_index:
self.current_batch_index = batch # we are on a new batch
self.iter_step = 0 # restart iter_step count
basename = f"{name}_batch_{batch}_iter_{self.iter_step}"
# assume single image per batch: result[0]
self.exporter.export(x = result[0], basename = basename)
self.iter_step += 1 # increment iter_step

def set_up_meter_writer():
meter = Meter(
"my_attack_identity", lambda x: x, "my_attack.attack_output"
)
writer = ImageWriter(output_dir = get_hub().export_dir)
meter.add_writer(writer)
get_hub().connect_meter(meter, use_default_writers=False)
```
19 changes: 1 addition & 18 deletions docs/metrics.md
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,6 @@ Desired metrics and flags are placed under the key `"metric"` dictionary in the
}
```
The `perturbation` and `task` fields can be null, a single string, or a list of strings.
Strings must be a valid armory metric from `armory.utils.metrics`, which are also described in the Metrics section below.
The perturbation metrics measure the difference between the benign and adversarial inputs `x`.
The task metrics measure the task performance on the predicted value w.r.t the true value `y`, for both benign and adversarial inputs.
If task metrics take keyword arguments, such as `"iou_threshold"`, these can be (optionally) added as a list of kwarg dicts.
Expand Down Expand Up @@ -350,7 +349,7 @@ assert results == [7, 11]

Since these all use a global Hub object, it doesn't matter which python files they are instantiated in.
Probe should be instantiated in the file or class you are trying to measure.
Meters and writers can be instantiated in your initial setup, and can be connected before probes are constructed.
Meters and writers can be instantiated in your initial setup (please refer to [User Initialization](./scenarios.md#user-initialization) for more details about using the `user_init` block), and can be connected before probes are constructed.

#### Direct Recording

Expand Down Expand Up @@ -427,22 +426,6 @@ probe.update(func1, func2, func3, my_var=y)
```
will publish the value `func3(func2(func1(y)))`.

#### Hooking

Probes can also hook models to enable capturing values without modifying the target code.
Currently, hooking is only implemented for PyTorch, but TensorFlow is on the roadmap.

To hook a model module, you can use the `hook` function.
For instance,
```python
# probe.hook(module, *preprocessing, input=None, output=None)
probe.hook(convnet.layer1[0].conv2, lambda x: x.detach().cpu().numpy(), output="b")
```
This essentially wraps the `probe.update` call with a hooking function.
This is intended for usage that cannot or does not modify the target codebase.

More general hooking (e.g., for python methods) is TBD.

#### Interactive Testing

An easy way to test probe outputs is to set the probe to a `MockSink` interface.
Expand Down
43 changes: 0 additions & 43 deletions tests/unit/test_instrument.py
Original file line number Diff line number Diff line change
Expand Up @@ -112,10 +112,6 @@ def not_implemented(*args, **kwargs):
probe.update(not_implemented, x=1)
sink._is_measuring = True

jax_model = None
with pytest.raises(ValueError):
probe.hook(jax_model, mode="jax")


def get_pytorch_model():
# Taken from https://pytorch.org/docs/stable/generated/torch.nn.Module.html
Expand All @@ -135,45 +131,6 @@ def forward(self, x):
return Model()


@pytest.mark.docker_required
def test_probe_pytorch_hook():
    """Hooked conv layer output should appear in the sink and stop updating after unhook."""
    import torch

    instrument.del_globals()
    helper_sink = HelperSink()
    hooked_probe = instrument.Probe("model", sink=helper_sink)
    net = get_pytorch_model()
    hooked_probe.hook(net.conv1, lambda t: t.detach().cpu().numpy(), output="b")

    key = "model.b"
    assert key not in helper_sink.probe_variables

    first_input = torch.rand((1, 1, 28, 28))
    net(first_input)
    first_capture = helper_sink.probe_variables[key]
    assert first_capture.shape == (1, 20, 24, 24)

    net(torch.rand((1, 1, 28, 28)))
    second_capture = helper_sink.probe_variables[key]
    assert second_capture.shape == (1, 20, 24, 24)
    assert (first_capture != second_capture).any()

    hooked_probe.unhook(net.conv1)
    # probe is unhooked, so running the model must not record a new value
    net(first_input)
    assert helper_sink.probe_variables[key] is second_capture


@pytest.mark.docker_required
def test_probe_tensorflow_hook():
    # TensorFlow hooking is unimplemented; once implemented, update this test.
    instrument.del_globals()
    tf_probe = instrument.get_probe()
    unused_tf_model = None
    with pytest.raises(NotImplementedError):
        tf_probe.hook(unused_tf_model, mode="tf")


def test_process_meter_arg():
for arg in ("x", "scenario.x"):
assert instrument.process_meter_arg(arg) == (arg, None)
Expand Down