From 4e3216e7e10d029a1a3882a2d71a17750801dd81 Mon Sep 17 00:00:00 2001 From: rohori Date: Tue, 2 Nov 2021 08:12:33 +0900 Subject: [PATCH 1/5] Replace mypy with pyright and pytype --- .github/workflows/static.yml | 25 ++++++++++++++++++++++--- 1 file changed, 22 insertions(+), 3 deletions(-) diff --git a/.github/workflows/static.yml b/.github/workflows/static.yml index 385fa3c..c2e9315 100644 --- a/.github/workflows/static.yml +++ b/.github/workflows/static.yml @@ -44,7 +44,7 @@ jobs: # treat Usage error, Warning, Error, Fatal as failure. # see bit-encoded exit codes of pylint for detail: # https://pylint.pycqa.org/en/latest/user_guide/run.html#exit-codes - mypy: + pyright: runs-on: ubuntu-latest steps: - uses: actions/checkout@v2 @@ -56,5 +56,24 @@ jobs: curl -sSL https://raw.githubusercontent.com/python-poetry/poetry/master/install-poetry.py | python echo "$HOME/.local/bin" >> $GITHUB_PATH poetry install --no-interaction - - name: mypy - run: poetry run mypy src tests + - uses: actions/setup-node@v2 + - name: install pyright + run: npm install -g pyright + - name: pyright + run: poetry run pyright src tests + pytype: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v2 + - uses: actions/setup-python@v2 + with: + python-version: "3.8" + - name: install dependencies + run: | + curl -sSL https://raw.githubusercontent.com/python-poetry/poetry/master/install-poetry.py | python + echo "$HOME/.local/bin" >> $GITHUB_PATH + poetry install --no-interaction + - name: install pytype + run: poetry add -D pytype + - name: pytype + run: poetry run pytype src tests From 0c0d75147705e12e6a1678eaa10e3bf9239e9a5a Mon Sep 17 00:00:00 2001 From: rohori Date: Tue, 2 Nov 2021 19:54:50 +0900 Subject: [PATCH 2/5] Add pyre --- .github/workflows/static.yml | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/.github/workflows/static.yml b/.github/workflows/static.yml index c2e9315..50aa82d 100644 --- a/.github/workflows/static.yml +++ b/.github/workflows/static.yml 
@@ -44,6 +44,22 @@ jobs: # treat Usage error, Warning, Error, Fatal as failure. # see bit-encoded exit codes of pylint for detail: # https://pylint.pycqa.org/en/latest/user_guide/run.html#exit-codes + pyre: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v2 + - uses: actions/setup-python@v2 + with: + python-version: "3.8" + - name: install dependencies + run: | + curl -sSL https://raw.githubusercontent.com/python-poetry/poetry/master/install-poetry.py | python + echo "$HOME/.local/bin" >> $GITHUB_PATH + poetry install --no-interaction + - name: install pyre + run: poetry add -D pyre-check + - name: pyre + run: poetry run pyre --source-directory src check pyright: runs-on: ubuntu-latest steps: From 9e5cf925c2fde7634029d66c6533f32b36893c43 Mon Sep 17 00:00:00 2001 From: rohori Date: Thu, 18 Nov 2021 10:30:57 +0900 Subject: [PATCH 3/5] Fix pyre errors: cli.internal --- src/psykoda/cli/internal.py | 20 ++++++++++++-------- 1 file changed, 12 insertions(+), 8 deletions(-) diff --git a/src/psykoda/cli/internal.py b/src/psykoda/cli/internal.py index 5474e52..3376ad7 100644 --- a/src/psykoda/cli/internal.py +++ b/src/psykoda/cli/internal.py @@ -9,7 +9,7 @@ import warnings from dataclasses import dataclass from datetime import datetime, timedelta -from typing import Dict, List, Optional, Tuple +from typing import Dict, List, Optional, Tuple, cast import numpy as np import pandas as pd @@ -55,7 +55,7 @@ def configure_logging(debug: bool): stderr_handler.addFilter(stderr_filter) stderr_handler.setLevel(logging.INFO) stderr_handler.setFormatter(logging.Formatter("%(message)s")) - handlers = [stderr_handler] + handlers: list[logging.Handler] = [stderr_handler] logfile_handler = logging.FileHandler(PATH_LOG) logfile_handler.setLevel(logging.DEBUG) @@ -405,7 +405,7 @@ def main_detection_skip_or_detect( logger.info("outputting detection reports") anomaly_score = detector.compute_anomaly_score(x_test, scale=True) num_anomaly = min( - sum(anomaly_score > 
anomaly_detection_config.threshold.min_score), + np.count_nonzero(anomaly_score > anomaly_detection_config.threshold.min_score), anomaly_detection_config.threshold.num_anomaly, ) @@ -523,6 +523,7 @@ def report_all(path_list_stats: List[str], path_save: str): [], columns=["datetime_rounded", "src_ip", "subnet", "service"] ) idx = 0 + results_shaps = pd.DataFrame() for path in path_list_stats: # Load stats stats = utils.load_json(path) @@ -545,7 +546,7 @@ def report_all(path_list_stats: List[str], path_save: str): results_pd.loc[idx] = [dt, src_ip, subnet, service] if idx == 0: - results_shaps = pd.DataFrame([], columns=report.columns) + results_shaps.columns = report.columns results_shaps.loc[idx] = report.loc[(dt, src_ip)] idx += 1 @@ -564,13 +565,14 @@ def report_all(path_list_stats: List[str], path_save: str): ret = pd.concat([ret, results_pd_group.get_group(key)]) ret.round(4).to_csv(path_save, index=False) + num_anomaly_ipaddr = len(keys) else: # Anomaly not found pd.DataFrame([["no anomaly found"]]).to_csv(path_save, index=False) + num_anomaly_ipaddr = 0 logger.info("[RESULT]", extra=to_stderr) logger.info("Detection summary file: %s", path_save, extra=to_stderr) - num_anomaly_ipaddr = len(keys) if anomaly_found else 0 logger.info( "Number of unique anomaly IP addresses: %s", num_anomaly_ipaddr, extra=to_stderr ) @@ -719,7 +721,9 @@ def detect_per_unit( label_value=1, ) log_labeled = labeled.factory(config.io.previous.log)[0].load_previous_log( - entries=known_normal.index, + entries=cast(pd.MultiIndex, known_normal.index), + # we can safely assume that known_normal.index is a MultiIndex + # since it is empty otherwise. 
) log_labeled = apply_exclude_lists(log_labeled, config.preprocess.exclude_lists) log_labeled = preprocess.extract_log( @@ -784,12 +788,12 @@ def _load_log_catch(load, r): def load_previous( - config: LoadPreviousConfigItem, date_to: datetime, label_value: float + config: Optional[LoadPreviousConfigItem], date_to: datetime, label_value: float ) -> pd.Series: from psykoda.preprocess import round_datetime from psykoda.utils import DateRange - if config.list is None: + if config is None or config.list is None: return pd.Series() def date_filter(row): From f9707828d7f9075fe252a230025965a9e1a33e81 Mon Sep 17 00:00:00 2001 From: Ryuichi OHORI <84692797+rohori@users.noreply.github.com> Date: Mon, 22 Nov 2021 09:27:23 +0900 Subject: [PATCH 4/5] Fix pyre errors: detection (#22) except numpy-or-tensorflow-related issues --- src/psykoda/detection.py | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/src/psykoda/detection.py b/src/psykoda/detection.py index 7a4569a..8c21776 100644 --- a/src/psykoda/detection.py +++ b/src/psykoda/detection.py @@ -454,6 +454,9 @@ def compute_anomaly_score( """ # Without type annotation ": ndarray" after score, sphinx treats "score" as type. # some text and a blank line is needed before :shape: too. 
+ + if self.detector is None: + raise AttributeError("detector is not set") score = self.detector.predict(X) if not scale: return score @@ -483,12 +486,15 @@ def compute_embeddings( :shape: (n_samples, dim_embedding) """ + detector = self.detector + if detector is None: + raise AttributeError("detector is not set") if X.shape[0] == 0: return None encoder = tf.keras.Model( - inputs=self.detector.input, - outputs=self.detector.get_layer(LAYERNAME_ENCODER_OUTPUT).output, + inputs=detector.input, + outputs=detector.get_layer(LAYERNAME_ENCODER_OUTPUT).output, ) return encoder.predict(X) @@ -612,7 +618,7 @@ def detection_report( for i, sample in enumerate(shap_value_idx_sorted.index): shap_values = shap_value_idx_sorted.loc[sample].sort_values(ascending=False) - fe = ["__".join(l) for l in list(shap_values.index[:shap_top_k])] + fe: List[Union[int, str]] = ["__".join(l) for l in list(shap_values.index[:shap_top_k])] value = list(shap_values.iloc[:shap_top_k]) for k in range(shap_top_k): if value[k] == 0: From 002988252a7cca25fa2063ae69dbe613c5699abe Mon Sep 17 00:00:00 2001 From: rohori Date: Mon, 22 Nov 2021 09:30:25 +0900 Subject: [PATCH 5/5] black --- src/psykoda/detection.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/psykoda/detection.py b/src/psykoda/detection.py index 8c21776..429a909 100644 --- a/src/psykoda/detection.py +++ b/src/psykoda/detection.py @@ -618,7 +618,9 @@ def detection_report( for i, sample in enumerate(shap_value_idx_sorted.index): shap_values = shap_value_idx_sorted.loc[sample].sort_values(ascending=False) - fe: List[Union[int, str]] = ["__".join(l) for l in list(shap_values.index[:shap_top_k])] + fe: List[Union[int, str]] = [ + "__".join(l) for l in list(shap_values.index[:shap_top_k]) + ] value = list(shap_values.iloc[:shap_top_k]) for k in range(shap_top_k): if value[k] == 0: