diff --git a/.github/workflows/static.yml b/.github/workflows/static.yml index 385fa3c..50aa82d 100644 --- a/.github/workflows/static.yml +++ b/.github/workflows/static.yml @@ -44,7 +44,7 @@ jobs: # treat Usage error, Warning, Error, Fatal as failure. # see bit-encoded exit codes of pylint for detail: # https://pylint.pycqa.org/en/latest/user_guide/run.html#exit-codes - mypy: + pyre: runs-on: ubuntu-latest steps: - uses: actions/checkout@v2 @@ -56,5 +56,40 @@ jobs: curl -sSL https://raw.githubusercontent.com/python-poetry/poetry/master/install-poetry.py | python echo "$HOME/.local/bin" >> $GITHUB_PATH poetry install --no-interaction - - name: mypy - run: poetry run mypy src tests + - name: install pyre + run: poetry add -D pyre-check + - name: pyre + run: poetry run pyre --source-directory src check + pyright: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v2 + - uses: actions/setup-python@v2 + with: + python-version: "3.8" + - name: install dependencies + run: | + curl -sSL https://raw.githubusercontent.com/python-poetry/poetry/master/install-poetry.py | python + echo "$HOME/.local/bin" >> $GITHUB_PATH + poetry install --no-interaction + - uses: actions/setup-node@v2 + - name: install pyright + run: npm install -g pyright + - name: pyright + run: poetry run pyright src tests + pytype: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v2 + - uses: actions/setup-python@v2 + with: + python-version: "3.8" + - name: install dependencies + run: | + curl -sSL https://raw.githubusercontent.com/python-poetry/poetry/master/install-poetry.py | python + echo "$HOME/.local/bin" >> $GITHUB_PATH + poetry install --no-interaction + - name: install pytype + run: poetry add -D pytype + - name: pytype + run: poetry run pytype src tests diff --git a/src/psykoda/cli/internal.py b/src/psykoda/cli/internal.py index 5474e52..3376ad7 100644 --- a/src/psykoda/cli/internal.py +++ b/src/psykoda/cli/internal.py @@ -9,7 +9,7 @@ import warnings from dataclasses import 
dataclass from datetime import datetime, timedelta -from typing import Dict, List, Optional, Tuple +from typing import Dict, List, Optional, Tuple, cast import numpy as np import pandas as pd @@ -55,7 +55,7 @@ def configure_logging(debug: bool): stderr_handler.addFilter(stderr_filter) stderr_handler.setLevel(logging.INFO) stderr_handler.setFormatter(logging.Formatter("%(message)s")) - handlers = [stderr_handler] + handlers: list[logging.Handler] = [stderr_handler] logfile_handler = logging.FileHandler(PATH_LOG) logfile_handler.setLevel(logging.DEBUG) @@ -405,7 +405,7 @@ def main_detection_skip_or_detect( logger.info("outputting detection reports") anomaly_score = detector.compute_anomaly_score(x_test, scale=True) num_anomaly = min( - sum(anomaly_score > anomaly_detection_config.threshold.min_score), + np.count_nonzero(anomaly_score > anomaly_detection_config.threshold.min_score), anomaly_detection_config.threshold.num_anomaly, ) @@ -523,6 +523,7 @@ def report_all(path_list_stats: List[str], path_save: str): [], columns=["datetime_rounded", "src_ip", "subnet", "service"] ) idx = 0 + results_shaps = pd.DataFrame() for path in path_list_stats: # Load stats stats = utils.load_json(path) @@ -545,7 +546,7 @@ def report_all(path_list_stats: List[str], path_save: str): results_pd.loc[idx] = [dt, src_ip, subnet, service] if idx == 0: - results_shaps = pd.DataFrame([], columns=report.columns) + results_shaps.columns = report.columns results_shaps.loc[idx] = report.loc[(dt, src_ip)] idx += 1 @@ -564,13 +565,14 @@ def report_all(path_list_stats: List[str], path_save: str): ret = pd.concat([ret, results_pd_group.get_group(key)]) ret.round(4).to_csv(path_save, index=False) + num_anomaly_ipaddr = len(keys) else: # Anomaly not found pd.DataFrame([["no anomaly found"]]).to_csv(path_save, index=False) + num_anomaly_ipaddr = 0 logger.info("[RESULT]", extra=to_stderr) logger.info("Detection summary file: %s", path_save, extra=to_stderr) - num_anomaly_ipaddr = len(keys) if 
anomaly_found else 0 logger.info( "Number of unique anomaly IP addresses: %s", num_anomaly_ipaddr, extra=to_stderr ) @@ -719,7 +721,9 @@ def detect_per_unit( label_value=1, ) log_labeled = labeled.factory(config.io.previous.log)[0].load_previous_log( - entries=known_normal.index, + entries=cast(pd.MultiIndex, known_normal.index), + # we can safely assume that known_normal.index is MultiIndex + # since it is empty otherwise. ) log_labeled = apply_exclude_lists(log_labeled, config.preprocess.exclude_lists) log_labeled = preprocess.extract_log( @@ -784,12 +788,12 @@ def _load_log_catch(load, r): def load_previous( - config: LoadPreviousConfigItem, date_to: datetime, label_value: float + config: Optional[LoadPreviousConfigItem], date_to: datetime, label_value: float ) -> pd.Series: from psykoda.preprocess import round_datetime from psykoda.utils import DateRange - if config.list is None: + if config is None or config.list is None: return pd.Series() def date_filter(row): diff --git a/src/psykoda/detection.py b/src/psykoda/detection.py index 7a4569a..429a909 100644 --- a/src/psykoda/detection.py +++ b/src/psykoda/detection.py @@ -454,6 +454,9 @@ def compute_anomaly_score( """ # Without type annotation ": ndarray" after score, sphinx treats "score" as type. # some text and a blank line is needed before :shape: too. 
+ + if self.detector is None: + raise AttributeError("detector is not set") score = self.detector.predict(X) if not scale: return score @@ -483,12 +486,15 @@ def compute_embeddings( :shape: (n_samples, dim_embedding) """ + detector = self.detector + if detector is None: + raise AttributeError("detector is not set") if X.shape[0] == 0: return None encoder = tf.keras.Model( - inputs=self.detector.input, - outputs=self.detector.get_layer(LAYERNAME_ENCODER_OUTPUT).output, + inputs=detector.input, + outputs=detector.get_layer(LAYERNAME_ENCODER_OUTPUT).output, ) return encoder.predict(X) @@ -612,7 +618,9 @@ def detection_report( for i, sample in enumerate(shap_value_idx_sorted.index): shap_values = shap_value_idx_sorted.loc[sample].sort_values(ascending=False) - fe = ["__".join(l) for l in list(shap_values.index[:shap_top_k])] + fe: List[Union[int, str]] = [ + "__".join(l) for l in list(shap_values.index[:shap_top_k]) + ] value = list(shap_values.iloc[:shap_top_k]) for k in range(shap_top_k): if value[k] == 0: