Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
41 changes: 38 additions & 3 deletions .github/workflows/static.yml
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,7 @@ jobs:
# treat Usage error, Warning, Error, Fatal as failure.
# see bit-encoded exit codes of pylint for detail:
# https://pylint.pycqa.org/en/latest/user_guide/run.html#exit-codes
mypy:
pyre:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v2
Expand All @@ -56,5 +56,40 @@ jobs:
curl -sSL https://raw.githubusercontent.com/python-poetry/poetry/master/install-poetry.py | python
echo "$HOME/.local/bin" >> $GITHUB_PATH
poetry install --no-interaction
- name: mypy
run: poetry run mypy src tests
- name: install pyre
run: poetry add -D pyre-check
- name: pyre
run: poetry run pyre --source-directory src check
pyright:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v2
- uses: actions/setup-python@v2
with:
python-version: "3.8"
- name: install dependencies
run: |
curl -sSL https://raw.githubusercontent.com/python-poetry/poetry/master/install-poetry.py | python
echo "$HOME/.local/bin" >> $GITHUB_PATH
poetry install --no-interaction
- uses: actions/setup-node@v2
- name: install pyright
run: npm install -g pyright
- name: pyright
run: poetry run pyright src tests
pytype:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v2
- uses: actions/setup-python@v2
with:
python-version: "3.8"
- name: install dependencies
run: |
curl -sSL https://raw.githubusercontent.com/python-poetry/poetry/master/install-poetry.py | python
echo "$HOME/.local/bin" >> $GITHUB_PATH
poetry install --no-interaction
- name: install pytype
run: poetry add -D pytype
- name: pytype
run: poetry run pytype src tests
20 changes: 12 additions & 8 deletions src/psykoda/cli/internal.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
import warnings
from dataclasses import dataclass
from datetime import datetime, timedelta
from typing import Dict, List, Optional, Tuple
from typing import Dict, List, Optional, Tuple, cast

import numpy as np
import pandas as pd
Expand Down Expand Up @@ -55,7 +55,7 @@ def configure_logging(debug: bool):
stderr_handler.addFilter(stderr_filter)
stderr_handler.setLevel(logging.INFO)
stderr_handler.setFormatter(logging.Formatter("%(message)s"))
handlers = [stderr_handler]
handlers: list[logging.Handler] = [stderr_handler]

logfile_handler = logging.FileHandler(PATH_LOG)
logfile_handler.setLevel(logging.DEBUG)
Expand Down Expand Up @@ -405,7 +405,7 @@ def main_detection_skip_or_detect(
logger.info("outputting detection reports")
anomaly_score = detector.compute_anomaly_score(x_test, scale=True)
num_anomaly = min(
sum(anomaly_score > anomaly_detection_config.threshold.min_score),
np.count_nonzero(anomaly_score > anomaly_detection_config.threshold.min_score),
anomaly_detection_config.threshold.num_anomaly,
)

Expand Down Expand Up @@ -523,6 +523,7 @@ def report_all(path_list_stats: List[str], path_save: str):
[], columns=["datetime_rounded", "src_ip", "subnet", "service"]
)
idx = 0
results_shaps = pd.DataFrame()
for path in path_list_stats:
# Load stats
stats = utils.load_json(path)
Expand All @@ -545,7 +546,7 @@ def report_all(path_list_stats: List[str], path_save: str):
results_pd.loc[idx] = [dt, src_ip, subnet, service]

if idx == 0:
results_shaps = pd.DataFrame([], columns=report.columns)
results_shaps.columns = report.columns
results_shaps.loc[idx] = report.loc[(dt, src_ip)]

idx += 1
Expand All @@ -564,13 +565,14 @@ def report_all(path_list_stats: List[str], path_save: str):
ret = pd.concat([ret, results_pd_group.get_group(key)])

ret.round(4).to_csv(path_save, index=False)
num_anomaly_ipaddr = len(keys)
else:
# Anomaly not found
pd.DataFrame([["no anomaly found"]]).to_csv(path_save, index=False)
num_anomaly_ipaddr = 0

logger.info("[RESULT]", extra=to_stderr)
logger.info("Detection summary file: %s", path_save, extra=to_stderr)
num_anomaly_ipaddr = len(keys) if anomaly_found else 0
logger.info(
"Number of unique anomaly IP addresses: %s", num_anomaly_ipaddr, extra=to_stderr
)
Expand Down Expand Up @@ -719,7 +721,9 @@ def detect_per_unit(
label_value=1,
)
log_labeled = labeled.factory(config.io.previous.log)[0].load_previous_log(
entries=known_normal.index,
entries=cast(pd.MultiIndex, known_normal.index),
# We can safely assume that known_normal.index is a MultiIndex,
# since it is empty otherwise.
)
log_labeled = apply_exclude_lists(log_labeled, config.preprocess.exclude_lists)
log_labeled = preprocess.extract_log(
Expand Down Expand Up @@ -784,12 +788,12 @@ def _load_log_catch(load, r):


def load_previous(
config: LoadPreviousConfigItem, date_to: datetime, label_value: float
config: Optional[LoadPreviousConfigItem], date_to: datetime, label_value: float
) -> pd.Series:
from psykoda.preprocess import round_datetime
from psykoda.utils import DateRange

if config.list is None:
if config is None or config.list is None:
return pd.Series()

def date_filter(row):
Expand Down
14 changes: 11 additions & 3 deletions src/psykoda/detection.py
Original file line number Diff line number Diff line change
Expand Up @@ -454,6 +454,9 @@ def compute_anomaly_score(
"""
# Without the type annotation ": ndarray" after score, Sphinx treats "score" as a type.
# Some text and a blank line are also needed before :shape:.

if self.detector is None:
raise AttributeError("detector is not set")
score = self.detector.predict(X)
if not scale:
return score
Expand Down Expand Up @@ -483,12 +486,15 @@ def compute_embeddings(
:shape: (n_samples, dim_embedding)
"""

detector = self.detector
if detector is None:
raise AttributeError("detector is not set")
if X.shape[0] == 0:
return None

encoder = tf.keras.Model(
inputs=self.detector.input,
outputs=self.detector.get_layer(LAYERNAME_ENCODER_OUTPUT).output,
inputs=detector.input,
outputs=detector.get_layer(LAYERNAME_ENCODER_OUTPUT).output,
)
return encoder.predict(X)

Expand Down Expand Up @@ -612,7 +618,9 @@ def detection_report(

for i, sample in enumerate(shap_value_idx_sorted.index):
shap_values = shap_value_idx_sorted.loc[sample].sort_values(ascending=False)
fe = ["__".join(l) for l in list(shap_values.index[:shap_top_k])]
fe: List[Union[int, str]] = [
"__".join(l) for l in list(shap_values.index[:shap_top_k])
]
value = list(shap_values.iloc[:shap_top_k])
for k in range(shap_top_k):
if value[k] == 0:
Expand Down