Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -3,5 +3,6 @@
.idea/
*/.idea/
/Data/
*.pyc

*/__pycache__/
1 change: 1 addition & 0 deletions HDFS_drain3_state.bin
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
eJztWm1v2zYQ/iuFsQ9tUagk9W5sA5qkAYJ18RAH24AiMBSJTtToxaPotFnR/z5SlC1SFv0WxfEQf5F0InnkHZ873pH83ps8vM+vv+CQ9vqvekl+MwlIEWc3RkSCOBNPsyJO+LP37lUvwhN6y+oj9l3E6agkgGExMg2+jcLbOIkIzthPCAD7SfKcjrI8wuzP9016POdtpA45szv8wBu+vhgMLt/0xI8RzUWvdS8QPbYzOO8MIm03P7/99bH9oFoozk3X1Q9WECbTgmJSMPrzRn1+ym+ORVvOn9UdUZxOkoBi1tEdzkqOPeBBCHxeoxpI9To7Px2UIx4XxklAAy7BT/zj7xDH95j0eeEF5gTrnxPXSR7eSRwKEvYlMsIFndNXtWCjOOKyfAAAwLJZ/C+X3XVcEzH5n1vkIy6VkLMS+uO3kM1inJdmIYyDiPKjmQbGOWnTyJfgPjDi3Pg4OK2ZtKoC2ZIqnlwNCALXBvwLItPdQBUQGMgGBjQN6Hl9m80g6Cvq+Upi2oTHdXI3Mj3b8xD0geX5FvIt0ykHQnP+TGNChAJL/tAwfQM6QPDX6MuV9AV3oq8SrJCJoNXXH0F4h+kFLiZ5FlUaa/xboQ+wgLcZhM4G898l3yPCB8q/JvEEa5TkK0q6+tF0L1f8h9+dD/UPLnQGitPheZDi4oHxSsWEfRoc//aWf/GCYVlgBAkzEsa9tLR+g2O7z0Syo7Bdz9uRz4RmG+7LgQ/DIMsqvP+JSTyOmUwVfotpGGIc4Uhyk1rhoCUJZ7q+pcMshB0u/PCA2m1dWcVCs/zpQWzKC7/l2uYeLPzNWKeqO8TkXqC3KZ5YvPRCyt7XAu4eiLi4oCsBHWNPH8olpZxp3cy2m64sretpDdfs0HDNg+GuZbjVjLeCOB/P502yZ5Kny7EtO2pmwJapnXCnwwl3DhO+YXwRRUOaExzVAUYJgN+DCf+eTiLWdSS7u7jgT9ZOwKX2cQ2UtMPCVmDhQbCXTu/xCV0zGj/OWQAUzlgSXAjveS28KWbStAfoQNKXrTUhu0MTsg8mtHLlX+Yvl21ytLrTdkNxpIn3nSfPX4FpI5G/+rIWVN2oL8Xyl72qnN2FBvTNZTk78CShd7XVowq8wnMGxWJ8R/Ak4fkMXihhvjPgkH5dvGlopT1Cko39yadcK/4M9UqMSwMy27ahtwQHku+nJMiKMW51hwsYaRccPoPgm9g737SqXX/VkkhOAK+zI3OcT5OydpZTwUCocRZMFZT9STUqkrezkOlolwLQ4VIADktBKzQuK8TLFrJp7ifPp+f/7zGvt2054rP2QM7NNi7QQji4/pY9lNfwZzy5OB1y4VnEWcp6ghO8sAM/S+viZHlYAuUV2tMHpFaHXsh6oV5ot4FJ+3TLe8neHlhvtzFJu8zy1qO3j/uOXUUj22WkcqymdwBuhw7AfaEOoHr99eHifA0PsHxT5wJH0ywKsnKO1XIx/f9MWaLaBJJ0HMNe9Ur/rs5gHRcCz3MsDVpkBwJ3bEyy4lbGb4t+oy2FuRGRe8c2Zco2hXQ2hTq0KfTCberpIsMMU2OY80aXcYrzKVURAhGL/cs8P42TROylUlGxdO23VQT2NZhfk6iMMLzlZ6hJDclrLEAWRA9SNU4boj5/9uuBMbxWPIpqhMeC/BwK2AqbX5WsI3MtsHYYAaKXGgE2wLrGVtvcMWWsLzKdsDlt+qhZkZjtJYA7G9ROXwLftmCqX63IT/CYGhrEKYf+WsR1eOKPDif+6xyT8Cm/aaaSsxwyH4+r5U9seS/kmtWuF1hYZFNMg3ZOK0J3JCfc2qshqMMDR/TiDhy3vhm4xZnakntw++bo+C2t5Jf3jeuPniVusBKc5hRXxertxatysWbC+b6zlXOUd2S0V/hgh3f44OES365y9CYDAYz7IImjd/wryEpe/HadSExETMg7p+LuJ801uDHlwyZtGNfhaYL2MOHpp7JdAWgdw1kYWNneniWyJQVdhURqqamSFlRJ
XyEdUyFttTJSO7IbldWO3AaptoWNftXKTqMjlXTUtkglLXVUrlrqqR35amVfrdwYMmi0VUshbNANUmXmNXgjbgL/Adcvbuk=
1 change: 1 addition & 0 deletions anomalydetection/att_all_you_need/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
# -*- coding: UTF-8 -*-
Binary file not shown.
Binary file not shown.
Binary file not shown.
141 changes: 141 additions & 0 deletions anomalydetection/att_all_you_need/encoder_self_att_predict.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,141 @@
# -*- coding: UTF-8 -*-
import torch
import json
import pandas as pd
import numpy as np
import os
import torch.nn as nn
import time
import random
from torch.utils.data import TensorDataset, DataLoader
from anomalydetection.att_all_you_need.encoder_self_att_train import Encoder

# use cuda if available otherwise use cpu
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# len(line) < window_length


def make_src_mask(src, src_pad_idx):
    """Build a padding mask for *src* as nested Python lists.

    :param src: integer tensor of event ids, e.g. [batch size, src len]
        (also works on a single 1-D sequence)
    :param src_pad_idx: the integer id used for padding positions
    :return: nested list of bools, True at every non-padding position
    """
    # BUG FIX: the original called .numpy() directly, which raises on CUDA
    # tensors — move to CPU first. clone()/detach() were redundant: the
    # comparison already allocates a fresh tensor with no grad history.
    return (src != src_pad_idx).cpu().tolist()


def load_sequential_model(input_size, hidden_size, num_layers, num_classes, model_path, dropout, num_of_heads, pf_dim):
    """Construct an Encoder, restore its weights from *model_path*, and
    return it in evaluation mode on the module-level device."""
    encoder = Encoder(input_size, num_classes, hidden_size, num_layers,
                      num_of_heads, pf_dim, dropout, device).to(device)
    # weights are loaded onto CPU first; .to(device) above handles placement
    state = torch.load(model_path, map_location='cpu')
    encoder.load_state_dict(state)
    encoder.eval()
    print('model_path: {}'.format(model_path))
    return encoder

def filter_small_top_k(predicted, output):
    """Drop candidate classes whose score is negligibly small.

    :param predicted: iterable of candidate class indices
    :param output: 2-D indexable of scores; only row 0 is consulted
    :return: list of indices p (original order) with output[0][p] > 0.001
    """
    # The original accumulated into a local named `filter`, shadowing the
    # builtin; a comprehension is both clearer and avoids the shadow.
    return [p for p in predicted if output[0][p] > 0.001]


def generate_robust_seq_label(file_path, sequence_length):
    """Read a labelled session CSV and build a TensorDataset of fixed-length
    event-id sequences with their binary labels.

    The CSV must contain a "Sequence" column (space-separated integer event
    ids) and a "label" column (0 = normal, 1 = anomaly). Each sequence is
    truncated to *sequence_length* and right-padded with 0 (the padding id).

    :param file_path: path to the CSV file
    :param sequence_length: fixed window length for every sequence
    :return: TensorDataset of (sequence tensor, label tensor) pairs
    """
    input_data, output_data = [], []
    frame = pd.read_csv(file_path)
    # Iterate rows directly instead of the original manual `while i < len(...)`
    # index loop; the unused session counter and mask list were dropped.
    for sequence, label in zip(frame["Sequence"], frame["label"]):
        line = [int(event_id) for event_id in sequence.split(' ')]
        line = line[:sequence_length]
        # right-pad with the padding id 0 up to sequence_length
        line += [0] * (sequence_length - len(line))
        input_data.append(line)
        output_data.append(int(label))
    return TensorDataset(torch.tensor(input_data), torch.tensor(output_data))


def get_batch_semantic_with_mask(seq, pattern_vec_file):
    """Map a batch of event-id sequences to their semantic vectors plus a
    padding mask.

    :param seq: tensor of integer event ids, shape [batch, seq len];
        id 0 is the padding id
    :param pattern_vec_file: path to a JSON file mapping event id (as a
        string key) to its semantic vector — assumed 300-dimensional,
        matching the [-1] * 300 sentinel used for padding below
    :return: (batch_data, mask_data) — nested lists of semantic vectors and
        the corresponding non-padding masks
    """
    # NOTE(review): the JSON is re-read on every batch; callers could cache
    # it if this shows up in profiling.
    with open(pattern_vec_file, 'r') as pattern_file:
        class_type_to_vec = json.load(pattern_file)
    # leftover debug `print(seq.shape)` removed
    batch_data = []
    mask_data = []
    for s in seq:
        # padding positions (id 0) get a sentinel vector of -1s
        semantic_line = [[-1] * 300 if event == 0 else class_type_to_vec[str(event)]
                         for event in s.numpy().tolist()]
        batch_data.append(semantic_line)
        mask_data.append(make_src_mask(s, 0))
    return batch_data, mask_data


def do_predict(input_size, hidden_size, num_layers, num_classes, sequence_length, model_path, test_file_path, batch_size, pattern_vec_json, dropout, num_of_heads, pf_dim):
    """Run the trained self-attention encoder over a labelled test set and
    print accuracy, precision, recall and F1 for binary anomaly detection.

    :param input_size: dimensionality of one semantic event vector
    :param hidden_size: encoder hidden dimension
    :param num_layers: number of encoder layers
    :param num_classes: encoder output size
    :param sequence_length: fixed session window length (padding id 0)
    :param model_path: path to the saved state_dict
    :param test_file_path: CSV with "Sequence" and "label" columns
    :param batch_size: DataLoader batch size
    :param pattern_vec_json: JSON file mapping event id -> semantic vector
    :param dropout, num_of_heads, pf_dim: encoder hyper-parameters
    """
    sequential_model = load_sequential_model(input_size, hidden_size, num_layers, num_classes, model_path, dropout, num_of_heads, pf_dim)

    start_time = time.time()
    TP = FP = TN = FN = 0

    # build the test dataset and a deterministic (unshuffled) loader
    sequence_data_set = generate_robust_seq_label(test_file_path, sequence_length)
    data_loader = DataLoader(dataset=sequence_data_set, batch_size=batch_size, shuffle=False, pin_memory=False)

    print('predict start')
    with torch.no_grad():
        for step, (seq, label) in enumerate(data_loader):
            batch_data, mask_data = get_batch_semantic_with_mask(seq, pattern_vec_json)
            seq = torch.tensor(batch_data).view(-1, sequence_length, input_size).to(device)
            # BUG FIX: the mask must live on the same device as the input —
            # the original left it on CPU, which fails under CUDA.
            mask = torch.tensor(mask_data).to(device)
            # column 0 of the model output is treated as the anomaly score
            # (clone()/detach() dropped: redundant inside no_grad)
            output = sequential_model(seq, mask)[:, 0].cpu().numpy()
            predicted = (output > 0.5).astype(int)
            # label is already a CPU tensor; the original rebuilt it via a
            # Python list comprehension for no benefit
            label = label.numpy()
            TP += int(((predicted == 1) & (label == 1)).sum())
            FP += int(((predicted == 1) & (label == 0)).sum())
            FN += int(((predicted == 0) & (label == 1)).sum())
            TN += int(((predicted == 0) & (label == 0)).sum())

    ALL = TP + TN + FP + FN
    # Compute precision, recall and F1-measure, guarding every division
    # (the original crashed with ZeroDivisionError on an empty test set)
    P = 100 * TP / (TP + FP) if TP + FP else 0
    R = 100 * TP / (TP + FN) if TP + FN else 0
    F1 = 2 * P * R / (P + R) if P + R else 0
    Acc = (TP + TN) * 100 / ALL if ALL else 0

    print('FP: {}, FN: {}, TP: {}, TN: {}'.format(FP, FN, TP, TN))
    print('Acc: {:.3f}, Precision: {:.3f}%, Recall: {:.3f}%, F1-measure: {:.3f}%'.format(Acc, P, R, F1))
    print('Finished Predicting')
    elapsed_time = time.time() - start_time
    print('elapsed_time: {}'.format(elapsed_time))

    #draw_evaluation("Evaluations", ['Acc', 'Precision', 'Recall', 'F1-measure'], [Acc, P, R, F1], 'evaluations', '%')
Loading