From c8f2227cdb9b627c7c2243ca7382e7e98741450e Mon Sep 17 00:00:00 2001 From: cainiao66 <17717084193@163.com> Date: Thu, 30 Apr 2020 09:51:14 +0800 Subject: [PATCH 1/4] add loganomaly_quantitive_train/predict --- .../log_anomaly_quantitive_predict.py | 122 ++++++++++++++++++ .../log_anomaly_quantitive_train.py | 99 ++++++++++++++ 2 files changed, 221 insertions(+) create mode 100644 anomalydetection/loganomaly/log_anomaly_quantitive_predict.py create mode 100644 anomalydetection/loganomaly/log_anomaly_quantitive_train.py diff --git a/anomalydetection/loganomaly/log_anomaly_quantitive_predict.py b/anomalydetection/loganomaly/log_anomaly_quantitive_predict.py new file mode 100644 index 0000000..5aad10d --- /dev/null +++ b/anomalydetection/loganomaly/log_anomaly_quantitive_predict.py @@ -0,0 +1,122 @@ +import torch +import os +import torch.nn as nn +import time +import numpy as np +from anomalydetection.loganomaly.log_anomaly_quantitive_train import Model +from anomalydetection.loganomaly.log_anomaly_quantitive_train import train_model + +device = torch.device("cuda" if torch.cuda.is_available() else "cpu") + +def generate_test_label(logkey_path, window_length): + f = open(logkey_path,'r') + keys = f.readline().split() + keys = list(map(int, keys)) + print(keys) + length = len(keys) + input = np.zeros((length -window_length,num_of_classes)) + output = np.zeros(length -window_length,dtype=np.int) + for i in range(0,length -window_length): + for j in range(i,i+window_length): + input[i][keys[j]-1] += 1 + output[i] = keys[i+window_length]-1 + new_input = np.zeros((length -2*window_length+1,window_length,num_of_classes)) + for i in range(0,length -2*window_length+1): + for j in range(i,i+window_length): + new_input[i][j-i] = input[j] + new_output = output[window_length-1:] + print(new_input.shape) + print(new_output.shape) + print(new_input[0]) + print(new_output[0]) + return length,new_input,new_output + +def load_quantitive_model(input_size, hidden_size, num_layers, num_classes, model_path): + model2 = Model(input_size, hidden_size, num_layers, num_classes).to(device) + model2.load_state_dict(torch.load(model_path, map_location='cpu')) + model2.eval() + print('model_path: {}'.format(model_path)) + return model2 + +def do_predict(input_size, hidden_size, num_layers, num_classes, window_length, model_path, anomaly_test_line_path, num_candidates, logkey_path): + quantitive_model = load_quantitive_model(input_size, hidden_size, num_layers, num_classes, model_path) + start_time = time.time() + TP = 0 + FP = 0 + TN = 0 + FN = 0 + ALL = 0 + length,input,output = generate_test_label(logkey_path, window_length) + abnormal_label = [] + with open(anomaly_test_line_path) as f: + abnormal_label = [int(x) for x in f.readline().strip().split()] + print('predict start') + with torch.no_grad(): + count_num = 0 + current_file_line = 0 + for i in range(0,length-2*window_length+1): + lineNum = i + 2*window_length + quan = input[i] + label = output[i] + quan = torch.tensor(quan, dtype=torch.float).view(-1, window_length, input_size).to(device) + test_output = quantitive_model(quan) + predicted = torch.argsort(test_output , 1)[0][-num_candidates:] + print('{} - predict result: {}, true label: {}'.format(lineNum, predicted,label)) + if lineNum in abnormal_label: ## 若出现异常日志,则接下来的预测跳过异常日志,保证进行预测的日志均为正常日志 + i += 2*window_length + 1 + else: + i += 1 + ALL += 1 + if label not in predicted: + if lineNum in abnormal_label: + TN += 1 + else: + FN += 1 + else: + if lineNum in abnormal_label: + FP += 1 + else: + TP += 1 + # 
Compute precision, recall and F1-measure + if TP + FP == 0: + P = 0 + else: + P = 100 * TP / (TP + FP) + + if TP + FN == 0: + R = 0 + else: + R = 100 * TP / (TP + FN) + + if P + R == 0: + F1 = 0 + else: + F1 = 2 * P * R / (P + R) + + Acc = (TP + TN) * 100 / ALL + print('FP: {}, FN: {}, TP: {}, TN: {}'.format(FP, FN, TP, TN)) + print('Acc: {:.3f}, Precision: {:.3f}%, Recall: {:.3f}%, F1-measure: {:.3f}%'.format(Acc, P, R, F1)) + print('Finished Predicting') + elapsed_time = time.time() - start_time + print('elapsed_time: {}'.format(elapsed_time)) + +input_size = 61 +hidden_size = 30 +num_of_layers = 2 +num_of_classes = 61 +num_epochs = 100 +batch_size = 200 +window_length = 5 +train_logkey_path = '../../Data/FTTreeResult-HDFS/deeplog_files/logkey/logkey_train' +test_logkey_path = '../../Data/FTTreeResult-HDFS/deeplog_files/logkey/logkey_test' +train_root_path = '../../Data/FTTreeResult-HDFS/model_train/' +label_file_name = '../../Data/FTTreeResult-HDFS/deeplog_files/HDFS_abnormal_label.txt' +model_out_path = train_root_path + 'quantitive_model_out/' + +# train_model(window_length, input_size, hidden_size, +# num_of_layers, num_of_classes, num_epochs, batch_size, train_root_path, +# model_out_path,train_logkey_path) + +do_predict(input_size, hidden_size, num_of_layers, num_of_classes, window_length, + model_out_path + 'Adam_batch_size=200;epoch=100.pt', label_file_name, 3, test_logkey_path) + diff --git a/anomalydetection/loganomaly/log_anomaly_quantitive_train.py b/anomalydetection/loganomaly/log_anomaly_quantitive_train.py new file mode 100644 index 0000000..e81b506 --- /dev/null +++ b/anomalydetection/loganomaly/log_anomaly_quantitive_train.py @@ -0,0 +1,99 @@ +import torch +import torch.nn as nn +import torch.optim as optim +from tensorboardX import SummaryWriter +from torch.utils.data import TensorDataset, DataLoader +import numpy as np +import argparse +import os + +device = torch.device("cuda" if torch.cuda.is_available() else "cpu") + +class Model(nn.Module): + def __init__(self, input_size, hidden_size, num_of_layers, out_size): + super(Model, self).__init__() + self.hidden_size = hidden_size + self.num_of_layers = num_of_layers + self.lstm = nn.LSTM(input_size, hidden_size, num_of_layers, batch_first=True) + self.fc = nn.Linear(hidden_size, out_size) + + def init_hidden(self, size): + h0 = torch.zeros(self.num_of_layers, size, self.hidden_size).to(device) + c0 = torch.zeros(self.num_of_layers, size, self.hidden_size).to(device) + return (h0, c0) + + def forward(self, input): + out, _ = self.lstm(input, self.init_hidden(input.size(0))) + out = self.fc(out[:, -1, :]) + return out + +def generate_quantitive_label(logkey_path, window_length,num_of_classes): + f = open(logkey_path,'r') + keys = f.readline().split() + keys = list(map(int, keys)) + print(keys) + length = len(keys) + input = np.zeros((length -window_length,num_of_classes)) + output = np.zeros(length -window_length,dtype=np.int) + for i in range(0,length -window_length): + for j in range(i,i+window_length): + input[i][keys[j]-1] += 1 + output[i] = keys[i+window_length]-1 + new_input = np.zeros((length -2*window_length+1,window_length,num_of_classes)) + for i in range(0,length -2*window_length+1): + for j in range(i,i+window_length): + new_input[i][j-i] = input[j] + new_output = output[window_length-1:] + print(new_input.shape) + print(new_output.shape) + print(new_input[0]) + print(new_output[0]) + dataset = TensorDataset(torch.tensor(new_input,dtype=torch.float),torch.tensor(new_output,dtype=torch.long)) + return dataset + 
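+# The dataset built above is the "quantitative" view of the log-key stream:
+# each position gets a count vector over the num_of_classes key types for the
+# preceding window, and window_length consecutive count vectors form one LSTM
+# sample whose target is the (0-based) id of the key that follows the window.
+# Illustrative example with hypothetical values (not taken from the data):
+#   window_length = 3, num_of_classes = 4, keys = [1, 3, 1, 2, ...]
+#   input[0]  = counts of keys[0:3] = [2, 0, 1, 0]
+#   output[0] = keys[3] - 1         = 1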
+def train_model(window_length, input_size, hidden_size, num_of_layers, num_of_classes, num_epochs, batch_size, root_path, model_output_directory,logkey_path): + # log setting + log_directory = root_path + 'quantitive_log_out/' + log_template = 'Adam_batch_size=' + str(batch_size) + ';epoch=' + str(num_epochs) + + model = Model(input_size, hidden_size, num_of_layers, num_of_classes).to(device) + # create data set + quantitive_data_set = generate_quantitive_label(logkey_path, window_length,num_of_classes) + # create data_loader + data_loader = DataLoader(dataset=quantitive_data_set, batch_size=batch_size, shuffle=True, pin_memory=False) + writer = SummaryWriter(logdir=log_directory + log_template) + + # Loss and optimizer classify job + criterion = nn.CrossEntropyLoss() + optimizer = optim.Adam(model.parameters()) + + # Training + for epoch in range(num_epochs): + train_loss = 0 + for step, (quan, label) in enumerate(data_loader): + quan = quan.clone().detach().view(-1, window_length, input_size).to(device) + output = model(quan) + + loss = criterion(output, label.to(device)) + + # Backward and optimize + optimizer.zero_grad() + loss.backward() + train_loss += loss.item() + optimizer.step() + print('Epoch [{}/{}], training_loss: {:.4f}'.format(epoch + 1, num_epochs, train_loss / len(data_loader.dataset))) + if (epoch + 1) % 100 == 0: + if not os.path.isdir(model_output_directory): + os.makedirs(model_output_directory) + e_log = 'Adam_batch_size=' + str(batch_size) + ';epoch=' + str(epoch+1) + torch.save(model.state_dict(), model_output_directory + '/' + e_log + '.pt') + writer.close() + print('Training finished') + + + + + + + + From 68408608baf203735c5486bde01c73926c7a8638 Mon Sep 17 00:00:00 2001 From: cainiao66 <17717084193@163.com> Date: Thu, 7 May 2020 12:40:03 +0800 Subject: [PATCH 2/4] fix log_anomaly --- .../__pycache__/__init__.cpython-37.pyc | Bin 0 -> 198 bytes ...og_anomaly_quantitive_train.cpython-37.pyc | Bin 0 -> 3412 bytes ..._anomaly_sequential_predict.cpython-37.pyc | Bin 0 -> 3287 bytes ...og_anomaly_sequential_train.cpython-37.pyc | Bin 0 -> 3832 bytes .../log_anomaly_train.cpython-37.pyc | Bin 0 -> 4309 bytes .../loganomaly/log_anomaly_predict.py | 122 ++++++++++++++++++ .../log_anomaly_quantitive_predict.py | 6 +- .../log_anomaly_quantitive_train.py | 4 +- .../log_anomaly_sequential_predict.py | 3 + .../log_anomaly_sequential_train.py | 2 + .../loganomaly/log_anomaly_train.py | 121 +++++++++++++++++ 11 files changed, 252 insertions(+), 6 deletions(-) create mode 100644 anomalydetection/loganomaly/__pycache__/__init__.cpython-37.pyc create mode 100644 anomalydetection/loganomaly/__pycache__/log_anomaly_quantitive_train.cpython-37.pyc create mode 100644 anomalydetection/loganomaly/__pycache__/log_anomaly_sequential_predict.cpython-37.pyc create mode 100644 anomalydetection/loganomaly/__pycache__/log_anomaly_sequential_train.cpython-37.pyc create mode 100644 anomalydetection/loganomaly/__pycache__/log_anomaly_train.cpython-37.pyc create mode 100644 anomalydetection/loganomaly/log_anomaly_predict.py create mode 100644 anomalydetection/loganomaly/log_anomaly_train.py diff --git a/anomalydetection/loganomaly/__pycache__/__init__.cpython-37.pyc b/anomalydetection/loganomaly/__pycache__/__init__.cpython-37.pyc new file mode 100644 index 0000000000000000000000000000000000000000..17b247bb22f2c009cd882030eab5a6488a96c89d GIT binary patch literal 198 zcmZ?b<>g`kg1wVk<3RLd5CH>>K!yVl7qb9~6oz01O-8?!3`HPe1o1To$aS@fdAelz 
[GIT binary patch payloads omitted: base85 blobs for the committed __pycache__ bytecode (__init__, log_anomaly_quantitive_train, log_anomaly_sequential_predict, log_anomaly_sequential_train, log_anomaly_train .cpython-37.pyc)]

diff --git a/anomalydetection/loganomaly/log_anomaly_predict.py b/anomalydetection/loganomaly/log_anomaly_predict.py
new file mode 100644
index 0000000..106120f
--- /dev/null
+++ b/anomalydetection/loganomaly/log_anomaly_predict.py
@@ -0,0 +1,122 @@
+import torch
+import os
+import torch.nn as nn
+import time
+import numpy as np
+from anomalydetection.loganomaly.log_anomaly_train import Model
+from anomalydetection.loganomaly.log_anomaly_train import train_model
+
+device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+
+def generate_test_label(logkey_path, window_length,num_of_classes):
+    f = open(logkey_path,'r')
+    keys = f.readline().split()
+    keys = list(map(int, keys))
+    print(keys)
+    length = len(keys)
+    input_1 = np.zeros((length -window_length,1))
+    output_1 = np.zeros(length -window_length,dtype=np.int)
+    input_2 = np.zeros((length -window_length,num_of_classes))
+    output = np.zeros(length -window_length,dtype=np.int)
+    for i in range(0,length -window_length):
+        for j in range(i,i+window_length):
+            input_1[i][0] = keys[j]
+            input_2[i][keys[j]-1] += 1
+        output[i] = keys[i+window_length]-1
+    new_input_1 = np.zeros((length -2*window_length+1,window_length,1))
+    new_input_2 = np.zeros((length - 2 * window_length + 1, window_length, num_of_classes))
+    for i in range(0,length -2*window_length+1):
+        for j in range(i,i+window_length):
+            new_input_1[i][j - i] = input_1[j]
+            new_input_2[i][j-i] = input_2[j]
+    new_output = output[window_length-1:]
+    return length,new_input_1,new_input_2,new_output
+
+def load_model(input_size_1,input_size_2, hidden_size, num_layers, num_classes, model_path):
+    model = Model(input_size_1,input_size_2,hidden_size, num_layers,
num_classes).to(device) + model.load_state_dict(torch.load(model_path, map_location='cpu')) + model.eval() + print('model_path: {}'.format(model_path)) + return model + +def do_predict(input_size_1,input_size_2, hidden_size, num_layers, num_classes, window_length, model_path, anomaly_test_line_path, num_candidates, logkey_path): + model = load_model(input_size_1,input_size_2 ,hidden_size, num_layers, num_classes, model_path) + start_time = time.time() + TP = 0 + FP = 0 + TN = 0 + FN = 0 + ALL = 0 + length,input_1,input_2,output = generate_test_label(logkey_path, window_length,num_classes) + abnormal_label = [] + with open(anomaly_test_line_path) as f: + abnormal_label = [int(x) for x in f.readline().strip().split()] + print('predict start') + with torch.no_grad(): + count_num = 0 + current_file_line = 0 + for i in range(0,length-2*window_length+1): + lineNum = i + 2*window_length + seq = input_1[i] + quan = input_2[i] + label = output[i] + seq = torch.tensor(seq, dtype=torch.float).view(-1, window_length, input_size_1).to(device) + quan = torch.tensor(quan, dtype=torch.float).view(-1, window_length, input_size_2).to(device) + test_output = model(seq,quan) + predicted = torch.argsort(test_output , 1)[0][-num_candidates:] + print('{} - predict result: {}, true label: {}'.format(lineNum, predicted,label)) + if lineNum in abnormal_label: ## 若出现异常日志,则接下来的预测跳过异常日志,保证进行预测的日志均为正常日志 + i += 2*window_length + 1 + else: + i += 1 + ALL += 1 + if label not in predicted: + if lineNum in abnormal_label: + TP += 1 + else: + FP += 1 + else: + if lineNum in abnormal_label: + FN += 1 + else: + TN += 1 + # Compute precision, recall and F1-measure + if TP + FP == 0: + P = 0 + else: + P = 100 * TP / (TP + FP) + + if TP + FN == 0: + R = 0 + else: + R = 100 * TP / (TP + FN) + + if P + R == 0: + F1 = 0 + else: + F1 = 2 * P * R / (P + R) + + Acc = (TP + TN) * 100 / ALL + print('FP: {}, FN: {}, TP: {}, TN: {}'.format(FP, FN, TP, TN)) + print('Acc: {:.3f}, Precision: {:.3f}%, Recall: {:.3f}%, F1-measure: {:.3f}%'.format(Acc, P, R, F1)) + print('Finished Predicting') + elapsed_time = time.time() - start_time + print('elapsed_time: {}'.format(elapsed_time)) + +if __name__=='__main__': + input_size_1 = 1 + input_size_2 = 61 + hidden_size = 30 + num_of_layers = 2 + num_of_classes = 61 + num_epochs = 100 + batch_size = 200 + window_length = 5 + train_logkey_path = '../../Data/FTTreeResult-HDFS/deeplog_files/logkey/logkey_train' + test_logkey_path = '../../Data/FTTreeResult-HDFS/deeplog_files/logkey/logkey_test' + train_root_path = '../../Data/FTTreeResult-HDFS/model_train/' + label_file_name = '../../Data/FTTreeResult-HDFS/deeplog_files/HDFS_abnormal_label.txt' + model_out_path = train_root_path + 'model_out/' + + do_predict(input_size_1,input_size_2, hidden_size, num_of_layers, num_of_classes, window_length, + model_out_path + 'Adam_batch_size=200;epoch=100.pt', label_file_name, 9, test_logkey_path) \ No newline at end of file diff --git a/anomalydetection/loganomaly/log_anomaly_quantitive_predict.py b/anomalydetection/loganomaly/log_anomaly_quantitive_predict.py index 5aad10d..2aa0d55 100644 --- a/anomalydetection/loganomaly/log_anomaly_quantitive_predict.py +++ b/anomalydetection/loganomaly/log_anomaly_quantitive_predict.py @@ -113,9 +113,9 @@ def do_predict(input_size, hidden_size, num_layers, num_classes, window_length, label_file_name = '../../Data/FTTreeResult-HDFS/deeplog_files/HDFS_abnormal_label.txt' model_out_path = train_root_path + 'quantitive_model_out/' -# train_model(window_length, input_size, hidden_size, 
-# num_of_layers, num_of_classes, num_epochs, batch_size, train_root_path, -# model_out_path,train_logkey_path) +train_model(window_length, input_size, hidden_size, + num_of_layers, num_of_classes, num_epochs, batch_size, train_root_path, + model_out_path,train_logkey_path) do_predict(input_size, hidden_size, num_of_layers, num_of_classes, window_length, model_out_path + 'Adam_batch_size=200;epoch=100.pt', label_file_name, 3, test_logkey_path) diff --git a/anomalydetection/loganomaly/log_anomaly_quantitive_train.py b/anomalydetection/loganomaly/log_anomaly_quantitive_train.py index e81b506..4426687 100644 --- a/anomalydetection/loganomaly/log_anomaly_quantitive_train.py +++ b/anomalydetection/loganomaly/log_anomaly_quantitive_train.py @@ -44,11 +44,9 @@ def generate_quantitive_label(logkey_path, window_length,num_of_classes): for j in range(i,i+window_length): new_input[i][j-i] = input[j] new_output = output[window_length-1:] + dataset = TensorDataset(torch.tensor(new_input,dtype=torch.float),torch.tensor(new_output,dtype=torch.long)) print(new_input.shape) print(new_output.shape) - print(new_input[0]) - print(new_output[0]) - dataset = TensorDataset(torch.tensor(new_input,dtype=torch.float),torch.tensor(new_output,dtype=torch.long)) return dataset def train_model(window_length, input_size, hidden_size, num_of_layers, num_of_classes, num_epochs, batch_size, root_path, model_output_directory,logkey_path): diff --git a/anomalydetection/loganomaly/log_anomaly_sequential_predict.py b/anomalydetection/loganomaly/log_anomaly_sequential_predict.py index 61c0f64..ee13038 100644 --- a/anomalydetection/loganomaly/log_anomaly_sequential_predict.py +++ b/anomalydetection/loganomaly/log_anomaly_sequential_predict.py @@ -65,9 +65,12 @@ def do_predict(input_size, hidden_size, num_layers, num_classes, window_length, count_num += 1 seq = line[i:i + window_length] label = line[i + window_length] + print(label) seq = torch.tensor(seq, dtype=torch.float).view(-1, window_length, input_size).to(device) + print(seq.shape) #label = torch.tensor(label).view(-1).to(device) output = sequential_model(seq) + print(output) predicted = torch.argsort(output, 1)[0][-num_candidates:] print('{} - predict result: {}, true label: {}'.format(count_num, predicted, vec_to_class_type[tuple(label)])) if lineNum in abnormal_label: ## 若出现异常日志,则接下来的预测跳过异常日志,保证进行预测的日志均为正常日志 diff --git a/anomalydetection/loganomaly/log_anomaly_sequential_train.py b/anomalydetection/loganomaly/log_anomaly_sequential_train.py index b27c607..77e32de 100644 --- a/anomalydetection/loganomaly/log_anomaly_sequential_train.py +++ b/anomalydetection/loganomaly/log_anomaly_sequential_train.py @@ -34,6 +34,7 @@ def generate_seq_label(file_path, window_length, pattern_vec_file): # line[i] is a list need to read file form a dic{vec:log_key} to get log key output_data.append(vec_to_class_type[line[i + window_length]]) data_set = TensorDataset(torch.tensor(input_data, dtype=torch.float), torch.tensor(output_data)) + print(torch.tensor(input_data).shape) return data_set @@ -85,6 +86,7 @@ def __init__(self, input_size, hidden_size, num_of_layers, out_size): self.num_of_layers = num_of_layers self.lstm = nn.LSTM(input_size, hidden_size, num_of_layers, batch_first=True) self.fc = nn.Linear(hidden_size, out_size) + # self.out = nn.Linear(in_features=in_features, out_features=out_features) def init_hidden(self, size): diff --git a/anomalydetection/loganomaly/log_anomaly_train.py b/anomalydetection/loganomaly/log_anomaly_train.py new file mode 100644 index 0000000..9202c1b 
--- /dev/null +++ b/anomalydetection/loganomaly/log_anomaly_train.py @@ -0,0 +1,121 @@ +import torch +import torch.nn as nn +import torch.optim as optim +from tensorboardX import SummaryWriter +from torch.utils.data import TensorDataset, DataLoader +import numpy as np +import argparse +import os +from . import * + +device = torch.device("cuda" if torch.cuda.is_available() else "cpu") + + +def generate_label(logkey_path, window_length,num_of_classes): + f = open(logkey_path,'r') + keys = f.readline().split() + keys = list(map(int, keys)) + print(keys) + length = len(keys) + input_1 = np.zeros((length -window_length,1)) + output_1 = np.zeros(length -window_length,dtype=np.int) + input_2 = np.zeros((length -window_length,num_of_classes)) + output = np.zeros(length -window_length,dtype=np.int) + for i in range(0,length -window_length): + for j in range(i,i+window_length): + input_1[i][0] = keys[j] + input_2[i][keys[j]-1] += 1 + output[i] = keys[i+window_length]-1 + new_input_1 = np.zeros((length -2*window_length+1,window_length,1)) + new_input_2 = np.zeros((length - 2 * window_length + 1, window_length, num_of_classes)) + for i in range(0,length -2*window_length+1): + for j in range(i,i+window_length): + new_input_1[i][j - i] = input_1[j] + new_input_2[i][j-i] = input_2[j] + new_output = output[window_length-1:] + print(new_input_1.shape) + print(new_input_2.shape) + print(new_output.shape) + dataset = TensorDataset(torch.tensor(new_input_1,dtype=torch.float), + torch.tensor(new_input_2,dtype=torch.float),torch.tensor(new_output,dtype=torch.long)) + return dataset + +class Model(nn.Module): + def __init__(self, input_size_0,input_size_1, hidden_size, num_of_layers, out_size): + super(Model, self).__init__() + self.hidden_size = hidden_size + self.num_of_layers = num_of_layers + self.lstm0 = nn.LSTM(input_size_0, hidden_size, num_of_layers, batch_first=True) + self.lstm1 = nn.LSTM(input_size_1, hidden_size, num_of_layers, batch_first=True) + self.fc = nn.Linear(2*hidden_size, out_size) + + + def forward(self, input_0,input_1): + h0_0 = torch.zeros(self.num_of_layers, input_0.size(0), self.hidden_size).to(device) + c0_0 = torch.zeros(self.num_of_layers, input_0.size(0), self.hidden_size).to(device) + out_0, _ = self.lstm0(input_0, (h0_0, c0_0)) + h0_1 = torch.zeros(self.num_of_layers, input_1.size(0), self.hidden_size).to(device) + c0_1 = torch.zeros(self.num_of_layers, input_1.size(0), self.hidden_size).to(device) + out_1, _ = self.lstm1(input_1, (h0_1, c0_1)) + multi_out = torch.cat((out_0[:, -1, :], out_1[:, -1, :]), -1) + out = self.fc(multi_out) + return out + +def train_model(window_length, input_size_0,input_size_1, hidden_size, num_of_layers, num_of_classes, num_epochs, batch_size, root_path, model_output_directory,logkey_path): + # log setting + log_directory = root_path + 'log_out/' + log_template = 'Adam_batch_size=' + str(batch_size) + ';epoch=' + str(num_epochs) + + print("Train num_classes: ", num_of_classes) + model = Model(input_size_0,input_size_1, hidden_size, num_of_layers, num_of_classes).to(device) + # create data set + data_set = generate_label(logkey_path, window_length,num_of_classes) + # create data_loader + data_loader = DataLoader(dataset=data_set, batch_size=batch_size, shuffle=True, pin_memory=False) + writer = SummaryWriter(logdir=log_directory + log_template) + + # Loss and optimizer classify job + criterion = nn.CrossEntropyLoss() + optimizer = optim.Adam(model.parameters()) + + # Training + for epoch in range(num_epochs): + train_loss = 0 + for step, (seq, quan, 
label) in enumerate(data_loader): + seq = seq.clone().detach().view(-1, window_length, input_size_0).to(device) + quan = quan.clone().detach().view(-1, window_length, input_size_1).to(device) + output = model(seq,quan) + + loss = criterion(output, label.to(device)) + + # Backward and optimize + optimizer.zero_grad() + loss.backward() + train_loss += loss.item() + optimizer.step() + print('Epoch [{}/{}], training_loss: {:.6f}'.format(epoch + 1, num_epochs, train_loss / len(data_loader.dataset))) + if (epoch + 1) % 100 == 0: + if not os.path.isdir(model_output_directory): + os.makedirs(model_output_directory) + e_log = 'Adam_batch_size=' + str(batch_size) + ';epoch=' + str(epoch+1) + torch.save(model.state_dict(), model_output_directory + '/' + e_log + '.pt') + writer.close() + print('Training finished') + +if __name__=='__main__': + input_size_0 = 1 + input_size_1 = 61 + hidden_size = 30 + num_of_layers = 2 + num_of_classes = 61 + num_epochs = 100 + batch_size = 200 + window_length = 5 + train_logkey_path = '../../Data/FTTreeResult-HDFS/deeplog_files/logkey/logkey_train' + test_logkey_path = '../../Data/FTTreeResult-HDFS/deeplog_files/logkey/logkey_test' + train_root_path = '../../Data/FTTreeResult-HDFS/model_train/' + label_file_name = '../../Data/FTTreeResult-HDFS/deeplog_files/HDFS_abnormal_label.txt' + model_out_path = train_root_path + 'model_out/' + train_model(window_length, input_size_0,input_size_1, hidden_size, + num_of_layers, num_of_classes, num_epochs, batch_size, train_root_path, + model_out_path, train_logkey_path) \ No newline at end of file From 0c67da83a5422541cdb4b55efc5d2e5765dbb5ee Mon Sep 17 00:00:00 2001 From: cainiao66 <17717084193@163.com> Date: Thu, 7 May 2020 17:41:21 +0800 Subject: [PATCH 3/4] fix log_anomaly --- ...og_anomaly_quantitive_train.cpython-37.pyc | Bin 3412 -> 3412 bytes .../log_anomaly_sequence_train.cpython-37.pyc | Bin 0 -> 3901 bytes ..._anomaly_sequential_predict.cpython-37.pyc | Bin 3287 -> 3600 bytes ...og_anomaly_sequential_train.cpython-37.pyc | Bin 3832 -> 3815 bytes .../log_anomaly_train.cpython-37.pyc | Bin 4309 -> 4334 bytes .../loganomaly/log_anomaly_predict.py | 19 ++- .../log_anomaly_quantitive_predict.py | 52 +++++--- .../log_anomaly_quantitive_train.py | 1 + .../log_anomaly_sequence_predict.py | 123 ++++++++++++++++++ .../loganomaly/log_anomaly_sequence_train.py | 107 +++++++++++++++ .../log_anomaly_sequential_predict.py | 32 +++-- .../log_anomaly_sequential_train.py | 13 +- .../loganomaly/log_anomaly_train.py | 11 +- 13 files changed, 310 insertions(+), 48 deletions(-) create mode 100644 anomalydetection/loganomaly/__pycache__/log_anomaly_sequence_train.cpython-37.pyc create mode 100644 anomalydetection/loganomaly/log_anomaly_sequence_predict.py create mode 100644 anomalydetection/loganomaly/log_anomaly_sequence_train.py diff --git a/anomalydetection/loganomaly/__pycache__/log_anomaly_quantitive_train.cpython-37.pyc b/anomalydetection/loganomaly/__pycache__/log_anomaly_quantitive_train.cpython-37.pyc index 37c9d88256cd4afef5d5dc2e2bae3a62944f8c06..2a26ce1e6db7da3dd9dccbb2dfd70625e2666b40 100644 GIT binary patch delta 40 ucmca2bw!HTiI_Q@=aCYw!pWEh!+IK%+fN(o{B diff --git a/anomalydetection/loganomaly/__pycache__/log_anomaly_sequence_train.cpython-37.pyc b/anomalydetection/loganomaly/__pycache__/log_anomaly_sequence_train.cpython-37.pyc new file mode 100644 index 0000000000000000000000000000000000000000..4f8318b26445d6b774b9fa69de8510172cece2f5 GIT binary patch literal 3901 
[GIT binary patch payloads omitted: base85 blobs for the updated __pycache__ bytecode (log_anomaly_quantitive_train, log_anomaly_sequence_train, log_anomaly_sequential_predict, log_anomaly_sequential_train, log_anomaly_train .cpython-37.pyc)]

diff --git a/anomalydetection/loganomaly/log_anomaly_predict.py b/anomalydetection/loganomaly/log_anomaly_predict.py
index 106120f..637bbd8 100644
--- a/anomalydetection/loganomaly/log_anomaly_predict.py
+++ b/anomalydetection/loganomaly/log_anomaly_predict.py
@@ -14,16 +14,17 @@ def generate_test_label(logkey_path, window_length,num_of_classes):
     keys = list(map(int, keys))
     print(keys)
     length = len(keys)
-    input_1 = np.zeros((length -window_length,1))
+    input_1 = np.zeros((length -window_length,num_of_classes))
     output_1 = np.zeros(length -window_length,dtype=np.int)
     input_2 = np.zeros((length -window_length,num_of_classes))
     output = np.zeros(length -window_length,dtype=np.int)
     for i in range(0,length -window_length):
+        for t in range(0,num_of_classes):
+            input_1[i][t] = keys[i]
         for j in range(i,i+window_length):
-            input_1[i][0] = keys[j]
             input_2[i][keys[j]-1] += 1
         output[i] = keys[i+window_length]-1
-    new_input_1 = np.zeros((length -2*window_length+1,window_length,1))
+    new_input_1 = np.zeros((length -2*window_length+1,window_length,num_of_classes))
     new_input_2 = np.zeros((length - 2 * window_length + 1, window_length, num_of_classes))
     for i in range(0,length -2*window_length+1):
         for j in range(i,i+window_length):
@@ -39,6 +40,13 @@ def load_model(input_size_1,input_size_2, hidden_size, num_layers, num_classes,
     print('model_path: {}'.format(model_path))
     return model
+def filter_small_top_k(predicted,
output): + filter = [] + for p in predicted: + if output[0][p] > 0.001: + filter.append(p) + return filter + def do_predict(input_size_1,input_size_2, hidden_size, num_layers, num_classes, window_length, model_path, anomaly_test_line_path, num_candidates, logkey_path): model = load_model(input_size_1,input_size_2 ,hidden_size, num_layers, num_classes, model_path) start_time = time.time() @@ -64,6 +72,7 @@ def do_predict(input_size_1,input_size_2, hidden_size, num_layers, num_classes, quan = torch.tensor(quan, dtype=torch.float).view(-1, window_length, input_size_2).to(device) test_output = model(seq,quan) predicted = torch.argsort(test_output , 1)[0][-num_candidates:] + predicted = filter_small_top_k(predicted, test_output) print('{} - predict result: {}, true label: {}'.format(lineNum, predicted,label)) if lineNum in abnormal_label: ## 若出现异常日志,则接下来的预测跳过异常日志,保证进行预测的日志均为正常日志 i += 2*window_length + 1 @@ -104,7 +113,7 @@ def do_predict(input_size_1,input_size_2, hidden_size, num_layers, num_classes, print('elapsed_time: {}'.format(elapsed_time)) if __name__=='__main__': - input_size_1 = 1 + input_size_1 = 61 input_size_2 = 61 hidden_size = 30 num_of_layers = 2 @@ -119,4 +128,4 @@ def do_predict(input_size_1,input_size_2, hidden_size, num_layers, num_classes, model_out_path = train_root_path + 'model_out/' do_predict(input_size_1,input_size_2, hidden_size, num_of_layers, num_of_classes, window_length, - model_out_path + 'Adam_batch_size=200;epoch=100.pt', label_file_name, 9, test_logkey_path) \ No newline at end of file + model_out_path + 'Adam_batch_size=200;epoch=100.pt', label_file_name, 5, test_logkey_path) \ No newline at end of file diff --git a/anomalydetection/loganomaly/log_anomaly_quantitive_predict.py b/anomalydetection/loganomaly/log_anomaly_quantitive_predict.py index 2aa0d55..6286fce 100644 --- a/anomalydetection/loganomaly/log_anomaly_quantitive_predict.py +++ b/anomalydetection/loganomaly/log_anomaly_quantitive_predict.py @@ -38,6 +38,13 @@ def load_quantitive_model(input_size, hidden_size, num_layers, num_classes, mode print('model_path: {}'.format(model_path)) return model2 +def filter_small_top_k(predicted, output): + filter = [] + for p in predicted: + if output[0][p] > 0.001: + filter.append(p) + return filter + def do_predict(input_size, hidden_size, num_layers, num_classes, window_length, model_path, anomaly_test_line_path, num_candidates, logkey_path): quantitive_model = load_quantitive_model(input_size, hidden_size, num_layers, num_classes, model_path) start_time = time.time() @@ -61,6 +68,7 @@ def do_predict(input_size, hidden_size, num_layers, num_classes, window_length, quan = torch.tensor(quan, dtype=torch.float).view(-1, window_length, input_size).to(device) test_output = quantitive_model(quan) predicted = torch.argsort(test_output , 1)[0][-num_candidates:] + predicted = filter_small_top_k(predicted, test_output) print('{} - predict result: {}, true label: {}'.format(lineNum, predicted,label)) if lineNum in abnormal_label: ## 若出现异常日志,则接下来的预测跳过异常日志,保证进行预测的日志均为正常日志 i += 2*window_length + 1 @@ -69,14 +77,14 @@ def do_predict(input_size, hidden_size, num_layers, num_classes, window_length, ALL += 1 if label not in predicted: if lineNum in abnormal_label: - TN += 1 + TP += 1 else: - FN += 1 + FP += 1 else: if lineNum in abnormal_label: - FP += 1 + FN += 1 else: - TP += 1 + TN += 1 # Compute precision, recall and F1-measure if TP + FP == 0: P = 0 @@ -100,23 +108,25 @@ def do_predict(input_size, hidden_size, num_layers, num_classes, window_length, elapsed_time = 
time.time() - start_time print('elapsed_time: {}'.format(elapsed_time)) -input_size = 61 -hidden_size = 30 -num_of_layers = 2 -num_of_classes = 61 -num_epochs = 100 -batch_size = 200 -window_length = 5 -train_logkey_path = '../../Data/FTTreeResult-HDFS/deeplog_files/logkey/logkey_train' -test_logkey_path = '../../Data/FTTreeResult-HDFS/deeplog_files/logkey/logkey_test' -train_root_path = '../../Data/FTTreeResult-HDFS/model_train/' -label_file_name = '../../Data/FTTreeResult-HDFS/deeplog_files/HDFS_abnormal_label.txt' -model_out_path = train_root_path + 'quantitive_model_out/' -train_model(window_length, input_size, hidden_size, - num_of_layers, num_of_classes, num_epochs, batch_size, train_root_path, - model_out_path,train_logkey_path) +if __name__ == '__main__': + input_size = 61 + hidden_size = 30 + num_of_layers = 2 + num_of_classes = 61 + num_epochs = 100 + batch_size = 200 + window_length = 5 + train_logkey_path = '../../Data/FTTreeResult-HDFS/deeplog_files/logkey/logkey_train' + test_logkey_path = '../../Data/FTTreeResult-HDFS/deeplog_files/logkey/logkey_test' + train_root_path = '../../Data/FTTreeResult-HDFS/model_train/' + label_file_name = '../../Data/FTTreeResult-HDFS/deeplog_files/HDFS_abnormal_label.txt' + model_out_path = train_root_path + 'quantitive_model_out/' + + train_model(window_length, input_size, hidden_size, + num_of_layers, num_of_classes, num_epochs, batch_size, train_root_path, + model_out_path, train_logkey_path) -do_predict(input_size, hidden_size, num_of_layers, num_of_classes, window_length, - model_out_path + 'Adam_batch_size=200;epoch=100.pt', label_file_name, 3, test_logkey_path) + do_predict(input_size, hidden_size, num_of_layers, num_of_classes, window_length, + model_out_path + 'Adam_batch_size=200;epoch=100.pt', label_file_name, 3, test_logkey_path) diff --git a/anomalydetection/loganomaly/log_anomaly_quantitive_train.py b/anomalydetection/loganomaly/log_anomaly_quantitive_train.py index 4426687..b4b3b7f 100644 --- a/anomalydetection/loganomaly/log_anomaly_quantitive_train.py +++ b/anomalydetection/loganomaly/log_anomaly_quantitive_train.py @@ -27,6 +27,7 @@ def forward(self, input): out = self.fc(out[:, -1, :]) return out + def generate_quantitive_label(logkey_path, window_length,num_of_classes): f = open(logkey_path,'r') keys = f.readline().split() diff --git a/anomalydetection/loganomaly/log_anomaly_sequence_predict.py b/anomalydetection/loganomaly/log_anomaly_sequence_predict.py new file mode 100644 index 0000000..5542c3a --- /dev/null +++ b/anomalydetection/loganomaly/log_anomaly_sequence_predict.py @@ -0,0 +1,123 @@ +import torch +import os +import torch.nn as nn +import time +import numpy as np +from anomalydetection.loganomaly.log_anomaly_sequence_train import Model +from anomalydetection.loganomaly.log_anomaly_sequence_train import train_model + +device = torch.device("cuda" if torch.cuda.is_available() else "cpu") + +def generate_test_label(logkey_path, window_length,num_of_classes): + f = open(logkey_path,'r') + keys = f.readline().split() + keys = list(map(int, keys)) + print(keys) + length = len(keys) + input_1 = np.zeros((length -window_length,1)) + output = np.zeros(length -window_length,dtype=np.int) + for i in range(0,length -window_length): + for j in range(i,i+window_length): + input_1[i][0] = keys[j] + output[i] = keys[i+window_length]-1 + new_input_1 = np.zeros((length -2*window_length+1,window_length,1)) + for i in range(0,length -2*window_length+1): + for j in range(i,i+window_length): + new_input_1[i][j - i] = input_1[j] + 
new_output = output[window_length-1:] + return length,new_input_1,new_output + +def load_model(input_size_1,input_size_2, hidden_size, num_layers, num_classes, model_path): + model = Model(input_size_1,input_size_2,hidden_size, num_layers, num_classes).to(device) + model.load_state_dict(torch.load(model_path, map_location='cpu')) + model.eval() + print('model_path: {}'.format(model_path)) + return model + +def filter_small_top_k(predicted, output): + filter = [] + for p in predicted: + if output[0][p] > 0.001: + filter.append(p) + return filter + +def do_predict(input_size_1,input_size_2, hidden_size, num_layers, num_classes, window_length, model_path, anomaly_test_line_path, num_candidates, logkey_path): + model = load_model(input_size_1,input_size_2 ,hidden_size, num_layers, num_classes, model_path) + start_time = time.time() + TP = 0 + FP = 0 + TN = 0 + FN = 0 + ALL = 0 + length,input_1,output = generate_test_label(logkey_path, window_length,num_classes) + abnormal_label = [] + with open(anomaly_test_line_path) as f: + abnormal_label = [int(x) for x in f.readline().strip().split()] + print('predict start') + with torch.no_grad(): + count_num = 0 + current_file_line = 0 + for i in range(0,length-2*window_length+1): + lineNum = i + 2*window_length + seq = input_1[i] + label = output[i] + seq = torch.tensor(seq, dtype=torch.float).view(-1, window_length, input_size_1).to(device) + test_output = model(seq) + predicted = torch.argsort(test_output , 1)[0][-num_candidates:] + predicted = filter_small_top_k(predicted, test_output) + print('{} - predict result: {}, true label: {}'.format(lineNum, predicted,label)) + if lineNum in abnormal_label: ## 若出现异常日志,则接下来的预测跳过异常日志,保证进行预测的日志均为正常日志 + i += 2*window_length + 1 + else: + i += 1 + ALL += 1 + if label not in predicted: + if lineNum in abnormal_label: + TP += 1 + else: + FP += 1 + else: + if lineNum in abnormal_label: + FN += 1 + else: + TN += 1 + # Compute precision, recall and F1-measure + if TP + FP == 0: + P = 0 + else: + P = 100 * TP / (TP + FP) + + if TP + FN == 0: + R = 0 + else: + R = 100 * TP / (TP + FN) + + if P + R == 0: + F1 = 0 + else: + F1 = 2 * P * R / (P + R) + + Acc = (TP + TN) * 100 / ALL + print('FP: {}, FN: {}, TP: {}, TN: {}'.format(FP, FN, TP, TN)) + print('Acc: {:.3f}, Precision: {:.3f}%, Recall: {:.3f}%, F1-measure: {:.3f}%'.format(Acc, P, R, F1)) + print('Finished Predicting') + elapsed_time = time.time() - start_time + print('elapsed_time: {}'.format(elapsed_time)) + +if __name__=='__main__': + input_size_1 = 1 + input_size_2 = 61 + hidden_size = 30 + num_of_layers = 2 + num_of_classes = 61 + num_epochs = 100 + batch_size = 200 + window_length = 5 + train_logkey_path = '../../Data/FTTreeResult-HDFS/deeplog_files/logkey/logkey_train' + test_logkey_path = '../../Data/FTTreeResult-HDFS/deeplog_files/logkey/logkey_test' + train_root_path = '../../Data/FTTreeResult-HDFS/model_train/' + label_file_name = '../../Data/FTTreeResult-HDFS/deeplog_files/HDFS_abnormal_label.txt' + model_out_path = train_root_path + 'sequence_model_out/' + + do_predict(input_size_1,input_size_2, hidden_size, num_of_layers, num_of_classes, window_length, + model_out_path + 'Adam_batch_size=200;epoch=100.pt', label_file_name, 3, test_logkey_path) \ No newline at end of file diff --git a/anomalydetection/loganomaly/log_anomaly_sequence_train.py b/anomalydetection/loganomaly/log_anomaly_sequence_train.py new file mode 100644 index 0000000..dab9ed3 --- /dev/null +++ b/anomalydetection/loganomaly/log_anomaly_sequence_train.py @@ -0,0 +1,107 @@ +import torch 
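+# Trains the sequential half of LogAnomaly on its own: each sample is a window
+# of raw log-key ids with shape [window_length, 1], and the target is the
+# (key id - 1) of the log key that follows the window. The quantitative
+# counterpart is log_anomaly_quantitive_train.py; log_anomaly_train.py feeds
+# both views into a single model with two LSTMs.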
+import torch.nn as nn +import torch.optim as optim +from tensorboardX import SummaryWriter +from torch.utils.data import TensorDataset, DataLoader +import numpy as np +import argparse +import os +from . import * + +device = torch.device("cuda" if torch.cuda.is_available() else "cpu") + + +def generate_label(logkey_path, window_length,num_of_classes): + f = open(logkey_path,'r') + keys = f.readline().split() + keys = list(map(int, keys)) + print(keys) + length = len(keys) + input_1 = np.zeros((length -window_length,1)) + output = np.zeros(length -window_length,dtype=np.int) + for i in range(0,length -window_length): + for j in range(i,i+window_length): + input_1[i][0] = keys[j] + output[i] = keys[i+window_length]-1 + new_input_1 = np.zeros((length -2*window_length+1,window_length,1)) + for i in range(0,length -2*window_length+1): + for j in range(i,i+window_length): + new_input_1[i][j - i] = input_1[j] + new_output = output[window_length-1:] + print(new_input_1.shape) + print(new_output.shape) + dataset = TensorDataset(torch.tensor(new_input_1,dtype=torch.float),torch.tensor(new_output,dtype=torch.long)) + return dataset + +class Model(nn.Module): + def __init__(self, input_size_0,input_size_1, hidden_size, num_of_layers, out_size): + super(Model, self).__init__() + self.hidden_size = hidden_size + self.num_of_layers = num_of_layers + self.lstm0 = nn.LSTM(input_size_0, hidden_size, num_of_layers, batch_first=True) + self.fc = nn.Linear(hidden_size, out_size) + + def forward(self, input_0): + h0_0 = torch.zeros(self.num_of_layers, input_0.size(0), self.hidden_size).to(device) + c0_0 = torch.zeros(self.num_of_layers, input_0.size(0), self.hidden_size).to(device) + out_0, _ = self.lstm0(input_0, (h0_0, c0_0)) + out = self.fc(out_0[:, -1, :]) + return out + +def train_model(window_length, input_size_0,input_size_1, hidden_size, num_of_layers, num_of_classes, num_epochs, batch_size, root_path, model_output_directory,logkey_path): + # log setting + log_directory = root_path + 'sequence_log_out/' + log_template = 'Adam_batch_size=' + str(batch_size) + ';epoch=' + str(num_epochs) + + print("Train num_classes: ", num_of_classes) + model = Model(input_size_0,input_size_1, hidden_size, num_of_layers, num_of_classes).to(device) + # create data set + data_set = generate_label(logkey_path, window_length,num_of_classes) + # create data_loader + data_loader = DataLoader(dataset=data_set, batch_size=batch_size, shuffle=True, pin_memory=False) + writer = SummaryWriter(logdir=log_directory + log_template) + + # Loss and optimizer classify job + criterion = nn.CrossEntropyLoss() + optimizer = optim.Adam(model.parameters()) + + # Training + for epoch in range(num_epochs): + train_loss = 0 + for step, (seq, label) in enumerate(data_loader): + seq = seq.clone().detach().view(-1, window_length, input_size_0).to(device) + output = model(seq) + + loss = criterion(output, label.to(device)) + + # Backward and optimize + optimizer.zero_grad() + loss.backward() + train_loss += loss.item() + optimizer.step() + print('Epoch [{}/{}], training_loss: {:.6f}'.format(epoch + 1, num_epochs, train_loss / len(data_loader.dataset))) + if (epoch + 1) % 100 == 0: + if not os.path.isdir(model_output_directory): + os.makedirs(model_output_directory) + e_log = 'Adam_batch_size=' + str(batch_size) + ';epoch=' + str(epoch+1) + torch.save(model.state_dict(), model_output_directory + '/' + e_log + '.pt') + writer.close() + print('Training finished') + +if __name__=='__main__': + input_size_0 = 1 + input_size_1 = 61 + hidden_size = 30 + 
num_of_layers = 2 + num_of_classes = 61 + num_epochs = 100 + batch_size = 200 + window_length = 5 + train_logkey_path = '../../Data/FTTreeResult-HDFS/deeplog_files/logkey/logkey_train' + test_logkey_path = '../../Data/FTTreeResult-HDFS/deeplog_files/logkey/logkey_test' + train_root_path = '../../Data/FTTreeResult-HDFS/model_train/' + label_file_name = '../../Data/FTTreeResult-HDFS/deeplog_files/HDFS_abnormal_label.txt' + model_out_path = train_root_path + 'sequence_model_out/' + train_model(window_length, input_size_0,input_size_1, hidden_size, + num_of_layers, num_of_classes, num_epochs, batch_size, train_root_path, + model_out_path, train_logkey_path) \ No newline at end of file diff --git a/anomalydetection/loganomaly/log_anomaly_sequential_predict.py b/anomalydetection/loganomaly/log_anomaly_sequential_predict.py index ee13038..7c010b7 100644 --- a/anomalydetection/loganomaly/log_anomaly_sequential_predict.py +++ b/anomalydetection/loganomaly/log_anomaly_sequential_predict.py @@ -31,6 +31,14 @@ def load_sequential_model(input_size, hidden_size, num_layers, num_classes, mode return model1 +def filter_small_top_k(predicted, output): + filter = [] + for p in predicted: + if output[0][p] > 0.001: + filter.append(p) + return filter + + def do_predict(input_size, hidden_size, num_layers, num_classes, window_length, model_path, anomaly_test_line_path, test_file_path, num_candidates, pattern_vec_file): vec_to_class_type = {} with open(pattern_vec_file, 'r') as pattern_file: @@ -49,8 +57,8 @@ def do_predict(input_size, hidden_size, num_layers, num_classes, window_length, TN = 0 FN = 0 ALL = 0 + skip_count = 0 abnormal_loader = generate(test_file_path, window_length) - abnormal_label = [] with open(anomaly_test_line_path) as f: abnormal_label = [int(x) for x in f.readline().strip().split()] print('predict start') @@ -61,33 +69,37 @@ def do_predict(input_size, hidden_size, num_layers, num_classes, window_length, i = 0 # first traverse [0, window_size) while i < len(line) - window_length: - lineNum = current_file_line * 10 + i + window_length + 1 + lineNum = current_file_line * 200 + i + window_length + 1 count_num += 1 seq = line[i:i + window_length] label = line[i + window_length] - print(label) + for n in range(len(seq)): + if current_file_line * 200 + i + n + 1 in abnormal_label: + i = i + n + 1 + continue seq = torch.tensor(seq, dtype=torch.float).view(-1, window_length, input_size).to(device) - print(seq.shape) #label = torch.tensor(label).view(-1).to(device) output = sequential_model(seq) - print(output) predicted = torch.argsort(output, 1)[0][-num_candidates:] + predicted = filter_small_top_k(predicted, output) + #print(output) print('{} - predict result: {}, true label: {}'.format(count_num, predicted, vec_to_class_type[tuple(label)])) if lineNum in abnormal_label: ## 若出现异常日志,则接下来的预测跳过异常日志,保证进行预测的日志均为正常日志 i += window_length + 1 + skip_count += 1 else: i += 1 ALL += 1 if vec_to_class_type[tuple(label)] not in predicted: if lineNum in abnormal_label: - TN += 1 + TP += 1 else: - FN += 1 + FP += 1 else: if lineNum in abnormal_label: - FP += 1 + FN += 1 else: - TP += 1 + TN += 1 current_file_line += 1 # Compute precision, recall and F1-measure if TP + FP == 0: @@ -112,5 +124,5 @@ def do_predict(input_size, hidden_size, num_layers, num_classes, window_length, print('Finished Predicting') elapsed_time = time.time() - start_time print('elapsed_time: {}'.format(elapsed_time)) - + print('skip_count: {}'.format(skip_count)) #draw_evaluation("Evaluations", ['Acc', 'Precision', 'Recall', 
'F1-measure'], [Acc, P, R, F1], 'evaluations', '%') \ No newline at end of file diff --git a/anomalydetection/loganomaly/log_anomaly_sequential_train.py b/anomalydetection/loganomaly/log_anomaly_sequential_train.py index 77e32de..3d87fc2 100644 --- a/anomalydetection/loganomaly/log_anomaly_sequential_train.py +++ b/anomalydetection/loganomaly/log_anomaly_sequential_train.py @@ -27,20 +27,20 @@ def generate_seq_label(file_path, window_length, pattern_vec_file): for line in file.readlines(): num_of_sessions += 1 line = tuple(map(lambda n: tuple(map(float, n.strip().split())), [x for x in line.strip().split(',') if len(x) > 0])) - if len(line) < 10: - print(line) + if len(line) < window_length: + #print(line) + continue for i in range(len(line) - window_length): input_data.append(line[i:i + window_length]) # line[i] is a list need to read file form a dic{vec:log_key} to get log key output_data.append(vec_to_class_type[line[i + window_length]]) data_set = TensorDataset(torch.tensor(input_data, dtype=torch.float), torch.tensor(output_data)) - print(torch.tensor(input_data).shape) return data_set def train_model(window_length, input_size, hidden_size, num_of_layers, num_of_classes, num_epochs, batch_size, root_path, model_output_directory, data_file, pattern_vec_file): # log setting - log_directory = root_path + 'log_out/' + log_directory = root_path + 'sequence_log_out/' log_template = 'Adam_batch_size=' + str(batch_size) + ';epoch=' + str(num_epochs) print("Train num_classes: ", num_of_classes) @@ -70,7 +70,7 @@ def train_model(window_length, input_size, hidden_size, num_of_layers, num_of_cl train_loss += loss.item() optimizer.step() print('Epoch [{}/{}], training_loss: {:.4f}'.format(epoch + 1, num_epochs, train_loss / len(data_loader.dataset))) - if (epoch + 1) % 100 == 0: + if (epoch + 1) % num_epochs == 0: if not os.path.isdir(model_output_directory): os.makedirs(model_output_directory) e_log = 'Adam_batch_size=' + str(batch_size) + ';epoch=' + str(epoch+1) @@ -84,9 +84,8 @@ def __init__(self, input_size, hidden_size, num_of_layers, out_size): super(Model, self).__init__() self.hidden_size = hidden_size self.num_of_layers = num_of_layers - self.lstm = nn.LSTM(input_size, hidden_size, num_of_layers, batch_first=True) + self.lstm = nn.LSTM(input_size, hidden_size, num_of_layers, batch_first=True, dropout=0.5) self.fc = nn.Linear(hidden_size, out_size) - # self.out = nn.Linear(in_features=in_features, out_features=out_features) def init_hidden(self, size): diff --git a/anomalydetection/loganomaly/log_anomaly_train.py b/anomalydetection/loganomaly/log_anomaly_train.py index 9202c1b..b515ed5 100644 --- a/anomalydetection/loganomaly/log_anomaly_train.py +++ b/anomalydetection/loganomaly/log_anomaly_train.py @@ -17,16 +17,17 @@ def generate_label(logkey_path, window_length,num_of_classes): keys = list(map(int, keys)) print(keys) length = len(keys) - input_1 = np.zeros((length -window_length,1)) + input_1 = np.zeros((length -window_length,num_of_classes)) output_1 = np.zeros(length -window_length,dtype=np.int) input_2 = np.zeros((length -window_length,num_of_classes)) output = np.zeros(length -window_length,dtype=np.int) for i in range(0,length -window_length): + for t in range(0,num_of_classes): + input_1[i][t] = keys[i] for j in range(i,i+window_length): - input_1[i][0] = keys[j] input_2[i][keys[j]-1] += 1 output[i] = keys[i+window_length]-1 - new_input_1 = np.zeros((length -2*window_length+1,window_length,1)) + new_input_1 = np.zeros((length -2*window_length+1,window_length,num_of_classes)) 
new_input_2 = np.zeros((length - 2 * window_length + 1, window_length, num_of_classes)) for i in range(0,length -2*window_length+1): for j in range(i,i+window_length): @@ -103,14 +104,14 @@ def train_model(window_length, input_size_0,input_size_1, hidden_size, num_of_la print('Training finished') if __name__=='__main__': - input_size_0 = 1 + input_size_0 = 61 input_size_1 = 61 hidden_size = 30 num_of_layers = 2 num_of_classes = 61 num_epochs = 100 batch_size = 200 - window_length = 5 + window_length = 10 train_logkey_path = '../../Data/FTTreeResult-HDFS/deeplog_files/logkey/logkey_train' test_logkey_path = '../../Data/FTTreeResult-HDFS/deeplog_files/logkey/logkey_test' train_root_path = '../../Data/FTTreeResult-HDFS/model_train/' From 4f26ca9d8396a5e08f76366e51aa373b9160810e Mon Sep 17 00:00:00 2001 From: cainiao66 <17717084193@163.com> Date: Thu, 18 Jun 2020 14:31:57 +0800 Subject: [PATCH 4/4] add add --- .gitignore | 1 + HDFS_drain3_state.bin | 1 + anomalydetection/att_all_you_need/__init__.py | 1 + .../__pycache__/__init__.cpython-36.pyc | Bin 0 -> 184 bytes .../encoder_self_att_predict.cpython-36.pyc | Bin 0 -> 4003 bytes .../encoder_self_att_train.cpython-36.pyc | Bin 0 -> 7139 bytes .../encoder_self_att_predict.py | 141 ++++++++ .../encoder_self_att_train.py | 296 ++++++++++++++++ anomalydetection/bi_lstm_only/__init__.py | 1 + .../__pycache__/__init__.cpython-36.pyc | Bin 0 -> 180 bytes .../bi_lstm_predict.cpython-36.pyc | Bin 0 -> 3244 bytes .../__pycache__/bi_lstm_train.cpython-36.pyc | Bin 0 -> 3931 bytes .../bi_lstm_only/bi_lstm_predict.py | 127 +++++++ .../bi_lstm_only/bi_lstm_train.py | 116 +++++++ .../__pycache__/__init__.cpython-36.pyc | Bin 0 -> 182 bytes .../__pycache__/__init__.cpython-37.pyc | Bin 202 -> 160 bytes .../log_key_LSTM_train.cpython-36.pyc | Bin 0 -> 3357 bytes .../log_key_LSTM_train.cpython-37.pyc | Bin 3376 -> 3380 bytes .../deeplog/Model1/log_key_LSTM_train.py | 16 +- .../__pycache__/__init__.cpython-36.pyc | Bin 0 -> 182 bytes .../__pycache__/__init__.cpython-37.pyc | Bin 202 -> 160 bytes .../variable_LSTM_train.cpython-36.pyc | Bin 0 -> 3900 bytes .../variable_LSTM_train.cpython-37.pyc | Bin 3916 -> 3933 bytes .../deeplog/Model2/variable_LSTM_train.py | 50 ++- .../__pycache__/__init__.cpython-36.pyc | Bin 0 -> 175 bytes .../__pycache__/__init__.cpython-37.pyc | Bin 195 -> 153 bytes .../__pycache__/log_predict.cpython-36.pyc | Bin 0 -> 5415 bytes .../__pycache__/log_predict.cpython-37.pyc | Bin 5742 -> 6208 bytes anomalydetection/deeplog/log_predict.py | 316 +++++++++++------- .../__pycache__/__init__.cpython-36.pyc | Bin 178 -> 178 bytes .../__pycache__/__init__.cpython-37.pyc | Bin 198 -> 182 bytes ...og_anomaly_quantitive_train.cpython-37.pyc | Bin 3412 -> 0 bytes .../log_anomaly_sequence_train.cpython-37.pyc | Bin 3901 -> 0 bytes ..._anomaly_sequential_predict.cpython-36.pyc | Bin 3226 -> 3509 bytes ..._anomaly_sequential_predict.cpython-37.pyc | Bin 3600 -> 0 bytes ...og_anomaly_sequential_train.cpython-36.pyc | Bin 3790 -> 3789 bytes ...og_anomaly_sequential_train.cpython-37.pyc | Bin 3815 -> 3790 bytes .../log_anomaly_train.cpython-37.pyc | Bin 4334 -> 0 bytes .../loganomaly/log_anomaly_predict.py | 131 -------- .../log_anomaly_quantitive_predict.py | 132 -------- .../log_anomaly_quantitive_train.py | 98 ------ .../log_anomaly_sequence_predict.py | 123 ------- .../loganomaly/log_anomaly_sequence_train.py | 107 ------ .../log_anomaly_sequential_predict.py | 2 +- .../log_anomaly_sequential_train.py | 2 +- .../loganomaly/log_anomaly_train.py | 122 
------- .../__pycache__/__init__.cpython-36.pyc | Bin 174 -> 174 bytes .../bi_lstm_att_predict.cpython-36.pyc | Bin 3240 -> 4306 bytes .../bi_lstm_att_train.cpython-36.pyc | Bin 4311 -> 5474 bytes .../robust/bi_lstm_att_predict.py | 113 ++++--- anomalydetection/robust/bi_lstm_att_train.py | 66 +++- anomalydetection/self_att_lstm/__init__.py | 1 + .../__pycache__/__init__.cpython-36.pyc | Bin 0 -> 181 bytes .../self_att_lstm_predict.cpython-36.pyc | Bin 0 -> 5806 bytes .../self_att_lstm_train.cpython-36.pyc | Bin 0 -> 4953 bytes .../self_att_lstm/self_att_lstm_predict.py | 246 ++++++++++++++ .../self_att_lstm/self_att_lstm_train.py | 140 ++++++++ .../self_att_lstm_supervised/__init__.py | 1 + .../__pycache__/__init__.cpython-36.pyc | Bin 0 -> 192 bytes ...att_lstm_supervised_predict.cpython-36.pyc | Bin 0 -> 4198 bytes ...f_att_lstm_supervised_train.cpython-36.pyc | Bin 0 -> 5338 bytes .../self_att_lstm_supervised_predict.py | 131 ++++++++ .../self_att_lstm_supervised_train.py | 154 +++++++++ deeplog_detection.py | 73 ++-- ecoder_anomaly_detection.py | 71 ++++ extractfeature/hdfs_deeplog_preprocessor.py | 261 +++++++++++++++ .../hdfs_fs_deeplog_preprocessor.py | 91 ++--- extractfeature/hdfs_ft_preprocessor.py | 58 +++- extractfeature/hdfs_robust_preprocessor.py | 166 +++++++++ .../k8s/__pycache__/__init__.cpython-37.pyc | Bin 0 -> 173 bytes .../log_preprocessor.cpython-36.pyc | Bin 7946 -> 7946 bytes .../log_preprocessor.cpython-37.pyc | Bin 0 -> 7916 bytes java/deeplog_java.py | 266 +++++++++++++++ java/detect_log/clusters/1 | 1 + java/detect_log/clusters/2 | 1 + java/detect_log/clusters/3 | 1 + java/detect_log/clusters/4 | 1 + java/detect_log/detect.log | 10 + java/detect_log/logkey.txt | 1 + java/detect_log/logvalue/1 | 1 + java/detect_log/logvalue/10 | 1 + java/detect_log/logvalue/11 | 1 + java/detect_log/logvalue/12 | 1 + java/detect_log/logvalue/13 | 1 + java/detect_log/logvalue/14 | 1 + java/detect_log/logvalue/15 | 1 + java/detect_log/logvalue/16 | 1 + java/detect_log/logvalue/17 | 1 + java/detect_log/logvalue/18 | 1 + java/detect_log/logvalue/19 | 1 + java/detect_log/logvalue/2 | 1 + java/detect_log/logvalue/20 | 1 + java/detect_log/logvalue/21 | 1 + java/detect_log/logvalue/22 | 1 + java/detect_log/logvalue/23 | 1 + java/detect_log/logvalue/24 | 1 + java/detect_log/logvalue/25 | 1 + java/detect_log/logvalue/26 | 1 + java/detect_log/logvalue/27 | 1 + java/detect_log/logvalue/28 | 1 + java/detect_log/logvalue/29 | 1 + java/detect_log/logvalue/3 | 1 + java/detect_log/logvalue/30 | 1 + java/detect_log/logvalue/31 | 1 + java/detect_log/logvalue/4 | 1 + java/detect_log/logvalue/5 | 1 + java/detect_log/logvalue/6 | 1 + java/detect_log/logvalue/7 | 1 + java/detect_log/logvalue/8 | 1 + java/detect_log/logvalue/9 | 1 + java/java.iml | 11 + java/out/production/java/deeplog.class | Bin 0 -> 1831 bytes java/src/deeplog.java | 29 ++ l_a_d_bi_lstm.py | 91 +++++ log_anomaly_detection.py | 26 +- log_deep_data_anomaly.py | 69 ++++ log_predict.py | 305 +++++++++++++++++ logparsing/converter/__init__.py | 1 + .../__pycache__/__init__.cpython-36.pyc | Bin 0 -> 171 bytes .../__pycache__/eventid2number.cpython-36.pyc | Bin 0 -> 496 bytes logparsing/converter/eventid2number.py | 8 + logparsing/converter/logparser2cluster.py | 25 ++ logparsing/drain/.gitignore | 9 + logparsing/drain/CONTRIBUTING.md | 48 +++ logparsing/drain/HDFS_drain.py | 34 ++ logparsing/drain/LICENSE.txt | 21 ++ logparsing/drain/README.md | 169 ++++++++++ logparsing/drain/__init__.py | 0 logparsing/drain/drain3/__init__.py | 2 + 
logparsing/drain/drain3/drain.py | 258 ++++++++++++++ logparsing/drain/drain3/file_persistence.py | 25 ++ logparsing/drain/drain3/kafka_persistence.py | 45 +++ logparsing/drain/drain3/masking.py | 65 ++++ .../drain/drain3/persistence_handler.py | 18 + logparsing/drain/drain3/template_miner.py | 98 ++++++ logparsing/drain/examples/drain3.ini | 14 + logparsing/drain/examples/drain_stdin_demo.py | 36 ++ logparsing/drain/requirements.txt | 5 + logparsing/drain/setup.cfg | 2 + logparsing/drain/setup.py | 32 ++ .../__pycache__/__init__.cpython-36.pyc | Bin 168 -> 168 bytes .../__pycache__/__init__.cpython-37.pyc | Bin 188 -> 146 bytes .../fttree/__pycache__/fttree.cpython-36.pyc | Bin 3218 -> 3210 bytes .../fttree/__pycache__/fttree.cpython-37.pyc | Bin 3238 -> 3188 bytes logparsing/fttree/fttree.py | 10 +- robust_anomaly_detection.py | 101 ++++++ self_att_lstm_anomaly_detection.py | 142 ++++++++ self_att_supervised_detection.py | 62 ++++ 148 files changed, 4413 insertions(+), 1015 deletions(-) create mode 100644 HDFS_drain3_state.bin create mode 100644 anomalydetection/att_all_you_need/__init__.py create mode 100644 anomalydetection/att_all_you_need/__pycache__/__init__.cpython-36.pyc create mode 100644 anomalydetection/att_all_you_need/__pycache__/encoder_self_att_predict.cpython-36.pyc create mode 100644 anomalydetection/att_all_you_need/__pycache__/encoder_self_att_train.cpython-36.pyc create mode 100644 anomalydetection/att_all_you_need/encoder_self_att_predict.py create mode 100644 anomalydetection/att_all_you_need/encoder_self_att_train.py create mode 100644 anomalydetection/bi_lstm_only/__init__.py create mode 100644 anomalydetection/bi_lstm_only/__pycache__/__init__.cpython-36.pyc create mode 100644 anomalydetection/bi_lstm_only/__pycache__/bi_lstm_predict.cpython-36.pyc create mode 100644 anomalydetection/bi_lstm_only/__pycache__/bi_lstm_train.cpython-36.pyc create mode 100644 anomalydetection/bi_lstm_only/bi_lstm_predict.py create mode 100644 anomalydetection/bi_lstm_only/bi_lstm_train.py create mode 100644 anomalydetection/deeplog/Model1/__pycache__/__init__.cpython-36.pyc create mode 100644 anomalydetection/deeplog/Model1/__pycache__/log_key_LSTM_train.cpython-36.pyc create mode 100644 anomalydetection/deeplog/Model2/__pycache__/__init__.cpython-36.pyc create mode 100644 anomalydetection/deeplog/Model2/__pycache__/variable_LSTM_train.cpython-36.pyc create mode 100644 anomalydetection/deeplog/__pycache__/__init__.cpython-36.pyc create mode 100644 anomalydetection/deeplog/__pycache__/log_predict.cpython-36.pyc delete mode 100644 anomalydetection/loganomaly/__pycache__/log_anomaly_quantitive_train.cpython-37.pyc delete mode 100644 anomalydetection/loganomaly/__pycache__/log_anomaly_sequence_train.cpython-37.pyc delete mode 100644 anomalydetection/loganomaly/__pycache__/log_anomaly_sequential_predict.cpython-37.pyc delete mode 100644 anomalydetection/loganomaly/__pycache__/log_anomaly_train.cpython-37.pyc delete mode 100644 anomalydetection/loganomaly/log_anomaly_predict.py delete mode 100644 anomalydetection/loganomaly/log_anomaly_quantitive_predict.py delete mode 100644 anomalydetection/loganomaly/log_anomaly_quantitive_train.py delete mode 100644 anomalydetection/loganomaly/log_anomaly_sequence_predict.py delete mode 100644 anomalydetection/loganomaly/log_anomaly_sequence_train.py delete mode 100644 anomalydetection/loganomaly/log_anomaly_train.py create mode 100644 anomalydetection/self_att_lstm/__init__.py create mode 100644 
anomalydetection/self_att_lstm/__pycache__/__init__.cpython-36.pyc create mode 100644 anomalydetection/self_att_lstm/__pycache__/self_att_lstm_predict.cpython-36.pyc create mode 100644 anomalydetection/self_att_lstm/__pycache__/self_att_lstm_train.cpython-36.pyc create mode 100644 anomalydetection/self_att_lstm/self_att_lstm_predict.py create mode 100644 anomalydetection/self_att_lstm/self_att_lstm_train.py create mode 100644 anomalydetection/self_att_lstm_supervised/__init__.py create mode 100644 anomalydetection/self_att_lstm_supervised/__pycache__/__init__.cpython-36.pyc create mode 100644 anomalydetection/self_att_lstm_supervised/__pycache__/self_att_lstm_supervised_predict.cpython-36.pyc create mode 100644 anomalydetection/self_att_lstm_supervised/__pycache__/self_att_lstm_supervised_train.cpython-36.pyc create mode 100644 anomalydetection/self_att_lstm_supervised/self_att_lstm_supervised_predict.py create mode 100644 anomalydetection/self_att_lstm_supervised/self_att_lstm_supervised_train.py create mode 100644 ecoder_anomaly_detection.py create mode 100644 extractfeature/hdfs_deeplog_preprocessor.py create mode 100644 extractfeature/hdfs_robust_preprocessor.py create mode 100644 extractfeature/k8s/__pycache__/__init__.cpython-37.pyc create mode 100644 extractfeature/k8s/__pycache__/log_preprocessor.cpython-37.pyc create mode 100644 java/deeplog_java.py create mode 100644 java/detect_log/clusters/1 create mode 100644 java/detect_log/clusters/2 create mode 100644 java/detect_log/clusters/3 create mode 100644 java/detect_log/clusters/4 create mode 100644 java/detect_log/detect.log create mode 100644 java/detect_log/logkey.txt create mode 100644 java/detect_log/logvalue/1 create mode 100644 java/detect_log/logvalue/10 create mode 100644 java/detect_log/logvalue/11 create mode 100644 java/detect_log/logvalue/12 create mode 100644 java/detect_log/logvalue/13 create mode 100644 java/detect_log/logvalue/14 create mode 100644 java/detect_log/logvalue/15 create mode 100644 java/detect_log/logvalue/16 create mode 100644 java/detect_log/logvalue/17 create mode 100644 java/detect_log/logvalue/18 create mode 100644 java/detect_log/logvalue/19 create mode 100644 java/detect_log/logvalue/2 create mode 100644 java/detect_log/logvalue/20 create mode 100644 java/detect_log/logvalue/21 create mode 100644 java/detect_log/logvalue/22 create mode 100644 java/detect_log/logvalue/23 create mode 100644 java/detect_log/logvalue/24 create mode 100644 java/detect_log/logvalue/25 create mode 100644 java/detect_log/logvalue/26 create mode 100644 java/detect_log/logvalue/27 create mode 100644 java/detect_log/logvalue/28 create mode 100644 java/detect_log/logvalue/29 create mode 100644 java/detect_log/logvalue/3 create mode 100644 java/detect_log/logvalue/30 create mode 100644 java/detect_log/logvalue/31 create mode 100644 java/detect_log/logvalue/4 create mode 100644 java/detect_log/logvalue/5 create mode 100644 java/detect_log/logvalue/6 create mode 100644 java/detect_log/logvalue/7 create mode 100644 java/detect_log/logvalue/8 create mode 100644 java/detect_log/logvalue/9 create mode 100644 java/java.iml create mode 100644 java/out/production/java/deeplog.class create mode 100644 java/src/deeplog.java create mode 100644 l_a_d_bi_lstm.py create mode 100644 log_deep_data_anomaly.py create mode 100644 log_predict.py create mode 100644 logparsing/converter/__init__.py create mode 100644 logparsing/converter/__pycache__/__init__.cpython-36.pyc create mode 100644 
logparsing/converter/__pycache__/eventid2number.cpython-36.pyc create mode 100644 logparsing/converter/eventid2number.py create mode 100644 logparsing/converter/logparser2cluster.py create mode 100644 logparsing/drain/.gitignore create mode 100644 logparsing/drain/CONTRIBUTING.md create mode 100644 logparsing/drain/HDFS_drain.py create mode 100644 logparsing/drain/LICENSE.txt create mode 100644 logparsing/drain/README.md delete mode 100644 logparsing/drain/__init__.py create mode 100644 logparsing/drain/drain3/__init__.py create mode 100644 logparsing/drain/drain3/drain.py create mode 100644 logparsing/drain/drain3/file_persistence.py create mode 100644 logparsing/drain/drain3/kafka_persistence.py create mode 100644 logparsing/drain/drain3/masking.py create mode 100644 logparsing/drain/drain3/persistence_handler.py create mode 100644 logparsing/drain/drain3/template_miner.py create mode 100644 logparsing/drain/examples/drain3.ini create mode 100644 logparsing/drain/examples/drain_stdin_demo.py create mode 100644 logparsing/drain/requirements.txt create mode 100644 logparsing/drain/setup.cfg create mode 100644 logparsing/drain/setup.py create mode 100644 robust_anomaly_detection.py create mode 100644 self_att_lstm_anomaly_detection.py create mode 100644 self_att_supervised_detection.py diff --git a/.gitignore b/.gitignore index 6eab8ea..093dfa7 100644 --- a/.gitignore +++ b/.gitignore @@ -3,5 +3,6 @@ .idea/ */.idea/ /Data/ +.pyc */__pycache__/ \ No newline at end of file diff --git a/HDFS_drain3_state.bin b/HDFS_drain3_state.bin new file mode 100644 index 0000000..081c98d --- /dev/null +++ b/HDFS_drain3_state.bin @@ -0,0 +1 @@ +eJztWm1v2zYQ/iuFsQ9tUagk9W5sA5qkAYJ18RAH24AiMBSJTtToxaPotFnR/z5SlC1SFv0WxfEQf5F0InnkHZ873pH83ps8vM+vv+CQ9vqvekl+MwlIEWc3RkSCOBNPsyJO+LP37lUvwhN6y+oj9l3E6agkgGExMg2+jcLbOIkIzthPCAD7SfKcjrI8wuzP9016POdtpA45szv8wBu+vhgMLt/0xI8RzUWvdS8QPbYzOO8MIm03P7/99bH9oFoozk3X1Q9WECbTgmJSMPrzRn1+ym+ORVvOn9UdUZxOkoBi1tEdzkqOPeBBCHxeoxpI9To7Px2UIx4XxklAAy7BT/zj7xDH95j0eeEF5gTrnxPXSR7eSRwKEvYlMsIFndNXtWCjOOKyfAAAwLJZ/C+X3XVcEzH5n1vkIy6VkLMS+uO3kM1inJdmIYyDiPKjmQbGOWnTyJfgPjDi3Pg4OK2ZtKoC2ZIqnlwNCALXBvwLItPdQBUQGMgGBjQN6Hl9m80g6Cvq+Upi2oTHdXI3Mj3b8xD0geX5FvIt0ykHQnP+TGNChAJL/tAwfQM6QPDX6MuV9AV3oq8SrJCJoNXXH0F4h+kFLiZ5FlUaa/xboQ+wgLcZhM4G898l3yPCB8q/JvEEa5TkK0q6+tF0L1f8h9+dD/UPLnQGitPheZDi4oHxSsWEfRoc//aWf/GCYVlgBAkzEsa9tLR+g2O7z0Syo7Bdz9uRz4RmG+7LgQ/DIMsqvP+JSTyOmUwVfotpGGIc4Uhyk1rhoCUJZ7q+pcMshB0u/PCA2m1dWcVCs/zpQWzKC7/l2uYeLPzNWKeqO8TkXqC3KZ5YvPRCyt7XAu4eiLi4oCsBHWNPH8olpZxp3cy2m64sretpDdfs0HDNg+GuZbjVjLeCOB/P502yZ5Kny7EtO2pmwJapnXCnwwl3DhO+YXwRRUOaExzVAUYJgN+DCf+eTiLWdSS7u7jgT9ZOwKX2cQ2UtMPCVmDhQbCXTu/xCV0zGj/OWQAUzlgSXAjveS28KWbStAfoQNKXrTUhu0MTsg8mtHLlX+Yvl21ytLrTdkNxpIn3nSfPX4FpI5G/+rIWVN2oL8Xyl72qnN2FBvTNZTk78CShd7XVowq8wnMGxWJ8R/Ak4fkMXihhvjPgkH5dvGlopT1Cko39yadcK/4M9UqMSwMy27ahtwQHku+nJMiKMW51hwsYaRccPoPgm9g737SqXX/VkkhOAK+zI3OcT5OydpZTwUCocRZMFZT9STUqkrezkOlolwLQ4VIADktBKzQuK8TLFrJp7ifPp+f/7zGvt2054rP2QM7NNi7QQji4/pY9lNfwZzy5OB1y4VnEWcp6ghO8sAM/S+viZHlYAuUV2tMHpFaHXsh6oV5ot4FJ+3TLe8neHlhvtzFJu8zy1qO3j/uOXUUj22WkcqymdwBuhw7AfaEOoHr99eHifA0PsHxT5wJH0ywKsnKO1XIx/f9MWaLaBJJ0HMNe9Ur/rs5gHRcCz3MsDVpkBwJ3bEyy4lbGb4t+oy2FuRGRe8c2Zco2hXQ2hTq0KfTCberpIsMMU2OY80aXcYrzKVURAhGL/cs8P42TROylUlGxdO23VQT2NZhfk6iMMLzlZ6hJDclrLEAWRA9SNU4boj5/9uuBMbxWPIpqhMeC/BwK2AqbX5WsI3MtsHYYAaKXGgE2wLrGVtvcMWWsLzKdsDlt+qhZkZjtJYA7G9ROXwLftmCqX63IT/CYGhrEKYf+WsR1eOKPDif+6xyT8Cm/aaaSsxwyH4+r5U9seS/kmtWuF1hYZFNMg3ZOK0J3JCfc2qshqMMDR/TiDhy3vhm4xZnakntw++bo+C2t5Jf3jeuPniVusBKc5hRXxertxatysWbC+b6zlXOUd2S0V/hgh3f44OES
365y9CYDAYz7IImjd/wryEpe/HadSExETMg7p+LuJ801uDHlwyZtGNfhaYL2MOHpp7JdAWgdw1kYWNneniWyJQVdhURqqamSFlRJXyEdUyFttTJSO7IbldWO3AaptoWNftXKTqMjlXTUtkglLXVUrlrqqR35amVfrdwYMmi0VUshbNANUmXmNXgjbgL/Adcvbuk= \ No newline at end of file diff --git a/anomalydetection/att_all_you_need/__init__.py b/anomalydetection/att_all_you_need/__init__.py new file mode 100644 index 0000000..9764abf --- /dev/null +++ b/anomalydetection/att_all_you_need/__init__.py @@ -0,0 +1 @@ +# -*- coding: UTF-8 -*- \ No newline at end of file diff --git a/anomalydetection/att_all_you_need/__pycache__/__init__.cpython-36.pyc b/anomalydetection/att_all_you_need/__pycache__/__init__.cpython-36.pyc new file mode 100644 index 0000000000000000000000000000000000000000..f6c90fd720361723419c0fcde2a51ccddbc90b2b GIT binary patch literal 184 zcmXr!<>gvawmD9mfq~&M5W@i@kmUfx#VkM~g&~+hlhJP_LlH4|xXIhDnk#W6nl>5edVVqShOP&g&EBsIAt zGe0jTv7{tEF()U!GQTuFFEuqKCO$qhFS8^*Uaz3?7Kcr4eoARhsvXGsVjyM!0G*mP A0ssI2 literal 0 HcmV?d00001 diff --git a/anomalydetection/att_all_you_need/__pycache__/encoder_self_att_predict.cpython-36.pyc b/anomalydetection/att_all_you_need/__pycache__/encoder_self_att_predict.cpython-36.pyc new file mode 100644 index 0000000000000000000000000000000000000000..b78c8c0473a30d3a8ac483c60ae4a626baf868b3 GIT binary patch literal 4003 zcmZ`+&2JmW6`$E%l1qwT`m_|wvVtZ?*gAsU91<9YlQfA71diaw4qUKux9gn|wbXK# znjI>Vy1bML&`Xf?pD2)`r~Vl|VQzr&ei!!FwVaAC&OSa_);Dl^{iimIr6#`=rG z7j^VYq9NwcFN>yVpeP(|Rn^JX7z7I?Sau4H`Vi zLLpU;o7x9ylwoMRXd*)6M&n6DSEiqfM&tL;@^z4c6@1^FI)yW2GdARC4w^e-cp=~J zd8QI2St?CcNF7FdrjkxZ<6~3RSrX^E=NdOxk(mPt#-Rveab$d&jKX~XYo^+G$G4OB z?(BSYyc;JnN1w=0rExmg86AI`seCUR?dc={QSw zLal=^NrK~S5~Na!9a-Kr$Yrt{(9^M!B98PH&e$4-`!YB?%nDuE17WW3sV@4+rx||r z(`L2uAtsvi;ugGcifU0SDw@yyp##dNe&G*Y?F}o1C%ACFaAuq{@TxP$nXURxL?~4!v9+R31!Z zs^c&Ttk@KA+sW>|xN z?R%<05G`C%>_OuRD;y}--Bz5|{qyU;|LLP=58sBCtKoPoQvu-CPoW@%sp3k|3e*%Q z8lJ(&m#?r;J_&N*EeUis4)$Ne&gJ>J#~Zv$=kI}cetx=J1=WIs@EdfNMEy z>Q23BMZ>8I2H;fjw087WQPTj`jPEn`hr$ugo4B)Zh4&dNs-lAX*4E&D0ut}jD!@(f zf`9I;v8j)h{tPp${WT!x>5AY3K&{~AlU25aH2|JFtdb-~-mt1`*crRh&Zoev+4t5C zUU|Z_uj|7G>DBfGU1wS^>d>xVYWF#*I3hwniXLRRM`-yH$N;|bv^nMQ864#|&Vl!+ zbbA=X1vU|`oUL2NiNz(If7`fAAtcI1<9lz?ZnSlkoxDDfsZ<1dl|7r}z_L65aG%L! 
z>n^!ZlHbGNY^CuZS=>ePTfS*3R<1W^oUt&z0(gTcKQwM4Q&Y*uNem3gBMpGSi?D@y z1@BZpAVP0f%OHqtHoEp2rcN=-`l#vP6o7Xm1MD@>d!}>hA#kA_4h=slJx?GOo4FEj zft}PT7M8cBcq#LnHwp&V7Om_7nCo`zh0I!G=HtC(rcIDo11M zoqN+NX0@Ytit4a}bb;UppS@+#>g+ozDQd%-s0_I9N<y2RnHgeBOA&JWCbDd9?}x zgYSi7sVKTD@>OeWK?s{=cbj*QHbyt1=gq)d5BP;H+TWpio2(7&SrY_cGeKY3~BcHiJ?Aa*Ik)FB-+%%%3)4WMueh3!$iu z)~cJvQ~ltl_qPye>#OqbA> zrz`Fe2$a9XybRg6}P3s9nZx=^&rvraVEFMR_fcD5XJ}^$V?$);W#9kE5YGiQ7z4afkMGo zm&iCaFNoDggGmYk9ceUZo}zfhGgorZRA~YZXo(D@m2f3 z!MosBA@hPRVYXxo-X>QX`}NkWCDo5HMkyVpBFxnfF)35iFNp1Ax$$jzzm=viQmTGt zr4^*Jk?EF?COS^?Ez(L^t`JoEZ(aUM+PeIewDn(KNwy5JUmPWDsEjquI1j?ZFh&iP zNSh)pHf_;cLX+IqUg?4!=?hQU&j9}}>i$V0@6m4gCWw!eRsI{c@A9U*TJak`{TBSi F{{Z?f1Ni^| literal 0 HcmV?d00001 diff --git a/anomalydetection/att_all_you_need/__pycache__/encoder_self_att_train.cpython-36.pyc b/anomalydetection/att_all_you_need/__pycache__/encoder_self_att_train.cpython-36.pyc new file mode 100644 index 0000000000000000000000000000000000000000..2d3109c620c61e435a3a8d7bbd0590da24102242 GIT binary patch literal 7139 zcmbVR%WovdeeUYl^z=M9BxlJbmy&2%ay*JmDRQ2&q-dqp+HqD3+Y+K=yz55qR1ewB zndxCw4VR>P0z@NYiNH=|*tzJG6k_)YYc!;X5 z_w%do_uw}hjqvaO<==m_@f())pVpqCJwFkkl+N}=5Zpg}>wQk+CXmp$AyV6}T-&VI}zN_6;^Ihw%neTdcJ+ZPj!nJH; zW{ID0XJm7C#=33pasN}RyZLR4S9tKL#e>AouFstA7D}o-L`gU=*+xl?*HKcRm)t-} zgEvvqoR{qI72d*l&+t{ghVQd{owxCQj@3ztLx!(D!TGsIs0RdUcGQW7=3*NI&1lt zh}8UKnRSlyj%)>#7VO-f*~odw`9*b}s~{IVvL3NV_9N$!+xIx;|1?n50vNkx&De~g zPl#5v^DF(QzgV+#x5uy|`VkBQgmnZ1g?%jU zetXK4b#9+K=k7VfK;F;nBlm;y9y?6e=R3X=pC$t6v-e)rEb4d!kK?qL=n5D4IG-q8 zIY@cL)1j`6qJt#nQv2gR6&Of}24?*te1kC5%ZKB)zDcW~CHdaj3-d|#q9`4uDvI{r z805WpAm2h_(V?@hJvkWz=E0(wXrsTjM4LZm9e)N`?;-&H*@GeQ%j;jB|OZ zCVW5VlRyE^rnRxnGWAPTpBj7M00`_2~iuM`!)*}W?W z&W}y>zxZ{UxPyZjuy_&W+K55GEhk437f%r`-q}MHKR1-dkRqr5@DMqaj&)}Z416*m(+^FeaqMcaKKwek+xp){0 z{x(X=yYWI6+RwYYZk;uqknOOumBro6+jf{}p?SwAsE^`d5=Adk`03Q}J|Fyt77&1Z_O^HtzIYw2#J4DVgCgQPgL?5MrOe4uv^YvbiUi6( zLnyJ%>a7x{C|UlNSpOP|z7*@eQ3Tj!ML^s{;~pI`{T4_!X#WKYF3^r6v^PMehql~S z_9y1RBc~uefR5MR#??S0@IjrZGjcn)lB-_Hg;#PlVQPJ5nA-L#50Jr+wVn3CO$Orv5E5wm>Uij zbdUnNyj4;zaqI#Y%keKH-r~NlLjMzVlGNG)SGH`Q`S?PfL83Jw&(3zA@^qJP;qLNw zXy6Khc?>T>a4f|ei7YyG27yG+z&2uro&e5t8WcXBfX7Lwl|s(VD$vd#_Mrc}=uFN`W}l&S7$@z2kT=NwiBtiQl1G_I=I%ShGLr`pGB}`={M1 z42-!9U?&$Gb3tH#gKm=4F5uLMTt~di@aI4425f`1?54Zsw%wK+x@XTl4YLc51GWBo zG)HXo7YGZQTQB(uCClHE=KdcdonB1Ooex1@8ia!UT|^foSBmku*-;90droE^>ijuH zL9Z+2M{vPF;wroRmSL3FE*PcbUJ{&3Rr$*~4FbsvzjDb7r2Kj?p?&=;%}1LISUnI_veX(#$EB5+?f)|poK2m5y!L%u zx(yp(>P>w`lxg(wlbw1zLRp!UQ$`9;}8^JA@wZZCORIh2!F|5XhJXtyN(~ zupg66QAO`62#OtA;v9qqF~}K-u7Z^{*R3}CFbfn|p_WK_87YPpUMWYWxtVqJ@<*1c zsxYgOz%yT19iY9i2!XiZ5{X?ghUpkSK#*ic8e_Ol#xULORU_C*M+z57mX)?s{^UJw zq5QvS90HJUoqc1U=p$B1B=QFnso<+Pg5LQc8GM;ttAh^`u+pfPe8G0fl0dsoUE5>c z4Fn0{o;(&rG1?(}!;|Ads$X6i^k$QBRzi-g@d8{RfsWNwc@0?Y2OvYtVRip#9bnhin_PDvnXRe72-N zlD21X`}auD6~acf|GG7`&GRL}?tBER^83~gtUtw7PhAA>WL#K4?!nYYu5$7=1U}qg znLDG>SApk%+lRJz01@qRC{^4o@pyELN(u)iOE80-Y%q8WcC3q1ya#5fO5u}r47__N zGuS=aqZWl{1gn2nGhs|wBU{s#p_dp**$K3p<1V$E$s95K4O|eZi7G`YA~@ibFdi-V z8ywuG7*c6{agB6^-PO`>piak0q;k62h51T1B95pBlih|)hB3q?Y+1T4FFA&|6Ccr3 zSIJ86>?bOk8^DV`4^wqu{C@w94#uTq;g;ICHy1bMnwwj*H%Kf(brU3_dgO_tA8C}d zC39<^WBUqxgZFFW^uPc(DcB`Ny=W9QWzT}lLCmI21=R;G%n%C8it>sTI%ndWpSsf) zDaTN3R*m}8%)%Kl&e~_}(eb#sf+6vW24w0l?iGRR3uWEKoQP-MH@x4*f1@(4CibYtHpM<&YR$|&8&?c zX63-bjbbgkHnYHDH^`NYZyTx2nFY>lqwYGD9i4#7;84!CigoU9g7J#B+AdnzjTyUP z&7hc~-@FEfg$I0cwUa%QJxf^pjL9D$_Z+3+@D9{&0p{I=J9^VX+VrFRbO$T`O3}=o zpMjxyhLD&i{y;H9*lMr#TpO_-{FW|Fi-p65G=%?I$dYxd2{vu$+yJ<=ama8LBrFA) z7UHfMZ{czB`s5|l)w4w8(Y}be4j#n4L-GP?H&qFyk}4T%AL1P9tw?ByPiP1mz9iSM 
zw3kYr!qrJWZ6u`lnCe{_ALDGLBH_Z*p6Wa!22?R8Xycly!4b=oQR$~@%&FWssHASr zv-3ODA?3~4%JKrMBF|MRC^q14N(Rw_?o$DmJg!q$4W|>!7~(0l?)rRSNzq5ncE~s1Q8F&*+#6ih30NIYq={rP~fn zNmrQwO8ZZ8SFIFHW<3t5M!tq_&=T1WY{`}rzz??uy4nCqh4v2Y&25xHLBiiIL=Z|L zdUl!kNA&9Kma;W4e&M~*s0353%$f~|bF}NOFB)>@Ja)>uxF^n&nY5BMvz*g>#2<5<2Z#n zfqNp-)&Bd&e^|Z>@{AZO!RUZSpqB8aJzhG$rM9YrH*kJ%nZHH)g?tH7V2HhrxMc@U bVAGkIKj*p~gloUQ_Vu;zu5AXbVD 0.001: + filter.append(p) + return filter + + +def generate_robust_seq_label(file_path, sequence_length): + num_of_sessions = 0 + input_data, output_data, mask_data = [], [], [] + train_file = pd.read_csv(file_path) + i = 0 + while i < len(train_file): + num_of_sessions += 1 + line = [int(id) for id in train_file["Sequence"][i].split(' ')] + line = line[0:sequence_length] + if len(line) < sequence_length: + line.extend(list([0]) * (sequence_length - len(line))) + input_data.append(line) + output_data.append(int(train_file["label"][i])) + i += 1 + data_set = TensorDataset(torch.tensor(input_data), torch.tensor(output_data)) + return data_set + + +def get_batch_semantic_with_mask(seq, pattern_vec_file): + with open(pattern_vec_file, 'r') as pattern_file: + class_type_to_vec = json.load(pattern_file) + print(seq.shape) + batch_data = [] + mask_data = [] + for s in seq: + semantic_line = [] + for event in s.numpy().tolist(): + if event == 0: + semantic_line.append([-1] * 300) + else: + semantic_line.append(class_type_to_vec[str(event)]) + batch_data.append(semantic_line) + mask = make_src_mask(s, 0) + mask_data.append(mask) + return batch_data, mask_data + + +def do_predict(input_size, hidden_size, num_layers, num_classes, sequence_length, model_path, test_file_path, batch_size, pattern_vec_json, dropout, num_of_heads, pf_dim): + + sequential_model = load_sequential_model(input_size, hidden_size, num_layers, num_classes, model_path, dropout, num_of_heads, pf_dim) + + start_time = time.time() + TP = 0 + FP = 0 + TN = 0 + FN = 0 + + # create data set + sequence_data_set = generate_robust_seq_label(test_file_path, sequence_length) + # create data_loader + data_loader = DataLoader(dataset=sequence_data_set, batch_size=batch_size, shuffle=False, pin_memory=False) + + print('predict start') + with torch.no_grad(): + for step, (seq, label) in enumerate(data_loader): + # first traverse [0, window_size) + batch_data, mask_data = get_batch_semantic_with_mask(seq, pattern_vec_json) + seq = torch.tensor(batch_data) + mask_data = torch.tensor(mask_data) + seq = seq.view(-1, sequence_length, input_size).to(device) + #label = torch.tensor(label).view(-1).to(device) + output = sequential_model(seq, mask_data)[:, 0].clone().detach().cpu().numpy() + predicted = (output > 0.5).astype(int) + label = np.array([y for y in label]) + TP += ((predicted == 1) * (label == 1)).sum() + FP += ((predicted == 1) * (label == 0)).sum() + FN += ((predicted == 0) * (label == 1)).sum() + TN += ((predicted == 0) * (label == 0)).sum() + ALL = TP + TN + FP + FN + # Compute precision, recall and F1-measure + if TP + FP == 0: + P = 0 + else: + P = 100 * TP / (TP + FP) + + if TP + FN == 0: + R = 0 + else: + R = 100 * TP / (TP + FN) + + if P + R == 0: + F1 = 0 + else: + F1 = 2 * P * R / (P + R) + + Acc = (TP + TN) * 100 / ALL + + print('FP: {}, FN: {}, TP: {}, TN: {}'.format(FP, FN, TP, TN)) + print('Acc: {:.3f}, Precision: {:.3f}%, Recall: {:.3f}%, F1-measure: {:.3f}%'.format(Acc, P, R, F1)) + print('Finished Predicting') + elapsed_time = time.time() - start_time + print('elapsed_time: {}'.format(elapsed_time)) + + #draw_evaluation("Evaluations", 
['Acc', 'Precision', 'Recall', 'F1-measure'], [Acc, P, R, F1], 'evaluations', '%') \ No newline at end of file diff --git a/anomalydetection/att_all_you_need/encoder_self_att_train.py b/anomalydetection/att_all_you_need/encoder_self_att_train.py new file mode 100644 index 0000000..c9e532e --- /dev/null +++ b/anomalydetection/att_all_you_need/encoder_self_att_train.py @@ -0,0 +1,296 @@ +# -*- coding: UTF-8 -*- +import torch +import torch.nn as nn +import torch.optim as optim +import torch.nn.functional as F +import torch.optim as optim +from torch.utils.data import TensorDataset, DataLoader + +import pandas as pd + + +import numpy as np + +import random +import math +import time +import json +import os + +# use cuda if available otherwise use cpu +device = torch.device("cuda" if torch.cuda.is_available() else "cpu") + +def make_src_mask(src, src_pad_idx): + # src = [batch size, src len] + + src_mask = (src != src_pad_idx) # + + # src_mask = [batch size, src len] # + + return src_mask.clone().detach().numpy().tolist() + + +class Encoder(nn.Module): + def __init__(self, + input_dim, + output_dim, # + hid_dim, + n_layers, + n_heads, + pf_dim, + dropout, + device, + max_length=100): + super().__init__() + + self.device = device + + self.tok_embedding = nn.Linear(input_dim, hid_dim) # + self.pos_embedding = nn.Embedding(max_length, hid_dim) + + self.layers = nn.ModuleList([EncoderLayer(hid_dim, + n_heads, + pf_dim, + dropout, + device) + for _ in range(n_layers)]) + + self.dropout = nn.Dropout(dropout) + + self.scale = torch.sqrt(torch.FloatTensor([hid_dim])).to(device) + + self.output = nn.Linear(hid_dim, output_dim) # + + def forward(self, src, src_mask): + # src = [batch size, src len, input_dim] # + # src_mask = [batch size,1, 1, src len] # + + + batch_size = src.shape[0] + src_len = src.shape[1] + + pos = torch.arange(0, src_len).unsqueeze(0).repeat(batch_size, 1).to(self.device) + + # pos = [batch size, src len] + + src = self.dropout((self.tok_embedding(src) * self.scale) + self.pos_embedding(pos)) + + # src = [batch size, src len, hid dim] + + for layer in self.layers: + src = layer(src, src_mask) + + # src = [batch size, src len, hid dim] + output = self.output(src) # + output = torch.sigmoid(output[:, -1, :]) # + return output + + +class EncoderLayer(nn.Module): + def __init__(self, + hid_dim, + n_heads, + pf_dim, + dropout, + device): + super().__init__() + + self.self_attn_layer_norm = nn.LayerNorm(hid_dim) + self.ff_layer_norm = nn.LayerNorm(hid_dim) + self.self_attention = MultiHeadAttentionLayer(hid_dim, n_heads, dropout, device) + self.positionwise_feedforward = PositionwiseFeedforwardLayer(hid_dim, + pf_dim, + dropout) + self.dropout = nn.Dropout(dropout) + + def forward(self, src, src_mask): + # src = [batch size, src len, hid dim] + # src_mask = [batch size, src len] + + # self attention + _src, _ = self.self_attention(src, src, src, src_mask) + + # dropout, residual connection and layer norm + src = self.self_attn_layer_norm(src + self.dropout(_src)) + + # src = [batch size, src len, hid dim] + + # positionwise feedforward + _src = self.positionwise_feedforward(src) + + # dropout, residual and layer norm + src = self.ff_layer_norm(src + self.dropout(_src)) + + # src = [batch size, src len, hid dim] + + return src + + +class MultiHeadAttentionLayer(nn.Module): + def __init__(self, hid_dim, n_heads, dropout, device): + super().__init__() + + assert hid_dim % n_heads == 0 + + self.hid_dim = hid_dim + self.n_heads = n_heads + self.head_dim = hid_dim // n_heads + + self.fc_q = 
nn.Linear(hid_dim, hid_dim) + self.fc_k = nn.Linear(hid_dim, hid_dim) + self.fc_v = nn.Linear(hid_dim, hid_dim) + + self.fc_o = nn.Linear(hid_dim, hid_dim) + + self.dropout = nn.Dropout(dropout) + + self.scale = torch.sqrt(torch.FloatTensor([self.head_dim])).to(device) + + def forward(self, query, key, value, mask=None): + batch_size = query.shape[0] + + # query = [batch size, query len, hid dim] + # key = [batch size, key len, hid dim] + # value = [batch size, value len, hid dim] + + Q = self.fc_q(query) + K = self.fc_k(key) + V = self.fc_v(value) + + # Q = [batch size, query len, hid dim] + # K = [batch size, key len, hid dim] + # V = [batch size, value len, hid dim] + + Q = Q.view(batch_size, -1, self.n_heads, self.head_dim).permute(0, 2, 1, 3) + K = K.view(batch_size, -1, self.n_heads, self.head_dim).permute(0, 2, 1, 3) + V = V.view(batch_size, -1, self.n_heads, self.head_dim).permute(0, 2, 1, 3) + + # Q = [batch size, n heads, query len, head dim] + # K = [batch size, n heads, key len, head dim] + # V = [batch size, n heads, value len, head dim] + + energy = torch.matmul(Q, K.permute(0, 1, 3, 2)) / self.scale + + # energy = [batch size, n heads, query len, key len] + + if mask is not None: + mask = mask.view(batch_size, 1, 1, -1).to(device) + energy = energy.masked_fill(mask == 0, -1e10) + + attention = torch.softmax(energy, dim=-1) + + # attention = [batch size, n heads, query len, key len] + + x = torch.matmul(self.dropout(attention), V) + + # x = [batch size, n heads, query len, head dim] + + x = x.permute(0, 2, 1, 3).contiguous() + + # x = [batch size, query len, n heads, head dim] + + x = x.view(batch_size, -1, self.hid_dim) + + # x = [batch size, query len, hid dim] + + x = self.fc_o(x) + + # x = [batch size, query len, hid dim] + + return x, attention + + +class PositionwiseFeedforwardLayer(nn.Module): + def __init__(self, hid_dim, pf_dim, dropout): + super().__init__() + + self.fc_1 = nn.Linear(hid_dim, pf_dim) + self.fc_2 = nn.Linear(pf_dim, hid_dim) + + self.dropout = nn.Dropout(dropout) + + def forward(self, x): + # x = [batch size, seq len, hid dim] + + x = self.dropout(torch.relu(self.fc_1(x))) + + # x = [batch size, seq len, pf dim] + + x = self.fc_2(x) + + # x = [batch size, seq len, hid dim] + + return x + + +def generate_robust_seq_label(file_path, sequence_length, pattern_vec_file): + num_of_sessions = 0 + input_data, output_data, mask_data = [], [], [] + train_file = pd.read_csv(file_path) + for i in range(len(train_file)): + num_of_sessions += 1 + line = [int(id) for id in train_file["Sequence"][i].split(' ')] + line = line[0:sequence_length] + if len(line) < sequence_length: + line.extend(list([0]) * (sequence_length - len(line))) + input_data.append(line) + output_data.append(int(train_file["label"][i])) + data_set = TensorDataset(torch.tensor(input_data), torch.tensor(output_data)) + return data_set + + +def get_batch_semantic_with_mask(seq, pattern_vec_file): + with open(pattern_vec_file, 'r') as pattern_file: + class_type_to_vec = json.load(pattern_file) + batch_data = [] + mask_data = [] + for s in seq: + semantic_line = [] + for event in s.numpy().tolist(): + if event == 0: + semantic_line.append([-1] * 300) + else: + semantic_line.append(class_type_to_vec[str(event)]) + batch_data.append(semantic_line) + mask = make_src_mask(s, 0) + mask_data.append(mask) + return batch_data, mask_data + + +def train_model(sequence_length, input_size, hidden_size, num_of_layers, num_of_classes, num_epochs, batch_size, root_path, model_output_directory, data_file, 
pattern_vec_file, dropout, num_of_heads, pf_dim): + print("Train num_classes: ", num_of_classes) + model = Encoder(input_size, num_of_classes, hidden_size, num_of_layers, num_of_heads, pf_dim, dropout, device).to(device) + # create data set + sequence_data_set = generate_robust_seq_label(data_file, sequence_length, pattern_vec_file) + # create data_loader + data_loader = DataLoader(dataset=sequence_data_set, batch_size=batch_size, shuffle=True, pin_memory=False) + + # Loss and optimizer classify job + criterion = nn.BCELoss() + optimizer = optim.Adam(model.parameters()) + + # Training + for epoch in range(num_epochs): + train_loss = 0 + for step, (seq, label) in enumerate(data_loader): + batch_data, mask_data = get_batch_semantic_with_mask(seq, pattern_vec_file) + seq = torch.tensor(batch_data) + #print(seq.shape) + seq = seq.clone().detach().view(-1, sequence_length, input_size).to(device) + #print(seq.shape) + output = model(seq, torch.tensor(mask_data)) + + loss = criterion(output.squeeze(-1), label.float().to(device)) + + # Backward and optimize + optimizer.zero_grad() + loss.backward() + train_loss += loss.item() + optimizer.step() + print('Epoch [{}/{}], training_loss: {:.4f}'.format(epoch + 1, num_epochs, train_loss / len(data_loader.dataset))) + if (epoch + 1) % num_epochs == 0: + if not os.path.isdir(model_output_directory): + os.makedirs(model_output_directory) + e_log = 'Adam_batch_size=' + str(batch_size) + ';epoch=' + str(epoch+1) + torch.save(model.state_dict(), model_output_directory + '/' + e_log + '.pt') + print('Training finished') \ No newline at end of file diff --git a/anomalydetection/bi_lstm_only/__init__.py b/anomalydetection/bi_lstm_only/__init__.py new file mode 100644 index 0000000..9764abf --- /dev/null +++ b/anomalydetection/bi_lstm_only/__init__.py @@ -0,0 +1 @@ +# -*- coding: UTF-8 -*- \ No newline at end of file diff --git a/anomalydetection/bi_lstm_only/__pycache__/__init__.cpython-36.pyc b/anomalydetection/bi_lstm_only/__pycache__/__init__.cpython-36.pyc new file mode 100644 index 0000000000000000000000000000000000000000..5d1f1ba59dfc697e4409764c20b836abfe53b45b GIT binary patch literal 180 zcmXr!<>g{qH7!n@fq~&M5W@i@kmUfx#VkM~g&~+hlhJP_LlH4|xXIhDnk#W6nl>5edVVqShOP&g&EBsIAt wGe0jTDKkE&xFk0|KQE^;CO$qhFS8^*Uaz3?7Kcr4eoARhsvXGcVjyM!06elYq5uE@ literal 0 HcmV?d00001 diff --git a/anomalydetection/bi_lstm_only/__pycache__/bi_lstm_predict.cpython-36.pyc b/anomalydetection/bi_lstm_only/__pycache__/bi_lstm_predict.cpython-36.pyc new file mode 100644 index 0000000000000000000000000000000000000000..58a695c7b45a8649409368a439d9223c6bdbb144 GIT binary patch literal 3244 zcmZuzOK%&=5$>J`Um__=qF$D)rQc!JR$^>`Yz)I#V;vy~yn^F3uz@0HFq&>jLk(xB zo~CWlJtsOx!%qG}PWcl#=a_R}bJ8`)$%iCgH$_P>c8Gpdb#+&DRq?C-s?n&u{pVl* zn9LFKFLLQ~F#iTE-$Ta;qY2rWjC6-u+UdDFuAO;19!4ke`=y-{C7kriXq8@dOvE?L zO&#Wq=}wvXtn`lT)L5BSFwU?lt6{7&`aS8+oI(%T?o#c4moc7bFB-B?yU}168C%Tf zqczdKMa%cmNirsdBdLOYW9P%u!hQ2Zxjko0o{*Pc7oK#b#~kLqBg|v|JEFXvKPJ5r zD~;(BGRB@UEj;D)%F=%7Ja=ZB0c)q0oG0^w7RX%a_ne zBB^u~>A5)D8xwOXcG2ywuYEZjBwV}waG?GDBnzeXb17m_`5=iU?n~3Z5mCoEpANr$ zu>0M~R-Eu0<5M1rG)@n8`zNn6ksoG*-6T5*(=a*7<9zpNcJO#Q57VpWFmBa zbrXp5tFMDNAd0+f7gQ1j=P`2R2~W{KJI9`JfH1DY4c&yNT}L~cKiBk)z3QZEVEI$+ zVG`#u%KC$eF-Av;jMjh5oT=a^dbeBAp4q6qY`{}p6+C1~obp_haDXVAu4=j(I$a6} zkYWgkmxg_=8?WP(Wv>Gmb|4S+JRBPw@sm8r`SFma5zjTr`9?9hpQmpE0F^W)t=|n7~i)lya2Is7cKCr5;iTN$4xThh**f$}_3@ z9z06GD~*wFvdl`o$}~ZhZ&UKc#Ji4e#rwft%wjQ#eVFLlUMQo(Adg2}m~bHkbtGV5 z;%d@k)a;wTSTG3X;e*cGqFWWl>)Mf-E;0Tpj<}vhuCgGPNQl5>4Kg_s(II>SzV`Sl z=;tW51Y_m0krPH4kkIXl=Abv?a7Uss`x& 
zVoX=D-{$;PwR%+gNX6fcc4HH)sizg=Z8KBLc3i&Lr>-59A^%@B-T5E2U}YAtt8HiP zbp34A5%X%v@|Li=XlIKb=`kQ5-Y^>0kN0&9u!L>N?K3fo4}BcN1qVLaq+pGU!|IR1=H!7CmGMoiC{nan0YADH;*`c%jY&{&ukgf5?PwTm6PIh6r{f}>lLf_#Ddx@J?u zI#oL_o@r+b?Zr06ZS6jO`c%(`d#Nc%3F=+QxX{fDhDi}&N{6mS*)Wv>?xE+Rp%5H| zy`Qx>WkZSbrmJTu$eE_kG<^=$_n|t9bp3M3Kd)o8i`+Ny%SY%4 z_3(7?P`sLS1x0MhX*m_p(_am<8oh}XBW2_~XUUek7C0TqUcs+Mo0wJTJZ3cqKlDwU z)ubhychR>&we9XY6)&N^+nA)1ljXW%>-%P!Y9~#_*I3>8^7D7)=H++gIUz+Dr}hP5 zQ%<1P5?zn;Abb_Za9uJ^wxU=2WhwGv# zT2I*Uv}lVC+JWG=*iQEucw>7zTz7BHCX-Me-;uG3@At4jX!4siEB)xrowHV|fL&ROrg$GgOKxUb2OiSPRBkX_YLnOB=PL+{7z6W^BPr zS6EM7%r-FVmzJ>CS?MfjceC_Rw@O#u7IdzidBUMqFficm-G=z>{ZNe#gK;birMvsF zh-EZVah`?gGYd>StuBwfpj<#VP7$SJ-HNm6Oqtu|3Mk9I zA)hbjhW&UPe1~N@Aa1#C2Eix|iy{dA&Yq7yxpeQw@wGUO3bfb3Ll$QT_a?`Wa#4#Z9!L*1}t&)5DaX_L3d6pi(7$_OW+3xgM_CR|TrC3A7 zc#nJd27C>*!+mSsGxpv+QQnBuOFw!*R6ao|uAovZf#DOFFd<`7-&xp+tKjmL^}VY$ zImOt5dHEi>n9AkoP`mR;=7rIo+yDWiOl_%Ldm?%mkD?t%Zekv4Cqr6?7qvCID9_PW zQ@ufpZDaJ>;%{Q2@K7=9JU`FptF>&e?D&p&E;btRK12uN)I#wR2qE}_KI@)USrym^ z3a~%Ii|cl5Z?VcKqO?f(l0ryqwxqK8#@nf7Q$i10;&Kg)Pd)}>OTCPKz@cS&BMDY7o5KMA0OR3)pf}>fO)+@#d zYCe97t8{nih@&V?gI8$paCsK%k`!wm6?wK%sLwln-!pTg6LcZpCy9&-KZfivt84{G zfby(A->e*ARqo?!rIYXlyTa~Ys=U&JYV8GwS{s$GYz5FL{lr_quY^MihdP_33qWaJ zW&ja)gH`U)`8(_m4DdU4lYI(ku387|fLW|+R~^s;xP#tbtM21(%I=cZm!Q=W?yp$Y zA}YcY4T59Qe8Rv(O|P}!SJ;xKry3>vxa36f0h9kY`ttD=<)6%MVjm>9Sh;08Apmpn z@eR1d7FGa*9n9Kh9OZT7iZR1ZP@+|sMORw@4N;LKFDXjpekqaYXk{Hemq#)GNyXRYXOBz*wAw zC4o#qw}C1Yqs+iD*ZS0l0o7}|)ocH+L=wIrq+o~<9h4rvP3|LZIJ||K9{Lw}-?A_> zuzteNad{KFM{PQTAE5}LmVt(E;k0E#;9*p4fj~hIZ39`=2AsEasgGyOkpmz<;0(@e za)=Vi33hr3o-aV53!rG0tZXgps;9iNS9+AZh{jW=TER1ImmS>cKjl?l`AKuZs#VoW z+5m(O9tE$~zUGg8)g^jY*w^g-^XimnR_l+as#mU<$1MO#r%?vgMmb1U@SOT6t6~LI zIRahH$Dr(%1KhV+u9lnSMm>w?yH&1H76>TVLdHk!I(q911_)Zm+G(0Q`t}ZK z@|0+8K>BiBZImm?=7OJP3jia|EmsCiG#^;2))H_O)ZE`=8%|yke?c3LO9Ni!fBL^c~(v9M9HXg&_ThllT zCeb99$9RPvNFb+r87k**U8Q@s?|xpr^Lcgu?QN5}AQcMIyuj6;U)p_dTs`OS%3^SH54(Nuc~*^^KiP?3b++R6*<;8`OTiVG-3`;+iNgicWdj8lXU zp}#^A^on={_y*-7+BqvJA(LBFz05#+OGH>&MYm{dj8@bpY05lTHB1eVC`2jQ+Q5WK zuVEnFG$=uUA+HjPUGj)J2d3o?4I$Dr1xwmBm*`bQg;|u1z~d5Pbldb0aqx#ldymY& z8r>Y37GR~DM%BpbU@^m?ZW*&1o6CqO+qhM`CfI1N<`cMR>=a?!!Rs*At}iew?csvr zm#CQK^G&OT(y{uyXZt8?z_0-j%eVR%BeB7^&9Mq(JBOan~?o1i3?!-k9J`7`M2#?IWVG_UE@r}Ea46o?uWy0)VhK`UVen64T W 0])) + # for i in range(len(line) - window_size): + # inputs.add(tuple(line[i:i+window_size])) + log_keys_sequences.append(tuple(line)) + return log_keys_sequences + + + +def load_sequential_model(input_size, hidden_size, num_layers, num_classes, model_path): + + model1 = Model(input_size, hidden_size, num_layers, num_classes, if_bidirectional=True, batch_size=0).to(device) + model1.load_state_dict(torch.load(model_path, map_location='cpu')) + model1.eval() + print('model_path: {}'.format(model_path)) + return model1 + + +def filter_small_top_k(predicted, output): + filter = [] + for p in predicted: + if output[0][p] > 0.001: + filter.append(p) + return filter + +def do_predict(input_size, hidden_size, num_layers, num_classes, window_length, model_path, anomaly_test_line_path, test_file_path, num_candidates, pattern_vec_file): + vec_to_class_type = {} + with open(pattern_vec_file, 'r') as pattern_file: + i = 0 + for line in pattern_file.readlines(): + pattern, vec = line.split('[:]') + pattern_vector = tuple(map(float, vec.strip().split(' '))) + vec_to_class_type[pattern_vector] = i + i = i + 1 + + sequential_model = load_sequential_model(input_size, hidden_size, num_layers, num_classes, model_path) + + start_time = time.time() + TP = 0 + FP = 0 + TN = 0 + FN = 0 + 
ALL = 0 + abnormal_loader = generate(test_file_path, window_length) + abnormal_label = [] + with open(anomaly_test_line_path) as f: + abnormal_label = [int(x) for x in f.readline().strip().split()] + print('predict start') + with torch.no_grad(): + count_num = 0 + current_file_line = 0 + for line in abnormal_loader: + i = 0 + # first traverse [0, window_size) + while i < len(line) - window_length: + lineNum = current_file_line * 200 + i + window_length + 1 + count_num += 1 + seq = line[i:i + window_length] + label = line[i + window_length] + for n in range(len(seq)): + if current_file_line * 200 + i + n + 1 in abnormal_label: + i = i + n + 1 + continue + seq = torch.tensor(seq, dtype=torch.float).view(-1, window_length, input_size).to(device) + #label = torch.tensor(label).view(-1).to(device) + output = sequential_model(seq) + predicted = torch.argsort(output, 1)[0][-num_candidates:] + predicted = filter_small_top_k(predicted, output) + print('{} - predict result: {}, true label: {}'.format(count_num, predicted, vec_to_class_type[tuple(label)])) + if lineNum in abnormal_label: ## if an abnormal log appears, skip the abnormal logs in the following predictions so that only normal logs are used for prediction + i += window_length + 1 + else: + i += 1 + ALL += 1 + if vec_to_class_type[tuple(label)] not in predicted: + if lineNum in abnormal_label: + TP += 1 + else: + FP += 1 + else: + if lineNum in abnormal_label: + FN += 1 + else: + TN += 1 + current_file_line += 1 + # Compute precision, recall and F1-measure + if TP + FP == 0: + P = 0 + else: + P = 100 * TP / (TP + FP) + + if TP + FN == 0: + R = 0 + else: + R = 100 * TP / (TP + FN) + + if P + R == 0: + F1 = 0 + else: + F1 = 2 * P * R / (P + R) + + Acc = (TP + TN) * 100 / ALL + + print('FP: {}, FN: {}, TP: {}, TN: {}'.format(FP, FN, TP, TN)) + print('Acc: {:.3f}, Precision: {:.3f}%, Recall: {:.3f}%, F1-measure: {:.3f}%'.format(Acc, P, R, F1)) + print('Finished Predicting') + elapsed_time = time.time() - start_time + print('elapsed_time: {}'.format(elapsed_time)) + + #draw_evaluation("Evaluations", ['Acc', 'Precision', 'Recall', 'F1-measure'], [Acc, P, R, F1], 'evaluations', '%') \ No newline at end of file diff --git a/anomalydetection/bi_lstm_only/bi_lstm_train.py b/anomalydetection/bi_lstm_only/bi_lstm_train.py new file mode 100644 index 0000000..c242a7b --- /dev/null +++ b/anomalydetection/bi_lstm_only/bi_lstm_train.py @@ -0,0 +1,116 @@ +# -*- coding: UTF-8 -*- +# -*- coding: UTF-8 -*- +import torch +import torch.nn as nn +import torch.optim as optim +import torch.nn.functional as F +import os +from tensorboardX import SummaryWriter +from torch.utils.data import TensorDataset, DataLoader + +# use cuda if available otherwise use cpu +from torch.autograd import Variable + +device = torch.device("cuda" if torch.cuda.is_available() else "cpu") + +class Model(nn.Module): + def __init__(self, input_size, hidden_size, num_of_layers, out_size, if_bidirectional, batch_size): + super(Model, self).__init__() + self.hidden_size = hidden_size + self.num_of_layers = num_of_layers + self.lstm = nn.LSTM(input_size, hidden_size, num_of_layers, batch_first=True, bidirectional=if_bidirectional) + self.fc = nn.Linear(hidden_size*2, out_size) + self.batch_size = batch_size + if if_bidirectional: + self.num_of_directions = 2 + else: + self.num_of_directions = 1 + + + # self.out = nn.Linear(in_features=in_features, out_features=out_features) + + + def init_hidden(self, size): + # size self.batch_size same + h0 = torch.zeros(self.num_of_layers*self.num_of_directions, size, self.hidden_size).to(device) + c0 = 
torch.zeros(self.num_of_layers*self.num_of_directions, size, self.hidden_size).to(device) + return (h0, c0) + + def forward(self, input): + # h_n: hidden state h of last time step + # c_n: hidden state c of last time step + out, _ = self.lstm(input, self.init_hidden(input.size(0))) + # out shape [batch, seqlen, numdirec*hidden] + out = out[:, -1, :] + # tmp1, tmp2 = out.split(self.hidden_size, 1) + out = self.fc(out) + # print('out[:, -1, :]:') + # print(out) + return out + + +def generate_seq_label(file_path, window_length, pattern_vec_file): + vec_to_class_type = {} + with open(pattern_vec_file, 'r') as pattern_file: + i = 0 + for line in pattern_file.readlines(): + pattern, vec = line.split('[:]') + pattern_vector = tuple(map(float, vec.strip().split(' '))) + vec_to_class_type[pattern_vector] = i + i = i + 1 + num_of_sessions = 0 + input_data, output_data = [], [] + with open(file_path, 'r') as file: + for line in file.readlines(): + num_of_sessions += 1 + line = tuple(map(lambda n: tuple(map(float, n.strip().split())), [x for x in line.strip().split(',') if len(x) > 0])) + if len(line) < window_length + 1: + #print(line) + continue + for i in range(len(line) - window_length): + input_data.append(line[i:i + window_length]) + # line[i] is a list need to read file form a dic{vec:log_key} to get log key + output_data.append(vec_to_class_type[line[i + window_length]]) + data_set = TensorDataset(torch.tensor(input_data, dtype=torch.float), torch.tensor(output_data)) + return data_set + + +def train_model(window_length, input_size, hidden_size, num_of_layers, num_of_classes, num_epochs, batch_size, root_path, model_output_directory, data_file, pattern_vec_file): + # log setting + log_directory = root_path + 'log_out/' + log_template = 'Adam_batch_size=' + str(batch_size) + ';epoch=' + str(num_epochs) + + print("Train num_classes: ", num_of_classes) + model = Model(input_size, hidden_size, num_of_layers, num_of_classes, True, batch_size).to(device) + # create data set + sequence_data_set = generate_seq_label(data_file, window_length, pattern_vec_file) + # create data_loader + data_loader = DataLoader(dataset=sequence_data_set, batch_size=batch_size, shuffle=True, pin_memory=False) + writer = SummaryWriter(logdir=log_directory + log_template) + + # Loss and optimizer classify job + criterion = nn.CrossEntropyLoss() + optimizer = optim.Adam(model.parameters()) + + # Training + for epoch in range(num_epochs): + train_loss = 0 + for step, (seq, label) in enumerate(data_loader): + seq = seq.clone().detach().view(-1, window_length, input_size).to(device) + output = model(seq) + + loss = criterion(output, label.to(device)) + + # Backward and optimize + optimizer.zero_grad() + loss.backward() + train_loss += loss.item() + optimizer.step() + print('Epoch [{}/{}], training_loss: {:.4f}'.format(epoch + 1, num_epochs, train_loss / len(data_loader.dataset))) + if (epoch + 1) % num_epochs == 0: + if not os.path.isdir(model_output_directory): + os.makedirs(model_output_directory) + e_log = 'Adam_batch_size=' + str(batch_size) + ';epoch=' + str(epoch+1) + torch.save(model.state_dict(), model_output_directory + '/' + e_log + '.pt') + writer.close() + print('Training finished') \ No newline at end of file diff --git a/anomalydetection/deeplog/Model1/__pycache__/__init__.cpython-36.pyc b/anomalydetection/deeplog/Model1/__pycache__/__init__.cpython-36.pyc new file mode 100644 index 0000000000000000000000000000000000000000..185d304783c47d18f0183c2924f193e1688837a2 GIT binary patch literal 182 
zcmXr!<>lHt%Q=n#2p)q77+?f49Dul(1xTbY1T$zd`mJOr0tq9CU&+o^F}{^lIYq;;_lhPbtkwwF6mR48#lo$OSYs literal 0 HcmV?d00001 diff --git a/anomalydetection/deeplog/Model1/__pycache__/__init__.cpython-37.pyc b/anomalydetection/deeplog/Model1/__pycache__/__init__.cpython-37.pyc index 444811b310f9bc3af53773a8b790a6dfaab45484..956f3f953ba3843c46234b7294ee6f9eb7bc5146 100644 GIT binary patch delta 60 zcmX@bxPXz{iIEq%@^6COJPPHO41D-7zmQr?NP+I6g16 Md}6m1>t~R106v@($N&HU delta 102 zcmZ3$c#4tRiI3$JYBN<>Dpx=>dB7ICp)@hOch@2UH@eF rwr5+rU#y=RG`wAV`F15tdsT4C@TZXN*fUtL12>`gOmk{RmW_rnX0k7>FI8& zY5<#_T%?Ki3Wcj&MIK>qlCP>w77;1ks=rfJr%s)p@6^wS!(Q;W zKYv@T67o;-!RMiW3q}4O6(fx1Bx+llIgw*2ZstWEdQR@ogD9Y8-pxDnFbe10sB6bG z??pXMvOdZn8#Y9I&Ah^4enX@FZwL!m=QUv+?qn;C8?CaCbzhTc#Coicc8$>oWHNXM z8OZ$!)x!si`8*LPk3_1tm^ga)kQcHPcN3LJu5{0I4$6dap}pyXC6PO=7E`nR{pX%_ zyiq@oY zGN&Pw)J{!ew{}r`%FlwDg5nzXZ#>XCpoO4#jak=a1V^~{A5Nf;CyAOK$Fo#OHR)+z zE-EgBIZ20coEE8yW8FVa8RJDP(^Ia8#bO?pvp7#qxRBZ@ipV>7@bI4Ya;fG~aF7-} z5!#teaSKoKe5QM8Q7x3+F1lbXdZ?oQ8KbBCL_*KqIG*N-lyUr5^27Yj^}~B7zfN;5 z(H?+dkrqdX^OF~)kjG_pn3qRMk>n>bm4^rA(e3yBq$uYgGp_hlrDbu*IIpny(5myA z*%#x$*hm~Jk)*|bbs|E@GeD6>FhW;ofPaIYuGn4s?-en1%rriuA5-q2$O@Gr8LW{x zHO-*)#`L|$&3qL!bm{!xS=cP=wDy`gjdSQMy`>LYSQ|ele{|p+ww<}#1l+vZgY~@o zg5unFw)!`|Nmu)-6w_nvpK?)3t7GUNCbK%%jw*G)`15qig}JL3qM8I^1&#Ktg|u^g zRXfwG+ASAK(^#yTb+#ItRh?NWUL=C8W6-L~xlAw94QGd*4qH{7t(XMHeTrnx;Xb`M zo|kNqoBkkv{Si4L4q3WOPmvn9(Kvtl!_t3wTKgGo$X)WdSOzt8`~`uX9ECt+{%fLK z<<&vvHzW%hL>Yd%3|`(+9TnEFX-&_=7TEpSvQxWRw+`wKe!2yF87mcdwJx|o6X3=bd?#M3m5LPNy$)W_-=|L@%|K`6P2icpXC=m01X(p|cVe~<1^@e9mM zjfuUdkDd1W#^s;MQ3~i6Oa5tE8i!g5s@KC#^zwFnP`Dh3AU^!G&Ihk9- zwHnF-P*;@4I`vB9P@;V6QXxiO7N`!~ZxyaLtbI7gNOkKG;2|`r>y5WuQ@y&!dRwIK z2` zR=tyLHRKDj+``;>Lwoj*M+Em>$+qiFHiYbMy=COfE;RHaX&@VdXq;VSs~^(awT+88 zyNiuB2^&0h#P`tUPIjs8HuNo(f5GUo>El80u_f$2YmH0llWZ5-`AI|SuGQZq=x=Qc z0T21Ue`L=8sgV_Ojx8?}W*hgT0C>jpYmtX^a_w|H0&He0Ggz+qB2!_1%XPSNDG8qdW3=F`MN)>Q!kG&-uI*CqK|D zh{*s9St@J}u+VV2_mw$u_jj+BS6(e2f3|CL8**lxmlCXBUElwFw$wdKwzPN5h~e&6 z>U7P@g2lTtWPEwdnb^SOvfkyBNmmkKg~;oF>;nl zkk-8CDSr`lO>&EmM8b6VB$+-n=}3Ecnaz>w6t8rE3^Y%a?wHU=o@yL_Ob7)#BgNQ7h0>2=DK*ETnS!@atbsmg{6)S zb*7ZyJO&&YTGTV9wQeBAb)%2ZO@%iv`QQz-*>ZjlVX45Y1xAjBh9S6|4F_h3J61V_ zi&k4BTDUZcH0oNIGvoGAb>>jox7?#O;QN9XQ(Wr|4x_&90eH=;HF#m4wo!L#TOh3N zS|gdaL)25HO7hq!H}dU$#7%RF161%2{!IHecj%zK1<)5}#!`6M(Z{$uXhm&>^Zr}c323XcY< zjFaa{id!Ss1o!OIri}#~0Y;P$?)~HQDZGJu;~8|*q;6?mG$AfvQq72l?s;cC4p;sI DiU5WE literal 0 HcmV?d00001 diff --git a/anomalydetection/deeplog/Model1/__pycache__/log_key_LSTM_train.cpython-37.pyc b/anomalydetection/deeplog/Model1/__pycache__/log_key_LSTM_train.cpython-37.pyc index 692a8640be5778d0dd2052caf387a37c293da659..7a80e57ec597d9be72e267a7604d493cc7030848 100644 GIT binary patch delta 858 zcmYjOOKTKC5bl~+b|#yLNzCfT#V7$;A8d3*i81)TMLdWK?P}<3x0^AuGn1ZPb!CEw zJu66q2T!{Q9z2Mmc<|sK@TjK&e}JGTJ$Vpoj0bz@ZZs-nJnReoC@Jv%bu5jM9?v@(7J)ron+rN-JkHJbBOzcbrV z)^pYs_@?h$<*}EW_Pyt+-G>1=Pi{Qb&#e7N)KKd5AO&ug$U2d$8*{0{oz_2QE4qVB zkrLT-?wwWzHKadT3-Cyf*p=-=4-GZkFQ(Q2dd6lUEnFtmf<9$mg&+Efz5jun!~v>6 zZLYepsE%e<6pJu(rC>bpLy=@w;3q0G+bqsZKUA5WDCx)eivu6|0M)i+VM}BVi!ql= z6HCe|1nMXeCXvkSTS3HB)urw&KM*T1Q)}7CT|eY{(NTMMp_G6H9u#2|3NQ=fU_cqB z@tcEPAnR{FIt^HKN+dPYl7e*G?Q#!h$iVCYkaVIy!^i&T(`k;{fr+udFFvkU3-wdT z^#EO}sNMb9bgaK;KvI``ShZ3jd>dvMKsMfA%Pa035FpCp6L|+t(6veZxp1H~Ra27r zVJ(PS>!Mo=BiUwwoYmXieYo5ucPHXC_qf*X;pzb#D>I@b8_F%@KKc+#v`{)2kkdJo za;WR??%^fs#Z0ocP(mj1V4muVu+tWjDM9UK5QXS47m794sCCB|cd2_{$Y`Y{8K?TX zSHaF*Zyxc5*Km&Jg=@Mv93!zgys$*wC{}*k#~kXZNU>lgQ6i?ch@H{o+12sUH{X n^9!il8qmfUE&9m{yTg2}90=-MjCdyyTUOL_<+A=+T!e`~5-;pi delta 758 
zcmYjOO-K|`9H0N}&bTu>?)tH=vbv@s$SUHBNUS23WDw{nDv+&p+`;iOmvjpkqE2Pe2T=rFy9Ckg(NV*W9r~a{^oBb09`E=6{&@fYv()2MY^JeM75HiE z+r5&EKZ&KqbKf?(JUfv;y7%Dd;r)OdzPxq#awk9DbNuGu@b!z&&v%Xw_VROX>9nI; zRp0dg=ZF2W>7asv*6CUCH!wOInoyWn&N8u>W%OpqhHm;jbOGMdxo|3eeM{QdtV#P& z7tRQm9a;$w${VLh1BuWN;T$Z{sFZG#cSRE2*J|N9Y>QiwTI9TtX`oZmMeyluX>c_I zOfpE-Lzx&0yB>0wf{|`myb+&Cq~ckKg>2npA=4pD@(DIQPCU!x64sp(H(NYP);%6< z;1HhxPHORK0l@|P3{)=lS&NV}DRPS)U;E6d9kT2MXHDcHz%arGCB@X^pk z<)noBdCT;UG74GxPDw}GxrjRg$kDIL=oC{u9qTqC2>W=1>zRU_ijA;N5R;0Q>mVi@ zi0H*K3$L5#>VmR~uv;i$-C(q!rg@z$HOujlnwKYo#N)In>T$)jXl{xru18GU zafxmfe1iBAkmnZ5cslT(52}WIw8kV$UqTj>-3sw4gwY?-3=Gj;tu+cQeCP@Ont)Dg p!>~wgZ2)HJo;D`W1W7vRdo7t@^1N$QEQIAimkVH#{?sNR_6M}n@8|#k diff --git a/anomalydetection/deeplog/Model1/log_key_LSTM_train.py b/anomalydetection/deeplog/Model1/log_key_LSTM_train.py index 0f222bc..05188e0 100644 --- a/anomalydetection/deeplog/Model1/log_key_LSTM_train.py +++ b/anomalydetection/deeplog/Model1/log_key_LSTM_train.py @@ -32,25 +32,33 @@ def generate_seq_label(file_path,window_length): with open(file_path, 'r') as file: for line in file.readlines(): num_of_sessions += 1 - line = tuple(map(lambda n: n, map(int, line.strip().split()))) + line = list(map(lambda n: n, map(int, line.strip().split()))) + if(len(line)lHp%Q=n#2p)q77+?f49Dul(1xTbY1T$zd`mJOr0tq9CU&+o^F}{^lIYq;;_lhPbtkwwF6mR48#lo$k#MB literal 0 HcmV?d00001 diff --git a/anomalydetection/deeplog/Model2/__pycache__/__init__.cpython-37.pyc b/anomalydetection/deeplog/Model2/__pycache__/__init__.cpython-37.pyc index d68d822ad074bb8f4c305f7492c43f1bfda4f958..20ebbfa38ac7a99167c63e980a86fd4fa6a68666 100644 GIT binary patch delta 60 zcmX@bxPXz{iIEq%@^6COJPPHO41D-7zmQr?NP+I6g16 Md}6m1>t~R106v@($N&HU delta 102 zcmZ3$c#4tRiI3$JYBN<>Dpx=>dB7ICp)@hOch@2UH@eF rwr5+rU#y=R@o zyz{IRbYhlv(Ryir%H$Wq$t~eddC>ig2~V`1G0}>xbY^M?v%(kcXDpZ#9nr-&FZedw z?7f5x?A|8V{q2L%D3ZqyWTIlZY3cs`I4=u%J5o^@E8U^Vogxyk)Xv~QMA{yV4+gaT z)z3belpC1DEZ~9#)>mxE1sB#cKd^-@931U@#T*v6INsIn-9p6Kz&+VYqlZSU%3J6Z zQ@mnRE48MKv&yPiXje9RN4cq2aZqex|J(ts1)2|vGo^KH!El6q?>=tV-Hp^>KO82q zRGS^`mIvcl$`%gLei$ZsqQX#j_Y)!FJS>w*to!-FC@h9y79Gd3)K;Er=g#*1yV}W0 zHPYUlB#$Ght>FN-aLPCv>Q0i650u$1yI?JQhJSHFgBa~F9Ar^hhT&h>xB166cJ3a3 znq+Z_@ea7TN7-?ilsk8dy!N? 
log_vector4
     # so each element of inputs is a sequence,and each element of that sequence is a sequence too
     # nn's output is the prediction of parameter value vector
-        if len(x) < 2*num_of_layers:
-            flag = 1
-    for i in range(len(vectors) - window_length):
-        inputs.append(vectors[i: i + window_length])
-        outputs.append(vectors[i + window_length])
-    # print(inputs)
-    # print(inputs[0])
+
+        # if len(x) < 2*num_of_layers:
+        #     flag = 1
+
     data_set = TensorDataset(torch.tensor(inputs, dtype=torch.float), torch.tensor(outputs))
-    if len(vectors) > 0 and flag==0:
-        return data_set, len(vectors[0])
+
+    if len(inputs) > 0 and flag == 0:
+        return data_set, 10
     else:
         return None, 0
 
 def train_model2(model_dir,log_preprocessor_dir,num_epochs,batch_size,window_length,num_of_layers,learning_rate,hidden_size):
-    log_value_folder = log_preprocessor_dir + 'logvalue_train/'
+    log_value_folder = log_preprocessor_dir + '/train/logvalue/normal/'
     model_output_directory = model_dir + 'model2/'
     log_template = 'model2_batch_size=' + str(batch_size) + ';epoch=' + str(num_epochs)
     file_names = os.listdir(log_value_folder)
     for i in range(len(file_names)):
         print(i)
-        file_name = str(i+1) + ".txt"
+        file_name = str(i+1)
         train_data_set_name = log_value_folder + file_name
         validation_data_set_name = train_data_set_name
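The hunk above stops inferring the per-key value-vector length from the data and hard-codes it to 10, and it repoints training at the new /train/logvalue/normal/ layout. For reference, a minimal sketch of the window/next-vector pairing this function performs before handing a TensorDataset to Model2; the helper name, the `vectors` argument, and the fixed `value_dim=10` are illustrative assumptions, not the project's actual API:

import torch
from torch.utils.data import TensorDataset

def make_value_windows(vectors, window_length, value_dim=10):
    # Slide a window over the per-key parameter value vectors and pair each
    # window with the vector that immediately follows it (Model2's target).
    inputs, outputs = [], []
    for i in range(len(vectors) - window_length):
        inputs.append(vectors[i:i + window_length])
        outputs.append(vectors[i + window_length])
    if not inputs:
        return None, 0
    dataset = TensorDataset(torch.tensor(inputs, dtype=torch.float),
                            torch.tensor(outputs, dtype=torch.float))
    # value_dim mirrors the hard-coded length 10 returned by the patched hunk (assumption).
    return dataset, value_dim

Keeping the dimension as an explicit argument avoids silently mismatching the LSTM input size when a key's value vectors are not 10-dimensional.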
diff --git a/anomalydetection/deeplog/log_predict.py b/anomalydetection/deeplog/log_predict.py
index f9c348a..59c5215 100644
--- a/anomalydetection/deeplog/log_predict.py
+++ b/anomalydetection/deeplog/log_predict.py
@@ -8,11 +8,12 @@ import torch.nn as nn
 import os
 import matplotlib.pyplot as plt
+from collections import Counter
 
 # use cuda if available otherwise use cpu
 device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
-
-pattern2value = []
+# record the length of the value vector for each log key
+value_length_of_key = []
 
 # subclass of the Enum class
 class LineNumber(Enum):
@@ -20,35 +21,44 @@ class LineNumber(Enum):
     NUMBERS_LINE = 3
 
 
+
 def generate(name,window_length):
-    log_keys_sequences = list()
+    log_keys_sequences=list()
+    length=0
     with open(name, 'r') as f:
         for line in f.readlines():
             line = list(map(lambda n: n, map(int, line.strip().split())))
             line = line + [-1] * (window_length + 1 - len(line))
             # for i in range(len(line) - window_size):
             #     inputs.add(tuple(line[i:i+window_size]))
+            # log_keys_sequences[tuple(line)] = log_keys_sequences.get(tuple(line), 0) + 1
             log_keys_sequences.append(tuple(line))
-    return log_keys_sequences
+            length+=1
+    return log_keys_sequences,length
 
-def value_log_cluster(log_preprocessor_dir):
-    log_value_folder_cluster = log_preprocessor_dir + 'logvalue_test/'
-    file_names = os.listdir(log_value_folder_cluster)
-    pattern2value.append([])
-    for i in range(len(file_names)):
-        pattern2value.append([])
-        with open(log_value_folder_cluster + str(i+1) + ".txt", 'r') as in_text:
-            for line in in_text.readlines():
-                line = list(map(lambda n: n, map(float, line.strip().split())))
-                pattern2value[i+1].append(line)
+def get_value_length(log_preprocessor_dir,log_fttree_out_dir):
+    global value_length_of_key
+    value_length_of_key = [10]*(len(os.listdir(log_fttree_out_dir)) + 1)
+
log_value_folder = log_preprocessor_dir + '/train/logvalue/normal/' + file_names = os.listdir(log_value_folder) + # for i in range(len(file_names)): + # with open(log_value_folder + str(i+1), 'r') as f: + # x = f.readlines() + # if len(x) == 0 or x[0].strip('\n') == '-1': + # value_length_of_key.append(0) + # else: + # line = x[0].strip('\n') + # key_values = line.split(' ') + # value_length_of_key[i+1] = len(key_values[0].split(',')) -def load_model1(model_dir,input_size, hidden_size, num_layers): - num_classes = len(pattern2value) + 1 +def load_model1(model_dir,model_name,input_size, hidden_size, num_layers): + num_classes = len(value_length_of_key) + # num_classes = 28 print("Model1 num_classes: ", num_classes) model1_dir = model_dir + 'model1/' - model_path = model1_dir + 'Adam_batch_size=200;epoch=300.pt' + model_path = model1_dir + model_name model1 = Model1(input_size, hidden_size, num_layers, num_classes).to(device) model1.load_state_dict(torch.load(model_path, map_location='cpu')) model1.eval() @@ -56,16 +66,16 @@ def load_model1(model_dir,input_size, hidden_size, num_layers): return model1 -def load_model2(model_dir,input_size, hidden_size, num_layers): +def load_model2(model_dir,epoch,input_size, hidden_size, num_layers): model2_dir = model_dir+ 'model2/' model2 = [] - for i in range(len(pattern2value)): - if len(pattern2value[i]) == 0: + for i in range(len(value_length_of_key)): + if value_length_of_key[i] == 0: model2.append(None) continue - input_size = len(pattern2value[i][0]) + input_size = value_length_of_key[i] out_size = input_size - model_name = str(i+1) + '_epoch=50.pt' + model_name = str(i+1) + '_epoch=' + str(epoch)+ '.pt' model_path = model2_dir + str(i+1) + '/' + model_name if not os.path.exists(model_path): model2.append(None) @@ -90,18 +100,21 @@ def draw_evaluation(title, indexs, values, xlabel, ylabel): plt.show() -def do_predict(log_preprocessor_dir,model_dir,window_length,input_size, hidden_size, num_layers,num_candidates,mse_threshold): - abnormal_label_file = log_preprocessor_dir + 'HDFS_abnormal_label.txt' +def do_predict(log_preprocessor_dir,log_fttree_out_dir,model_dir,model1_name,model2_num_epochs,window_length,input_size, hidden_size, num_layers,num_candidates,mse_threshold,use_model2): + # abnormal_label_file = log_preprocessor_dir + 'HDFS_abnormal_label.txt' + + get_value_length(log_preprocessor_dir,log_fttree_out_dir) - value_log_cluster(log_preprocessor_dir) - model1 = load_model1(model_dir,input_size, hidden_size, num_layers) - model2 = load_model2(model_dir,input_size, hidden_size, num_layers) + model1 = load_model1(model_dir, model1_name, input_size, hidden_size, num_layers) + + model2 = load_model2(model_dir,model2_num_epochs,10, hidden_size, num_layers) # for Model2's prediction, store which log currently predicts for each log_key. # When model one predicts normal, model2 makes predictions. # At this time, the forward few logs with the same log_key are needed to be predicted # so the pattern_index is used to record the log_key to be predicted. 
- pattern_index = [0]*len(pattern2value) + #pattern_index = [0]*len(pattern2value) + #pattern_index = [0] * 63 start_time = time.time() criterion = nn.MSELoss() TP = 0 @@ -109,111 +122,176 @@ def do_predict(log_preprocessor_dir,model_dir,window_length,input_size, hidden_s TN = 0 FN = 0 ALL = 0 - abnormal_loader = generate(log_preprocessor_dir+ 'logkey/logkey_test',window_length) - abnormal_label = [] - with open(abnormal_label_file) as f: - abnormal_label = [int(x) for x in f.readline().strip().split()] + test_normal_loader, test_normal_length = generate(log_preprocessor_dir+ '/test/logkey/normal',window_length) + test_abnormal_loader, test_abnormal_length=generate(log_preprocessor_dir+'/test/logkey/abnormal',window_length) + + print('predict start') + + #normal test with torch.no_grad(): - count_num = 0 - current_file_line = 0 - for line in abnormal_loader: - i = 0 - # first traverse [0, window_size) - for ii in range(window_length): - if ii < len(line): - pattern_index[line[ii]] += 1 - while i < len(line) - window_length: - lineNum = current_file_line * 10 + i + window_length + 1 - count_num += 1 - seq = line[i:i + window_length] + count = 1 + for line_num,line in enumerate(test_normal_loader): + model1_success=False + for i in range(len(line) - window_length-1): + seq0 = line[i:i + window_length] label = line[i + window_length] - seq = torch.tensor(seq, dtype=torch.float).view(-1, window_length, input_size).to(device) + + + seq0 = torch.tensor(seq0, dtype=torch.float).view( + -1,window_length,input_size).to(device) label = torch.tensor(label).view(-1).to(device) - output = model1(seq) - predicted = torch.argsort(output, 1)[0][-num_candidates:] - print('{} - predict result: {}, true label: {}'.format(count_num, predicted, label)) - now_pattern_index = pattern_index[label] - if lineNum in abnormal_label: ## 若出现异常日志,则接下来的预测跳过异常日志,保证进行预测的日志均为正常日志 - for j in range(window_length + 1): - if i + window_length + j < len(line) and line[i + window_length + j] < len(pattern_index): - pattern_index[line[i + window_length + j]] += 1 - else: + output = model1(seq0) + predicted = torch.argsort(output, + 1)[0][-num_candidates:] + if label not in predicted: + FP += 1 + model1_success=True + break + if(model1_success): + continue + + + #如果模型二预测normal TN+1 否则FP+1 + + #现在有63个预测normal value 文件 对一个line 找对应的 value normal下的行 进行预测 + + # When model one predicts normal, model2 makes predictions. 
+ # values:all log's value vector belongs to log_key(whose id is pattern_id) + # 是否使用模型二 + if use_model2: + + seq=[] #得到63个normal预测文件下的这个window的seq + for i in range(31): + with open(log_preprocessor_dir+'/test/logvalue/normal/'+str(i+1),'r')as f: + key_values=f.readlines() + key_values=key_values[line_num].strip('\n') + if(key_values=='-1'): + continue + seq.append(key_values.split(' ')) + #将字符串转为数字 + for k1 in range(len(seq)): + for k2 in range(len(seq[k1])): + seq[k1][k2]=seq[k1][k2].strip('\n') + seq[k1][k2]=seq[k1][k2].split(',') + for k3 in range(len(seq[k1][k2])): + if(seq[k1][k2][k3]!=''): + seq[k1][k2][k3]=float(seq[k1][k2][k3]) + + #补全 + for i in range(len(seq)): + if(len(seq[i]) mse_threshold: + FP+=1 + model2_success=True break - i += window_length + 1 - else: - pattern_index[label] += 1 - i += 1 - ALL += 1 + if(model2_success): + break + + + #abnormal test + with torch.no_grad(): + for line_num,line in enumerate(test_abnormal_loader): + model1_success=False + for i in range(len(line) - window_length): + seq0 = line[i:i + window_length] + label = line[i + window_length] + + seq0 = torch.tensor(seq0, dtype=torch.float).view( + -1, window_length, input_size).to(device) + + label = torch.tensor(label,).view(-1).to(device) + output = model1(seq0) + predicted = torch.argsort(output, + 1)[0][-num_candidates:] if label not in predicted: - if lineNum in abnormal_label: - TN += 1 - else: - FN += 1 - # else: - # if lineNum in abnormal_label: - # FP += 1 - # else: - # TP += 1 - else: - # When model one predicts normal, model2 makes predictions. - # values:all log's value vector belongs to log_key(whose id is pattern_id) - values = pattern2value[label] - vi = now_pattern_index - if vi >= window_length and vi < len(values): - # Model2 testing - seq2 = values[vi - window_length:vi] - label2 = values[vi] - seq2 = torch.tensor(seq2, dtype=torch.float).view(-1, window_length, len(seq2[0])).to(device) - label2 = torch.tensor(label2).view(-1).to(device) - mse = 0 - if label < len(model2) and model2[label] != None: - output = model2[label](seq2) - # Calculate the MSE of the prediction result and the original result. 
- # If the MSE is within the confidence interval of the Gaussian distribution, the log is a normal log - mse = criterion(output[0], label2.to(device)) - - if mse < mse_threshold: - print(mse, mse_threshold) - if lineNum in abnormal_label: - FP += 1 - else: - TP += 1 - else: - if lineNum in abnormal_label: - TN += 1 - else: - FN += 1 - else: - if lineNum in abnormal_label: - FP += 1 - else: - TP += 1 - current_file_line += 1 - # Compute precision, recall and F1-measure - if TP + FP == 0: - P = 0 - else: - P = 100 * TP / (TP + FP) + TP += 1 + model1_success=True + break + if(model1_success): + continue + + # 是否使用模型二 + if use_model2: + seq=[] #得到63个normal预测文件下的这个window的seq + for i in range(31): + with open(log_preprocessor_dir+'/test/logvalue/abnormal/'+str(i+1),'r')as f: + key_values=f.readlines() + key_values=key_values[line_num].strip('\n') + if(key_values=='-1'): + continue + seq.append(key_values.split(' ')) + #将字符串转为数字 + for k1 in range(len(seq)): + for k2 in range(len(seq[k1])): + seq[k1][k2]=seq[k1][k2].strip('\n') + seq[k1][k2]=seq[k1][k2].split(',') + for k3 in range(len(seq[k1][k2])): + if(seq[k1][k2][k3]!=''): + seq[k1][k2][k3]=float(seq[k1][k2][k3]) + + #补全 + for i in range(len(seq)): + if(len(seq[i]) mse_threshold: + TP += 1 + model2_success = True + break + if (model2_success): + break - if P + R == 0: - F1 = 0 - else: - F1 = 2 * P * R / (P + R) + #现在有63个预测normal value 文件 对一个line 找对应的 value normal下的行 进行预测 - Acc = (TP + TN) * 100 / ALL + # Compute precision, recall and F1-measure + FN = test_abnormal_length - TP + TN=test_normal_length-FP + print('FP: {}, FN: {}, TP: {}, TN: {}'.format(FP, FN, TP, TN)) - print('Acc: {:.3f}, Precision: {:.3f}%, Recall: {:.3f}%, F1-measure: {:.3f}%'.format(Acc, P, R, F1)) + Acc = (TP + TN) * 100 /(TP+TN+FP+FN) + P = 100 * TP / (TP + FP) + R = 100 * TP / (TP + FN) + F1 = 2 * P * R / (P + R) print('Finished Predicting') elapsed_time = time.time() - start_time print('elapsed_time: {}'.format(elapsed_time)) - draw_evaluation("Evaluations", ['Acc', 'Precision', 'Recall', 'F1-measure'],[Acc, P, R, F1], 'evaluations', '%') + + + + + diff --git a/anomalydetection/loganomaly/__pycache__/__init__.cpython-36.pyc b/anomalydetection/loganomaly/__pycache__/__init__.cpython-36.pyc index a94fb9be6d5715d56c5725858ab5bcbeb5023745..0f6b81b9067572473c60dd66a47af2c4f77a9d42 100644 GIT binary patch delta 26 hcmdnQxQUU~n3tF9)I?SX&a}*&)Z!T5%AARQDF9^72v7h3 delta 26 hcmdnQxQUU~n3tF9#6(sHPPfdQ)Z!T5N}q{-DF9-u2o3-M diff --git a/anomalydetection/loganomaly/__pycache__/__init__.cpython-37.pyc b/anomalydetection/loganomaly/__pycache__/__init__.cpython-37.pyc index 17b247bb22f2c009cd882030eab5a6488a96c89d..c0f594c244bd083119da13aa9ac3f110861cfa13 100644 GIT binary patch delta 86 zcmX@cxQ&t9iISW#*;F6xlgU$lI=Qb(?hp#<-kpl!f^`c;p8Ig5(5rcB?P_0l_+z`)o^54 zGPf`w7eNsiFbwp*3luGSC~_zg1StBC%(YGtUwZ2$m-hD!*QsitBslZ&<8bDC{JzJ# z?RJCVdiATne7Ziz*x%{p#pQswj;DBvK`_Bn7F07IT7ktY+8#QAV`wXNhhE?r+D>aj zKk$e3pgvpI2%2V(R?sqYJ7~vj(uvvr+@w2Yav+?{67G}-oe!DtMC}n1wb4alE=4YqGH*zbo*AE6}fyrQc)2r-5_Es zk3=lB(?1lEw)>;QKK=gUvS&nb2}H~SE?8iF#s*w)VLkE#TiC+E(T;G1huQs%IV|uX zMoqgna}lR~ceVx19v<4VxQ;Vn}2d;1o&Y~G1fe=i&)vQWK-c8kMNEM*M`Xg>^-EKy;on|p~6aTXTINvzx1 z;V{exVHzFBvd~tRX=m%BTQ{|n7HX)ytt5*hsjWf3=W3^j(}8Xz+2~N2-(?e9%a++c zpYb3@Jq-J4R1{(OC-$$0_c1vez^aB-`q(rsto*kGMnNEAd=2->e5J~n75}4+kA+5?YamKCVWbIk;uA~o zDc#nNHMYh8-2~Dj0NHOWF`EbWIwNgM(#YPH9D6+fH!etxoWSHo8ZOWBu}T+7vtbc zidV6nZqE?dkJB_fb8?T%Ws;JtY2t#gYYz8$tLD3g*Qc%_bQ#G2$uJR}K(k=NK!g^rAik|ZKXB<#2g4!C_?tiKp z!miexTJN(@w#NSbE2>fYxcMql19%(bdg+z^IxC&BzRrZR29VPHCNjx2cJS-l^u?^N 
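The reworked do_predict above derives FN and TN from the sizes of the normal and abnormal test sets and recomputes Acc, Precision, Recall and F1 inline, dropping the zero-division guards the old version carried. A small sketch of the same bookkeeping with the guards kept; the helper name and its signature are illustrative, not part of the patch:

def session_metrics(TP, FP, test_normal_length, test_abnormal_length):
    # Every abnormal session that was not flagged counts as FN,
    # every normal session that was not flagged counts as TN (as in the hunk above).
    FN = test_abnormal_length - TP
    TN = test_normal_length - FP
    total = TP + TN + FP + FN
    Acc = 100 * (TP + TN) / total if total else 0
    P = 100 * TP / (TP + FP) if TP + FP else 0
    R = 100 * TP / (TP + FN) if TP + FN else 0
    F1 = 2 * P * R / (P + R) if P + R else 0
    return Acc, P, R, F1

With no sessions flagged (TP + FP == 0) the guarded version reports 0% precision instead of raising ZeroDivisionError.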
zzLvGp2Y>%ItE3Q+V~MTdk5%%&HLxL1r|+w9<2oldF|w*IMcq2BatFYAm5GBL?Lx0h0^vg8fiPply*mwCOgGq zD;r4yoOWfD?Z%Q^E!T+QDN_)%J4o}0Hp#PH?6?<=;$B-fkzww}$Kfbad%As;WFkKb zvEeRWJF^^Wa$a0$KF|)<7TT*8$PKeUq5yeQ^F;G~UC-hpBxGVVh@5?_iK-sG94qU) zaTd!+#b>iy8r_N0SFm^S6AX+ucng`$$Gw2uM)w911I5%5zsS4%M_AF|@&l~xn>+~O z9G*HJ1M#OYZdnt6+qfZ++Dh*fw6IqR_%6|KCsGC_H-OujSc(ADQh;L#=$+c*HUgnt zdcqXJ$IiGz&MlkRrS_PQ=TvRtBV@X&K53M#NpnhRW&R1j@2M7@0FZw-MlByN-XE!U z*)be}#I%HGdAw3CPdW&@IXqoKXL6+CN{Z#ORW2joR?BX=TCP-Fz-z6XpUj)8x;9xb z^J2L+S)8&r*?1A(mWX%o6d|%K8k3cBK|opr;-XqDJNs*={5+f51kUPYw_W4(3tgULnc{mPV;zG&l|by%je%BJX*4jv+S z<|Z2?4`i96Gfi#pZl5fkDQZ;f0GG=r-FHMZ46Bk)RpafG+FS7`@9({R>jR|V8*r;` z{P(l26?=z+K^p7ED9OTMJj~^Bj|Uz$6p1w9B?(R^KfXb`Z2aoM`0|7CCzm!%8BMa? zFwKkN>c)eso39VX@~2q(TwwT)Up}!nN9tt$7S&rU+!$ag_F~cdp|%TEwIGtPCePDA z5R_D7B}KBNJgd9!p*9vbG9~lT@fJw!<|CC1wL@w`=18Ilpm{6g8??S2BkW8}Xt$4G zjj58V2xZo}m&8Z9PQ^Cdm66c?ov448N{x2V{b9#K#UpuzcJWYVhLMs#CpnD8th~^U z33E3oU>EHVqq{MPqRJCa5#56c3q{c=l0NbajxXZgIr$cCvPi@CeUYScix5YbWn5Fp z55WlPGS8JsCcz3STAa=j05+89Zr~VsmDaV%+28Ym7QI4)QHsO0YyJ$*VuR90h<+BG zG*QT8)X;iIrt1yreKVtX(sg57fJSQ92o^MqmyL_%$0W2vnA5JwGV;3NkpBZW#-iPq zGgPzs-H1f~DPG6GET6AgOIFM3@{aA}>2im!AS3x!7w;&#xHpiSI><~HK>1uI_^-EI z8QV9F(`J@c7-?3oMs~T4bwR7j+IMnz_hW-5-K`cKsw6Eo$qZGJC%9=BjzBv|vS~k( zy8u-Y%K)zxFsbL$t)vK}dr<<-5iH(K({Y&2paL!lct(BGqnY;1zXtWuo&1uDR6+bo R!4?!GWqoVST3GZ~{sYKCkGB8- diff --git a/anomalydetection/loganomaly/__pycache__/log_anomaly_sequence_train.cpython-37.pyc b/anomalydetection/loganomaly/__pycache__/log_anomaly_sequence_train.cpython-37.pyc deleted file mode 100644 index 4f8318b26445d6b774b9fa69de8510172cece2f5..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 3901 zcmbVO%a0tz8L#TcOwV&?AKr&wtbs%ff!!q!NDL-CO@xdOcqOBbNvAzkvpegao^f@p z?Hzi!ED?&p3YT2bj1(oxA>t4vLW*)u{)Rdx5aDa2aNv^szUtZKp@c)ar>efHuBz{w z@AdnhYc#41&oBP+`=4A~VC?TSIeO|Ko@6O;;gYC}2ELq-%c6P1QU4TPxFY7lRnZF9#C&)X^eM3ro)(MY z8L`#B#VUR>E^)HN0P*6Kg9hd_bO%XXT1qm22{(JS9)dGxD5#K;Cy)gVg*A zYoEM_|IRMAxoKS4@AqSMa8>oRRBgvJwq=?P)C;kWGpS9Lh)aW5NOh0n436-Jjn~;1 zv2i=Y{fH^gT2`bC||gDopJfjtGX(@V&9?j0(W^8o96S6RVXfcFw3?kg5f;|~=LZi3BDIyC-BszGLa6{kCNXg9{aWM$*)f$Hp< zlC~aV{BAOc>Apd_({7krGT6Bx529hLcTMAVFBOB^5iZ@qw3+Vrqd_<7BypC>%Ql`&<@O6 z{pRD}{hEf4KK%JdAHH+#d+VS4`Gb%C^v8ev;hj%D_~o@rgPmv7I626A*%#?Yvt1}C z67tryI34t{K}an-x;IF#;a|T`*l*ya>GVRDntGJhL3=d3mN2XH6T1H8HT_#OP zls$xw@hY$LCJ*o~@TJ+i%1`56LTllBmEX_Rx3ITE2acYj(8?afiew=dEOg#xT`u54 zZv~+%T;U;KcyBY0g+5%uH~uRFA(IY8B0V%xqpPxK&}pW5jtHS>TYb`@T-4<}KNfPo1jSgfa` z$kcXwLdZ1AdLwBXGZ#qW1F152T57yYSGHdSsTZcF z^U>CqW{;^Y@KTRihVTi?19DZO!pEY>`h|L!c5R~j23lr80j4P7UE-sr)z!w4yd8=g zddjo*JjQH@PO}M=w&&!0;tD<`+MBu)Uzd;`oUi=~1y2z2xs$s{XwEff?2Ub_1yHk1 z0>UZAuDix4;pHW(rR#CHQTxBh`0l!{g}MW%UGx za28Zsp|`sgq3=-dleV@DkBked&=Q4^Rh21s2kLgL1X*&XuCx5C_0v(KP}gh+j08)k zabLpH)hOx@#C}3_J&JDb$4T+U>RvsE+e~9-nvP78Bl8gBHdjv`@wXmC-ky2r0$wlM zl}-&adQPEL(5z+NN3zQUfrLM<30NU7AA%O{3PDea=y6?h0*XVRx`y@03UGG+(4PQ? 
zpzQ{rsG2jZxZ@^LaWgLqUz9}ot~Z_oDAn>hZVc}7af<)~05-3ylUm-G)Td;vh4=WK zl5P+;z|d#oMNt_q-5Kg;K4(`1?0o`ldAyP@Pv!uEEwp(-JUKk5kYYJ+b1R-lT&$R%HQMJWxX~zO*CqQuWHfWXf=6KpP8J+|GsYu zNE1!)ISI|ot*|L{d-GiYTWhjT@<5gr@wCZe{_<%4I8##kgv)J?TF;8O9~BvdT=~MN z{G=QXI=dG}i`$e=*C|)bu;Tps_QkfRD3hA%=Vweg+uiSW6KSf$UK;gff1nQ9JS>5W z=qVdM>M0x=J@_JBz5e4j#+z@9Uw>rXChT6i6D5NzJHP(M`HjcBV+*7A1onT%H%IQq zP><%VDA>5(MLx@RrD&fsZl;w&MaisRs;7ziA$63z6*;YJXQ!FlwFxxru`L(|7{_}_#L3Cq|nweAlJLqlebNUkU82>u`t2) zxO0O7)p)4eaAT(B(3B9H{aC9-62WTB0g`(bk^No zIw%LgIg{;E>I_}-ed>Nd-EqEEXE8NZGLbb}re4BASW$z4w%{KwBNxbI2GzJ#0ZL&V zb(;1y$PJFy!a7ay4~7XOFupw*288Z_SdHSEDAXCKKTYYjEmFfu$BsZ}Q?UxAbZC4l zO<1+|wZ>Jik)$ph5FkUCtRA2R6f9X5`oQVI&R6qatvGGP_zpH_luwxx@KvYEmz}!P z;!QU|YjKY+1HA*Mg*h@6-gRJk6Igy4RKQh_m?1~@;n7ia?*gXxNOy{|nR=O~f2EEp zxY5%a8=Lr1wY_<9ds|8Qs?7Ei{pgQgxOin#NI4`$b$f}-HbJ3ynhiw(GP>~ppO={) zwZHawVNa`?&CzpTyY<3m!}8S-^Ji6gB~VpQidhJCCmL$T~cdyc4gp>ITaT QbHPIaJomr9fvrUU14gJd!vFvP diff --git a/anomalydetection/loganomaly/__pycache__/log_anomaly_sequential_predict.cpython-36.pyc b/anomalydetection/loganomaly/__pycache__/log_anomaly_sequential_predict.cpython-36.pyc index 68a42afcbb5feb42ebfc2c398b9f88475623ff8a..4c2ee03fc6ef9e2513f88f36f4efe4a885450075 100644 GIT binary patch delta 1255 zcmYjQ&2Jl35PxsI>+GlF*p5HyeAQpA5ujE*H1yI+gj*9qsYqlIjFY#mV!O7p-biVg zRSsJ?R6;UZAtA&Si5wB?{{RvfxZy)3)DuTIAyf%|Z;jlpHRGAz{NB8o_uhP6dRnet za~yN?`nMl#Wf^$HJ$ID7~Hsb)OW`pZa?h*@C`2{ z&LmxaDkK4U8#2BEb)N;wX@l4n+g}S3#04)gN|cyTda7rV(uEgt&W_DpMhem=c04ZE zy*U5#*X@IMZ#KDCfZtT7#>jKU(V_2(X>xetk52q)5@q^YN$kAt?;piY(fvisW=_!@A*o5y70tbibb!bhPpDW!Zks*~>nCNEwY{;?Ql1v{|z{pWWXrUrd2I&?Pvx2Jr zGF7P(DnXUju-BQ~>~C4I>;oOxRE#?*4s^0(%ces7Um(*@&_*clz1myANo>X`lCa*S zY|P1GRW7!2gOz#oHsxb1kLYr2Eu$TVcY^-qv8^8ewF_6U_t+h_$0$oVoQBd^$yybY+61?Kf6AW{a z-SIR$&3HH-Mb8%Q@te^*`VHqt`EL1C6aK@I8$H#(Dm+S!dq<=8;qYYO-`Myh_~T!E z%|dk7_#mlyoxy!KIyWkoeED8ou5L!Z7|*YoFITk|I|qZIhskO8haKU1-UaYlsh4Qy z!0kuBnQOJyLjP#s?wmZBCdTgZRO`!#XUW`eNcB2Yv~F!CUxP9JEZVoK*W~b-yvB8I zV-Qu0p{nG04t^R)hF^d-`G%syG83lsqI2u2fpOhNmS!sY-7508qJJzm8F#TAeQY=K T8N_rXF=gUJQPpkT(DnZSLpUKL delta 1011 zcmYjQOHb5L6u!6fpzYia3^UAw;n8^jA@LC+kpvQ>(GW3e5EC&Il?-#MFbo~&bP|D6 zXVJJa>PTavuJs2Pp7wsszsZ%(XqvkIeeGT$ zLC809?2+(%9%pOyMQJrcZ<5K_`+bEf^Xm=>6_N{P+tnoGL$`64A;a`HHby7b{W$X-J9 z?zE#J!VuAc*>TWZ3gIl-AYI8Cfg!G>2(S!+aE9uK(j4l=3&AktHlkf>eHQ6OQCJQ$WnjD`Dr1ldD$E=Sp|H_|g5QVIUoa4G z22hp`PHA)Kz$;n(kP5IAdXvE^*`?cLnP5LehN11B%V_=YG$bK~-H|q=4mRxywzFvV zaRy0jUEsA4o5#D|8@kbO%{ZYS`W%@_5+1i4QqG)Ouj-5C{54PYs25hi)iD&M!{H(`E zYTwe9m3^1W1yTfo-2%N9BoMU7p~#_V5wz%~r=EMoIwm>gNf%coFC<#VL%)Xu3 z+1Z)d*_nB-R4Oq1uKf8AKYf3Sv40XW{CSvM0VELy!32+3eemUVZgHpO)?GfJd36so zC(5+4^(*WIWINuxTGx?ryV@G&>UeAe)$UbNFg2;(HzN2D96!0yIQBlOVB>0D{ zR(g#S*?Nte%#F4PBI7l?!Z&WS(``DmgkJ_Q0(c*gJj9^bE=wK7H8kCIUR+Pz?Q7cY zIem7G-G3+bl&d`92=_S?p2$3B+UsTdtd|wpKEK9xv1ga3o_2aU=K>E>_H2Dya%wSMVaph>%b0OR7Gm?s3e7%-28P)AXfpJ@Y z$7CKxZC{y8qGSjx=|rJ|y*&P$$s*+Z=Vur`xOnfY&wlsS?|)05S6}|()t8^#`+oJC zzkc!RFMs;_kDq+=#jo#OZ*N?U{peW|CWq(el1E`2G(_;^o*%bcU=V={nksC^_oDX3 z9*w4ry?G-Ewz@&ALO*JBWFW$(TJ1bD`Ad=CdLaDE6J#NP?JR5raUgvatX_)RO+QL5 zV=`dc_Y2(;#gFyNA|@M5v-_2lE2jA_qG!{Sp60)FwmkBVBtOD@HwxmKW1Q9VhHqOx z42BYx-xMxIVWOJtR%f6Losnfbi{GZskns&aTgw}dHX5(p31TxM17AdNP9n1qAagWO zAW3=&ChK>=C2(6_?6(3_dK$)}{j>qaHq@i~BzbHzc$VzBs~IGQKcoZT61ukoeNBF- z7-^r-BI8+JyOs*Cdh3#|0B$WJ;u>^UbpJc$7QLxP$5)RoRzFH>Ba%F6 z9Mv{i1Wecr%ou`GG!lhNZcrp3blbA|2zG*Qyx@r+nM_B9u`=0*ZQ1fwEo%y4-07-D z67B?M^ie285D$<-+-)@?|5+dtGfMMj6e zRCtBw;Zylt(Dt(f8BSpRc^;Du-bv%5#qw_&6y~DmJY_bOwOtgVg3S7P7Q}fEclf?N&M}oV2*Trbm0+Cbx@eNtb#^ zZGC>vNz3UNr8~05 
z+04G=@-AfTQAmrey99Tj_2Y4U3}Z$VK6QE%VpMyY_1)g2n!+PJrYA^xdIHu}^a+va zI|5qh{3yzZ)#s#|)+hA|`0XS-QS8m=Mc@S#o$4-vvhUglykxa-Yx}}E_ex#=?&lGl5?5q+m?yo-7GnO`k)oDAMhMg#S@C%)A zbnD-TV?fKrzNX659F}`Zl>23^0>qZJ z;;^g@%9%|8RLqbbYmOWzkyJh3N)2)nw08jsNkt)B67BuUd#JJwOW96oFYi>5m!*3B zckIF`8KItag4h4UZ|C$!Y4u!n$R>lN8!1ZhXRAtfgKFeI2qMB?3+#PO$L=hx-5hYN ztq;Dp_wd^m-}&Hbvx$X^tKWMFF*jw<3=WEprUkKjm?+oKv6J4|`V0|d3YK7Od3 zvnA6MY}i;|*PYup>&_bB_By`nb@%G^>-EBc^Xp^&gP7`igtoyKfs_kmW%+4lIu~>~!$QSccp(&qDUWSOGTZJ*cCTH=s+cJP0CcI{0zdC6glKM+mliQicTJJ Z(2au2^zIW%K4fN*z@78*#r#M<|9{V+qFw+1 diff --git a/anomalydetection/loganomaly/__pycache__/log_anomaly_sequential_train.cpython-36.pyc b/anomalydetection/loganomaly/__pycache__/log_anomaly_sequential_train.cpython-36.pyc index 8a6f41477ea4e8bf540fa102a2fa4e235f1e1f04..acfa2de52410a244210f58f0fe732a559fa55a6e 100644 GIT binary patch delta 828 zcmX9+TTj$L6rNMI-Ch?Iv#Z?1c!7EYmc(dK5hM`_mjD`M1=q5z&}E^^bRb|#B-!8t zk&rlV#5Z1i&3_c+J$wI2 zI|Xo;x4};R;kLE;HnNIPAwy*THU9`VZbq<7=;yV)%^{|;WTaFeg43h|SkS#KM@CVL zlt{)28L}&#!3>bdENc9m)Y$uThOF()kQw5VNis#A5T-MuXhsI2>>B%0m6;W&NZY9> zHBD@jFG{bp7k%z3fpcY-2T__g8(E&(lGq^uB;BUp6in17cYd`Jr(!J&Jr9Bh(QHq(H%$ z=06PoeD%QcOOFO&&7t~QHt9T-EG!_40En3us~K`sympVnNQR(V)!y y723;BriQb9sK>STsJPCY>4$k|!|_Yb0)C1Pl<0`4QSe__N2QCuN%hw?srG-k;>$?RuQ|59H^=5!z-CyE%`;&MyX=G#)xh3Y8qBUkUD}%yW=@Y7Ir0B;3ipJAy?!mY%8CtGhhOOR_Qcg)Vm!gTHdQi0X4(lEf>3Z zH_ak7TMde**a(4l+&h& zZ>{r*Y2rD`n!IBME@%`TcxaW*rbvj??IN$%niU$`u9C`!2)sDY;Ujwx7Vx#b+k3ew z-IL4n5*|pHX{tF)JN@9}qLZ;?$L1f4_`yj{%u3!aL0Xp^YVsa*uytK+XkLO#FAKV; z^1||^N`((gYBgzfO_ZMU5o~uS^-*~+LhS&#RCEy&a$ATDMD5*&;?92~4gATdp` zB9IAD93}Zh$r*Zic}2z`E;odmUr>^nTVyg>o6p0@1}J_DXef{>Vh5?=$t;dfEKAJH bNlXHIL<1!54N}AAo1an&6fu~*l+OtOz@crA delta 465 zcmX>n`&^dSiI5}D7*DeE5Pj+-Z+0h+is_==Xwr+7oW?pK1N^03;DP}Lm;>oGZ{}_cgr?BWU zGOBN$#JZP}ORzY#urxINA0Q)0G^~uH@wv5t~3psqvPSPBwL zGH$VE7N=wu-Qvhi%uWR|ii>1H@+`%PWvNkI#U+U)K-)5tOK!0y=j0csYDyQ$PiEot zVAP!K%c;hDi!&v$Br!fMGbgo3VR8d!C8Oo!Uz}$d^(UX;>XXt3nZywvpP83g5+7gW z1hU;3L>Nsj;8x*w1Ti6kljn0UkpWxGT9sOqUkp(OmNNjUGMpUGvz1YMvMBEXTac+i zKwQiQBsds(I5=2~Kw_F=V5diMl;jsBXXxeS6&V3Vqqrg5{DP9q+#=)2LVO-Z)} z%>_9_J8{122Y$B@6sGHnLCNqa2W8V%f(m!J7gsl#VB;CVc5CC=36njZ>)AX%v4R=y z^THh#%>97zA}`%xyd>=1`9mkD^D?hsWJ3}|gU3r_O+ z;1sV1r};wg1YZnR`BHF(H-aa{8NR&4f~UlSI4zb$LoACGaYCFFr^FfYlz8H>1gWhn zthsU@@6IkaEnT^?*X>5~;HvB>A)B_YY>Hmemlq-xB|_;U5f}Rr7xKP^J=kJ>N4u>( z9%-jF*lRh&3BOsCJj$CWiET5cSc=0A?U9o@V{5|3_JoaG($A#2Otp-c&wmq>%Ipyz8M?iVpzH5*2Ut8sW!>Ypm&s32+8Mj0G0=qUC75ASs zW47tnN2SzFi_0v{nY#1}yE(qf_McP5lpbq|nRTn9a_XBEZdzVu+-)$AQTq~e81{U2 zl@T9WpUtL)w21M0S6L=0#~amI13Q*jYT?ZsyjkTjbJhLpnd}elG_t&dcX;g)`PLv` zjwDMfM#^$pY(Or^Ok>pt$L?7-cdjzjn~=GXTJ$cQ{oGz=qZ!;qWw&H*BbQc>shKHN zR+ZgqT0uQSw_tkJhk7&U)oAX}8P(DnV3xbAEeXVUTpbLAB>YGZ)qTK&HD|TkABdjz zrHFXk=?U#6gSex#8+Q_=oo+PHPN%1|H;~jgF&5jsf%b+%_7m;NsJBg}O$Am8Z?_EdOQ`~5wYQPZL4bS@jxR!3V(Tf4f{6Su>o4G(ml(?yz@F3zPk-_Gryp*;u=?enKKcBQfB5_FKm78OUv6FO zZ@=D);)A4_CSh7q_;eUcZY4Tqw~}oqlf%FFqY3GMv((l2NC(HaO7LZP61l zQX-6_>msf~kmP%)m{qjOR@L(H%vp6)>Q)U@-CDI8)=BeZy&BfiQ^Sm0$KDn(IQkvA ziR5XF2o_kJ1@;H5ZE=g+cl^NN4tL@E+z*(`0uSKvw0Eh`Mcm3wc~DD*SS9bGQcPJX zY?M5U#Zo6_p#w`~Aw0}(4wi-o>MZppHdYlm!_n^LO`x`P zJyNZmu-%b~k|mtiECFnLz?$4ZPx~RRxTC^Qmv%awi(Z&?hC)}SNQ$EaArk<$r`?NJ zHZN%}PE>azYtHK2Mch^-wcTztbJ|Tr+}7m`GQ=j_c&K+)K93{i3+7zF#~cM23t@)X zFf@1W$&S%Pp3%9rIkq0+^#LxZ)e)-4F`rzlMcA8K9DZhf@hp>Q%3+Kh{ ztZh4RcZIKywXtGikBZ;}w+{JBX$hC?Ts98r{(tammSh8OEf=UVN_OcIjMBBO`Z@=* z>a>I;qmbW6h2R${`7)?xS-yfMn0kW^`6Tt+osDoKa9dP+bYUCX3eD(QGkP`~JsT9d zd$H<}OVAFg>>BCz%w6hyyDx7?lD~+NWDXUxPUERh>*rx`!%B9m)0IFIklBm60>~_e 
zVYkosVxr4ocylj`vl#;t`40H$$`nH_5ywY3A_1C~d<{*8DMI`+D2a>8x5@=$11&NO z`kh26pcnxC8IC^nIpX1{#BJmO`9sj$Spuxm9MPk)LhMPsL!?^@Aj9>N<$8zS82N#T zjvgW^tP-)LipW$=^W5V(p1ihRpHJzccYj*(5g)L{aI9eJt(#7!%GL#z1ET?lVfIX9BFdDGU@m2rK-USgv<<`!s-vJL2=G+s>S zIo=0Rysnngncc>rb%ISCipALsuPicNKuY+LTHak5pGXT6>z( zhM7Mk!#F?8zbpp**3S9i{3ZpFRdRaM*m-Vs^X;Z92`#$#)gGNscJ|usSm@%Q(+j(z z+m{DTE6Cv#-jP{$@H#dPpMHy2um0ry(fa$NYtO8jFxBa8hjBki&aJ+GZtcbPC`%+b z*8kgDA3AFTHJmk~VBu;Tjyc&8ym>}DaNd#(#{gCmK;;FhC`3s@v!rZ9l7Z>kn@B8@ zw|YwU2L~5HYOg;~ovwCC0T4Wruq!0WiNt3G(*=R|Hi1)nEo2#j9EFOIg1EOj;_S0)(>7U3jB zJk3kMRU&V|?MO1~ph7S>9t+CU#TyJ_NT5BlGw>;u3Gu;z&mp3acj1dc=Wd(NilERk zEm9F(FbXB-sy!o3P&D^zE?2%sk~)dFsa^Q09)cz1Bl24`j5IDu0uMP#fBR84S;}}A zVF*K6_G0^1!!BBjcG<33RmVq}v)pOkQnYIrfh)taimasyS9%hZZ^;222PfJEvcvE` zsq{YS&}0GfN7Vg{D#~Studc1F<4@m7>u+yvN+EtM68Q9I-?{Mim31z}fE3p5#3ES- zg`{%YWa-!N{QtjSBx=}v?Dp(ljdIq9Z#;JC!;+HlVRXGm@Qkypa82#2VeuF#>&>S# zMEWM_UhDNT8!Ba8bIo!S8Vbr;I)1$m^uKGYP1mwTd#V#BYlIk+zZ-MZ9)LPHkacX; zek8YHbcv8ZrS8;p={pnYQ4&VCq7D!e3uDbXcge)M%+l!+B6X1@&8)mXa$F`$q3`ETZo2aX+$0{<$^#D}A3hVwWScI_rDt!|r=TK!p bECYxHN<4PM@lVe!_)Q;>{pQCv3HN^iWRRTu diff --git a/anomalydetection/loganomaly/log_anomaly_predict.py b/anomalydetection/loganomaly/log_anomaly_predict.py deleted file mode 100644 index 637bbd8..0000000 --- a/anomalydetection/loganomaly/log_anomaly_predict.py +++ /dev/null @@ -1,131 +0,0 @@ -import torch -import os -import torch.nn as nn -import time -import numpy as np -from anomalydetection.loganomaly.log_anomaly_train import Model -from anomalydetection.loganomaly.log_anomaly_train import train_model - -device = torch.device("cuda" if torch.cuda.is_available() else "cpu") - -def generate_test_label(logkey_path, window_length,num_of_classes): - f = open(logkey_path,'r') - keys = f.readline().split() - keys = list(map(int, keys)) - print(keys) - length = len(keys) - input_1 = np.zeros((length -window_length,num_of_classes)) - output_1 = np.zeros(length -window_length,dtype=np.int) - input_2 = np.zeros((length -window_length,num_of_classes)) - output = np.zeros(length -window_length,dtype=np.int) - for i in range(0,length -window_length): - for t in range(0,num_of_classes): - input_1[i][t] = keys[i] - for j in range(i,i+window_length): - input_2[i][keys[j]-1] += 1 - output[i] = keys[i+window_length]-1 - new_input_1 = np.zeros((length -2*window_length+1,window_length,num_of_classes)) - new_input_2 = np.zeros((length - 2 * window_length + 1, window_length, num_of_classes)) - for i in range(0,length -2*window_length+1): - for j in range(i,i+window_length): - new_input_1[i][j - i] = input_1[j] - new_input_2[i][j-i] = input_2[j] - new_output = output[window_length-1:] - return length,new_input_1,new_input_2,new_output - -def load_model(input_size_1,input_size_2, hidden_size, num_layers, num_classes, model_path): - model = Model(input_size_1,input_size_2,hidden_size, num_layers, num_classes).to(device) - model.load_state_dict(torch.load(model_path, map_location='cpu')) - model.eval() - print('model_path: {}'.format(model_path)) - return model - -def filter_small_top_k(predicted, output): - filter = [] - for p in predicted: - if output[0][p] > 0.001: - filter.append(p) - return filter - -def do_predict(input_size_1,input_size_2, hidden_size, num_layers, num_classes, window_length, model_path, anomaly_test_line_path, num_candidates, logkey_path): - model = load_model(input_size_1,input_size_2 ,hidden_size, num_layers, num_classes, model_path) - start_time = time.time() - TP = 0 - FP = 0 - TN = 0 - FN = 0 - ALL = 0 - length,input_1,input_2,output = 
generate_test_label(logkey_path, window_length,num_classes) - abnormal_label = [] - with open(anomaly_test_line_path) as f: - abnormal_label = [int(x) for x in f.readline().strip().split()] - print('predict start') - with torch.no_grad(): - count_num = 0 - current_file_line = 0 - for i in range(0,length-2*window_length+1): - lineNum = i + 2*window_length - seq = input_1[i] - quan = input_2[i] - label = output[i] - seq = torch.tensor(seq, dtype=torch.float).view(-1, window_length, input_size_1).to(device) - quan = torch.tensor(quan, dtype=torch.float).view(-1, window_length, input_size_2).to(device) - test_output = model(seq,quan) - predicted = torch.argsort(test_output , 1)[0][-num_candidates:] - predicted = filter_small_top_k(predicted, test_output) - print('{} - predict result: {}, true label: {}'.format(lineNum, predicted,label)) - if lineNum in abnormal_label: ## 若出现异常日志,则接下来的预测跳过异常日志,保证进行预测的日志均为正常日志 - i += 2*window_length + 1 - else: - i += 1 - ALL += 1 - if label not in predicted: - if lineNum in abnormal_label: - TP += 1 - else: - FP += 1 - else: - if lineNum in abnormal_label: - FN += 1 - else: - TN += 1 - # Compute precision, recall and F1-measure - if TP + FP == 0: - P = 0 - else: - P = 100 * TP / (TP + FP) - - if TP + FN == 0: - R = 0 - else: - R = 100 * TP / (TP + FN) - - if P + R == 0: - F1 = 0 - else: - F1 = 2 * P * R / (P + R) - - Acc = (TP + TN) * 100 / ALL - print('FP: {}, FN: {}, TP: {}, TN: {}'.format(FP, FN, TP, TN)) - print('Acc: {:.3f}, Precision: {:.3f}%, Recall: {:.3f}%, F1-measure: {:.3f}%'.format(Acc, P, R, F1)) - print('Finished Predicting') - elapsed_time = time.time() - start_time - print('elapsed_time: {}'.format(elapsed_time)) - -if __name__=='__main__': - input_size_1 = 61 - input_size_2 = 61 - hidden_size = 30 - num_of_layers = 2 - num_of_classes = 61 - num_epochs = 100 - batch_size = 200 - window_length = 5 - train_logkey_path = '../../Data/FTTreeResult-HDFS/deeplog_files/logkey/logkey_train' - test_logkey_path = '../../Data/FTTreeResult-HDFS/deeplog_files/logkey/logkey_test' - train_root_path = '../../Data/FTTreeResult-HDFS/model_train/' - label_file_name = '../../Data/FTTreeResult-HDFS/deeplog_files/HDFS_abnormal_label.txt' - model_out_path = train_root_path + 'model_out/' - - do_predict(input_size_1,input_size_2, hidden_size, num_of_layers, num_of_classes, window_length, - model_out_path + 'Adam_batch_size=200;epoch=100.pt', label_file_name, 5, test_logkey_path) \ No newline at end of file diff --git a/anomalydetection/loganomaly/log_anomaly_quantitive_predict.py b/anomalydetection/loganomaly/log_anomaly_quantitive_predict.py deleted file mode 100644 index 6286fce..0000000 --- a/anomalydetection/loganomaly/log_anomaly_quantitive_predict.py +++ /dev/null @@ -1,132 +0,0 @@ -import torch -import os -import torch.nn as nn -import time -import numpy as np -from anomalydetection.loganomaly.log_anomaly_quantitive_train import Model -from anomalydetection.loganomaly.log_anomaly_quantitive_train import train_model - -device = torch.device("cuda" if torch.cuda.is_available() else "cpu") - -def generate_test_label(logkey_path, window_length): - f = open(logkey_path,'r') - keys = f.readline().split() - keys = list(map(int, keys)) - print(keys) - length = len(keys) - input = np.zeros((length -window_length,num_of_classes)) - output = np.zeros(length -window_length,dtype=np.int) - for i in range(0,length -window_length): - for j in range(i,i+window_length): - input[i][keys[j]-1] += 1 - output[i] = keys[i+window_length]-1 - new_input = np.zeros((length 
-2*window_length+1,window_length,num_of_classes)) - for i in range(0,length -2*window_length+1): - for j in range(i,i+window_length): - new_input[i][j-i] = input[j] - new_output = output[window_length-1:] - print(new_input.shape) - print(new_output.shape) - print(new_input[0]) - print(new_output[0]) - return length,new_input,new_output - -def load_quantitive_model(input_size, hidden_size, num_layers, num_classes, model_path): - model2 = Model(input_size, hidden_size, num_layers, num_classes).to(device) - model2.load_state_dict(torch.load(model_path, map_location='cpu')) - model2.eval() - print('model_path: {}'.format(model_path)) - return model2 - -def filter_small_top_k(predicted, output): - filter = [] - for p in predicted: - if output[0][p] > 0.001: - filter.append(p) - return filter - -def do_predict(input_size, hidden_size, num_layers, num_classes, window_length, model_path, anomaly_test_line_path, num_candidates, logkey_path): - quantitive_model = load_quantitive_model(input_size, hidden_size, num_layers, num_classes, model_path) - start_time = time.time() - TP = 0 - FP = 0 - TN = 0 - FN = 0 - ALL = 0 - length,input,output = generate_test_label(logkey_path, window_length) - abnormal_label = [] - with open(anomaly_test_line_path) as f: - abnormal_label = [int(x) for x in f.readline().strip().split()] - print('predict start') - with torch.no_grad(): - count_num = 0 - current_file_line = 0 - for i in range(0,length-2*window_length+1): - lineNum = i + 2*window_length - quan = input[i] - label = output[i] - quan = torch.tensor(quan, dtype=torch.float).view(-1, window_length, input_size).to(device) - test_output = quantitive_model(quan) - predicted = torch.argsort(test_output , 1)[0][-num_candidates:] - predicted = filter_small_top_k(predicted, test_output) - print('{} - predict result: {}, true label: {}'.format(lineNum, predicted,label)) - if lineNum in abnormal_label: ## 若出现异常日志,则接下来的预测跳过异常日志,保证进行预测的日志均为正常日志 - i += 2*window_length + 1 - else: - i += 1 - ALL += 1 - if label not in predicted: - if lineNum in abnormal_label: - TP += 1 - else: - FP += 1 - else: - if lineNum in abnormal_label: - FN += 1 - else: - TN += 1 - # Compute precision, recall and F1-measure - if TP + FP == 0: - P = 0 - else: - P = 100 * TP / (TP + FP) - - if TP + FN == 0: - R = 0 - else: - R = 100 * TP / (TP + FN) - - if P + R == 0: - F1 = 0 - else: - F1 = 2 * P * R / (P + R) - - Acc = (TP + TN) * 100 / ALL - print('FP: {}, FN: {}, TP: {}, TN: {}'.format(FP, FN, TP, TN)) - print('Acc: {:.3f}, Precision: {:.3f}%, Recall: {:.3f}%, F1-measure: {:.3f}%'.format(Acc, P, R, F1)) - print('Finished Predicting') - elapsed_time = time.time() - start_time - print('elapsed_time: {}'.format(elapsed_time)) - - -if __name__ == '__main__': - input_size = 61 - hidden_size = 30 - num_of_layers = 2 - num_of_classes = 61 - num_epochs = 100 - batch_size = 200 - window_length = 5 - train_logkey_path = '../../Data/FTTreeResult-HDFS/deeplog_files/logkey/logkey_train' - test_logkey_path = '../../Data/FTTreeResult-HDFS/deeplog_files/logkey/logkey_test' - train_root_path = '../../Data/FTTreeResult-HDFS/model_train/' - label_file_name = '../../Data/FTTreeResult-HDFS/deeplog_files/HDFS_abnormal_label.txt' - model_out_path = train_root_path + 'quantitive_model_out/' - - train_model(window_length, input_size, hidden_size, - num_of_layers, num_of_classes, num_epochs, batch_size, train_root_path, - model_out_path, train_logkey_path) - - do_predict(input_size, hidden_size, num_of_layers, num_of_classes, window_length, - model_out_path + 
'Adam_batch_size=200;epoch=100.pt', label_file_name, 3, test_logkey_path) - diff --git a/anomalydetection/loganomaly/log_anomaly_quantitive_train.py b/anomalydetection/loganomaly/log_anomaly_quantitive_train.py deleted file mode 100644 index b4b3b7f..0000000 --- a/anomalydetection/loganomaly/log_anomaly_quantitive_train.py +++ /dev/null @@ -1,98 +0,0 @@ -import torch -import torch.nn as nn -import torch.optim as optim -from tensorboardX import SummaryWriter -from torch.utils.data import TensorDataset, DataLoader -import numpy as np -import argparse -import os - -device = torch.device("cuda" if torch.cuda.is_available() else "cpu") - -class Model(nn.Module): - def __init__(self, input_size, hidden_size, num_of_layers, out_size): - super(Model, self).__init__() - self.hidden_size = hidden_size - self.num_of_layers = num_of_layers - self.lstm = nn.LSTM(input_size, hidden_size, num_of_layers, batch_first=True) - self.fc = nn.Linear(hidden_size, out_size) - - def init_hidden(self, size): - h0 = torch.zeros(self.num_of_layers, size, self.hidden_size).to(device) - c0 = torch.zeros(self.num_of_layers, size, self.hidden_size).to(device) - return (h0, c0) - - def forward(self, input): - out, _ = self.lstm(input, self.init_hidden(input.size(0))) - out = self.fc(out[:, -1, :]) - return out - - -def generate_quantitive_label(logkey_path, window_length,num_of_classes): - f = open(logkey_path,'r') - keys = f.readline().split() - keys = list(map(int, keys)) - print(keys) - length = len(keys) - input = np.zeros((length -window_length,num_of_classes)) - output = np.zeros(length -window_length,dtype=np.int) - for i in range(0,length -window_length): - for j in range(i,i+window_length): - input[i][keys[j]-1] += 1 - output[i] = keys[i+window_length]-1 - new_input = np.zeros((length -2*window_length+1,window_length,num_of_classes)) - for i in range(0,length -2*window_length+1): - for j in range(i,i+window_length): - new_input[i][j-i] = input[j] - new_output = output[window_length-1:] - dataset = TensorDataset(torch.tensor(new_input,dtype=torch.float),torch.tensor(new_output,dtype=torch.long)) - print(new_input.shape) - print(new_output.shape) - return dataset - -def train_model(window_length, input_size, hidden_size, num_of_layers, num_of_classes, num_epochs, batch_size, root_path, model_output_directory,logkey_path): - # log setting - log_directory = root_path + 'quantitive_log_out/' - log_template = 'Adam_batch_size=' + str(batch_size) + ';epoch=' + str(num_epochs) - - model = Model(input_size, hidden_size, num_of_layers, num_of_classes).to(device) - # create data set - quantitive_data_set = generate_quantitive_label(logkey_path, window_length,num_of_classes) - # create data_loader - data_loader = DataLoader(dataset=quantitive_data_set, batch_size=batch_size, shuffle=True, pin_memory=False) - writer = SummaryWriter(logdir=log_directory + log_template) - - # Loss and optimizer classify job - criterion = nn.CrossEntropyLoss() - optimizer = optim.Adam(model.parameters()) - - # Training - for epoch in range(num_epochs): - train_loss = 0 - for step, (quan, label) in enumerate(data_loader): - quan = quan.clone().detach().view(-1, window_length, input_size).to(device) - output = model(quan) - - loss = criterion(output, label.to(device)) - - # Backward and optimize - optimizer.zero_grad() - loss.backward() - train_loss += loss.item() - optimizer.step() - print('Epoch [{}/{}], training_loss: {:.4f}'.format(epoch + 1, num_epochs, train_loss / len(data_loader.dataset))) - if (epoch + 1) % 100 == 0: - if not 
os.path.isdir(model_output_directory): - os.makedirs(model_output_directory) - e_log = 'Adam_batch_size=' + str(batch_size) + ';epoch=' + str(epoch+1) - torch.save(model.state_dict(), model_output_directory + '/' + e_log + '.pt') - writer.close() - print('Training finished') - - - - - - - - diff --git a/anomalydetection/loganomaly/log_anomaly_sequence_predict.py b/anomalydetection/loganomaly/log_anomaly_sequence_predict.py deleted file mode 100644 index 5542c3a..0000000 --- a/anomalydetection/loganomaly/log_anomaly_sequence_predict.py +++ /dev/null @@ -1,123 +0,0 @@ -import torch -import os -import torch.nn as nn -import time -import numpy as np -from anomalydetection.loganomaly.log_anomaly_sequence_train import Model -from anomalydetection.loganomaly.log_anomaly_sequence_train import train_model - -device = torch.device("cuda" if torch.cuda.is_available() else "cpu") - -def generate_test_label(logkey_path, window_length,num_of_classes): - f = open(logkey_path,'r') - keys = f.readline().split() - keys = list(map(int, keys)) - print(keys) - length = len(keys) - input_1 = np.zeros((length -window_length,1)) - output = np.zeros(length -window_length,dtype=np.int) - for i in range(0,length -window_length): - for j in range(i,i+window_length): - input_1[i][0] = keys[j] - output[i] = keys[i+window_length]-1 - new_input_1 = np.zeros((length -2*window_length+1,window_length,1)) - for i in range(0,length -2*window_length+1): - for j in range(i,i+window_length): - new_input_1[i][j - i] = input_1[j] - new_output = output[window_length-1:] - return length,new_input_1,new_output - -def load_model(input_size_1,input_size_2, hidden_size, num_layers, num_classes, model_path): - model = Model(input_size_1,input_size_2,hidden_size, num_layers, num_classes).to(device) - model.load_state_dict(torch.load(model_path, map_location='cpu')) - model.eval() - print('model_path: {}'.format(model_path)) - return model - -def filter_small_top_k(predicted, output): - filter = [] - for p in predicted: - if output[0][p] > 0.001: - filter.append(p) - return filter - -def do_predict(input_size_1,input_size_2, hidden_size, num_layers, num_classes, window_length, model_path, anomaly_test_line_path, num_candidates, logkey_path): - model = load_model(input_size_1,input_size_2 ,hidden_size, num_layers, num_classes, model_path) - start_time = time.time() - TP = 0 - FP = 0 - TN = 0 - FN = 0 - ALL = 0 - length,input_1,output = generate_test_label(logkey_path, window_length,num_classes) - abnormal_label = [] - with open(anomaly_test_line_path) as f: - abnormal_label = [int(x) for x in f.readline().strip().split()] - print('predict start') - with torch.no_grad(): - count_num = 0 - current_file_line = 0 - for i in range(0,length-2*window_length+1): - lineNum = i + 2*window_length - seq = input_1[i] - label = output[i] - seq = torch.tensor(seq, dtype=torch.float).view(-1, window_length, input_size_1).to(device) - test_output = model(seq) - predicted = torch.argsort(test_output , 1)[0][-num_candidates:] - predicted = filter_small_top_k(predicted, test_output) - print('{} - predict result: {}, true label: {}'.format(lineNum, predicted,label)) - if lineNum in abnormal_label: ## 若出现异常日志,则接下来的预测跳过异常日志,保证进行预测的日志均为正常日志 - i += 2*window_length + 1 - else: - i += 1 - ALL += 1 - if label not in predicted: - if lineNum in abnormal_label: - TP += 1 - else: - FP += 1 - else: - if lineNum in abnormal_label: - FN += 1 - else: - TN += 1 - # Compute precision, recall and F1-measure - if TP + FP == 0: - P = 0 - else: - P = 100 * TP / (TP + FP) - - if TP 
+ FN == 0: - R = 0 - else: - R = 100 * TP / (TP + FN) - - if P + R == 0: - F1 = 0 - else: - F1 = 2 * P * R / (P + R) - - Acc = (TP + TN) * 100 / ALL - print('FP: {}, FN: {}, TP: {}, TN: {}'.format(FP, FN, TP, TN)) - print('Acc: {:.3f}, Precision: {:.3f}%, Recall: {:.3f}%, F1-measure: {:.3f}%'.format(Acc, P, R, F1)) - print('Finished Predicting') - elapsed_time = time.time() - start_time - print('elapsed_time: {}'.format(elapsed_time)) - -if __name__=='__main__': - input_size_1 = 1 - input_size_2 = 61 - hidden_size = 30 - num_of_layers = 2 - num_of_classes = 61 - num_epochs = 100 - batch_size = 200 - window_length = 5 - train_logkey_path = '../../Data/FTTreeResult-HDFS/deeplog_files/logkey/logkey_train' - test_logkey_path = '../../Data/FTTreeResult-HDFS/deeplog_files/logkey/logkey_test' - train_root_path = '../../Data/FTTreeResult-HDFS/model_train/' - label_file_name = '../../Data/FTTreeResult-HDFS/deeplog_files/HDFS_abnormal_label.txt' - model_out_path = train_root_path + 'sequence_model_out/' - - do_predict(input_size_1,input_size_2, hidden_size, num_of_layers, num_of_classes, window_length, - model_out_path + 'Adam_batch_size=200;epoch=100.pt', label_file_name, 3, test_logkey_path) \ No newline at end of file diff --git a/anomalydetection/loganomaly/log_anomaly_sequence_train.py b/anomalydetection/loganomaly/log_anomaly_sequence_train.py deleted file mode 100644 index dab9ed3..0000000 --- a/anomalydetection/loganomaly/log_anomaly_sequence_train.py +++ /dev/null @@ -1,107 +0,0 @@ -import torch -import torch.nn as nn -import torch.optim as optim -from tensorboardX import SummaryWriter -from torch.utils.data import TensorDataset, DataLoader -import numpy as np -import argparse -import os -from . import * - -device = torch.device("cuda" if torch.cuda.is_available() else "cpu") - - -def generate_label(logkey_path, window_length,num_of_classes): - f = open(logkey_path,'r') - keys = f.readline().split() - keys = list(map(int, keys)) - print(keys) - length = len(keys) - input_1 = np.zeros((length -window_length,1)) - output = np.zeros(length -window_length,dtype=np.int) - for i in range(0,length -window_length): - for j in range(i,i+window_length): - input_1[i][0] = keys[j] - output[i] = keys[i+window_length]-1 - new_input_1 = np.zeros((length -2*window_length+1,window_length,1)) - for i in range(0,length -2*window_length+1): - for j in range(i,i+window_length): - new_input_1[i][j - i] = input_1[j] - new_output = output[window_length-1:] - print(new_input_1.shape) - print(new_output.shape) - dataset = TensorDataset(torch.tensor(new_input_1,dtype=torch.float),torch.tensor(new_output,dtype=torch.long)) - return dataset - -class Model(nn.Module): - def __init__(self, input_size_0,input_size_1, hidden_size, num_of_layers, out_size): - super(Model, self).__init__() - self.hidden_size = hidden_size - self.num_of_layers = num_of_layers - self.lstm0 = nn.LSTM(input_size_0, hidden_size, num_of_layers, batch_first=True) - self.fc = nn.Linear(hidden_size, out_size) - - def forward(self, input_0): - h0_0 = torch.zeros(self.num_of_layers, input_0.size(0), self.hidden_size).to(device) - c0_0 = torch.zeros(self.num_of_layers, input_0.size(0), self.hidden_size).to(device) - out_0, _ = self.lstm0(input_0, (h0_0, c0_0)) - out = self.fc(out_0[:, -1, :]) - return out - -def train_model(window_length, input_size_0,input_size_1, hidden_size, num_of_layers, num_of_classes, num_epochs, batch_size, root_path, model_output_directory,logkey_path): - # log setting - log_directory = root_path + 'sequence_log_out/' - 
log_template = 'Adam_batch_size=' + str(batch_size) + ';epoch=' + str(num_epochs) - - print("Train num_classes: ", num_of_classes) - model = Model(input_size_0,input_size_1, hidden_size, num_of_layers, num_of_classes).to(device) - # create data set - data_set = generate_label(logkey_path, window_length,num_of_classes) - # create data_loader - data_loader = DataLoader(dataset=data_set, batch_size=batch_size, shuffle=True, pin_memory=False) - writer = SummaryWriter(logdir=log_directory + log_template) - - # Loss and optimizer classify job - criterion = nn.CrossEntropyLoss() - optimizer = optim.Adam(model.parameters()) - - # Training - for epoch in range(num_epochs): - train_loss = 0 - for step, (seq, label) in enumerate(data_loader): - seq = seq.clone().detach().view(-1, window_length, input_size_0).to(device) - output = model(seq) - - loss = criterion(output, label.to(device)) - - # Backward and optimize - optimizer.zero_grad() - loss.backward() - train_loss += loss.item() - optimizer.step() - print('Epoch [{}/{}], training_loss: {:.6f}'.format(epoch + 1, num_epochs, train_loss / len(data_loader.dataset))) - if (epoch + 1) % 100 == 0: - if not os.path.isdir(model_output_directory): - os.makedirs(model_output_directory) - e_log = 'Adam_batch_size=' + str(batch_size) + ';epoch=' + str(epoch+1) - torch.save(model.state_dict(), model_output_directory + '/' + e_log + '.pt') - writer.close() - print('Training finished') - -if __name__=='__main__': - input_size_0 = 1 - input_size_1 = 61 - hidden_size = 30 - num_of_layers = 2 - num_of_classes = 61 - num_epochs = 100 - batch_size = 200 - window_length = 5 - train_logkey_path = '../../Data/FTTreeResult-HDFS/deeplog_files/logkey/logkey_train' - test_logkey_path = '../../Data/FTTreeResult-HDFS/deeplog_files/logkey/logkey_test' - train_root_path = '../../Data/FTTreeResult-HDFS/model_train/' - label_file_name = '../../Data/FTTreeResult-HDFS/deeplog_files/HDFS_abnormal_label.txt' - model_out_path = train_root_path + 'sequence_model_out/' - train_model(window_length, input_size_0,input_size_1, hidden_size, - num_of_layers, num_of_classes, num_epochs, batch_size, train_root_path, - model_out_path, train_logkey_path) \ No newline at end of file diff --git a/anomalydetection/loganomaly/log_anomaly_sequential_predict.py b/anomalydetection/loganomaly/log_anomaly_sequential_predict.py index 7c010b7..a35446b 100644 --- a/anomalydetection/loganomaly/log_anomaly_sequential_predict.py +++ b/anomalydetection/loganomaly/log_anomaly_sequential_predict.py @@ -83,7 +83,7 @@ def do_predict(input_size, hidden_size, num_layers, num_classes, window_length, predicted = torch.argsort(output, 1)[0][-num_candidates:] predicted = filter_small_top_k(predicted, output) #print(output) - print('{} - predict result: {}, true label: {}'.format(count_num, predicted, vec_to_class_type[tuple(label)])) + #print('{} - predict result: {}, true label: {}'.format(count_num, predicted, vec_to_class_type[tuple(label)])) if lineNum in abnormal_label: ## 若出现异常日志,则接下来的预测跳过异常日志,保证进行预测的日志均为正常日志 i += window_length + 1 skip_count += 1 diff --git a/anomalydetection/loganomaly/log_anomaly_sequential_train.py b/anomalydetection/loganomaly/log_anomaly_sequential_train.py index 3d87fc2..fe7f7d7 100644 --- a/anomalydetection/loganomaly/log_anomaly_sequential_train.py +++ b/anomalydetection/loganomaly/log_anomaly_sequential_train.py @@ -40,7 +40,7 @@ def generate_seq_label(file_path, window_length, pattern_vec_file): def train_model(window_length, input_size, hidden_size, num_of_layers, num_of_classes, 
num_epochs, batch_size, root_path, model_output_directory, data_file, pattern_vec_file): # log setting - log_directory = root_path + 'sequence_log_out/' + log_directory = root_path + 'log_out/' log_template = 'Adam_batch_size=' + str(batch_size) + ';epoch=' + str(num_epochs) print("Train num_classes: ", num_of_classes) diff --git a/anomalydetection/loganomaly/log_anomaly_train.py b/anomalydetection/loganomaly/log_anomaly_train.py deleted file mode 100644 index b515ed5..0000000 --- a/anomalydetection/loganomaly/log_anomaly_train.py +++ /dev/null @@ -1,122 +0,0 @@ -import torch -import torch.nn as nn -import torch.optim as optim -from tensorboardX import SummaryWriter -from torch.utils.data import TensorDataset, DataLoader -import numpy as np -import argparse -import os -from . import * - -device = torch.device("cuda" if torch.cuda.is_available() else "cpu") - - -def generate_label(logkey_path, window_length,num_of_classes): - f = open(logkey_path,'r') - keys = f.readline().split() - keys = list(map(int, keys)) - print(keys) - length = len(keys) - input_1 = np.zeros((length -window_length,num_of_classes)) - output_1 = np.zeros(length -window_length,dtype=np.int) - input_2 = np.zeros((length -window_length,num_of_classes)) - output = np.zeros(length -window_length,dtype=np.int) - for i in range(0,length -window_length): - for t in range(0,num_of_classes): - input_1[i][t] = keys[i] - for j in range(i,i+window_length): - input_2[i][keys[j]-1] += 1 - output[i] = keys[i+window_length]-1 - new_input_1 = np.zeros((length -2*window_length+1,window_length,num_of_classes)) - new_input_2 = np.zeros((length - 2 * window_length + 1, window_length, num_of_classes)) - for i in range(0,length -2*window_length+1): - for j in range(i,i+window_length): - new_input_1[i][j - i] = input_1[j] - new_input_2[i][j-i] = input_2[j] - new_output = output[window_length-1:] - print(new_input_1.shape) - print(new_input_2.shape) - print(new_output.shape) - dataset = TensorDataset(torch.tensor(new_input_1,dtype=torch.float), - torch.tensor(new_input_2,dtype=torch.float),torch.tensor(new_output,dtype=torch.long)) - return dataset - -class Model(nn.Module): - def __init__(self, input_size_0,input_size_1, hidden_size, num_of_layers, out_size): - super(Model, self).__init__() - self.hidden_size = hidden_size - self.num_of_layers = num_of_layers - self.lstm0 = nn.LSTM(input_size_0, hidden_size, num_of_layers, batch_first=True) - self.lstm1 = nn.LSTM(input_size_1, hidden_size, num_of_layers, batch_first=True) - self.fc = nn.Linear(2*hidden_size, out_size) - - - def forward(self, input_0,input_1): - h0_0 = torch.zeros(self.num_of_layers, input_0.size(0), self.hidden_size).to(device) - c0_0 = torch.zeros(self.num_of_layers, input_0.size(0), self.hidden_size).to(device) - out_0, _ = self.lstm0(input_0, (h0_0, c0_0)) - h0_1 = torch.zeros(self.num_of_layers, input_1.size(0), self.hidden_size).to(device) - c0_1 = torch.zeros(self.num_of_layers, input_1.size(0), self.hidden_size).to(device) - out_1, _ = self.lstm1(input_1, (h0_1, c0_1)) - multi_out = torch.cat((out_0[:, -1, :], out_1[:, -1, :]), -1) - out = self.fc(multi_out) - return out - -def train_model(window_length, input_size_0,input_size_1, hidden_size, num_of_layers, num_of_classes, num_epochs, batch_size, root_path, model_output_directory,logkey_path): - # log setting - log_directory = root_path + 'log_out/' - log_template = 'Adam_batch_size=' + str(batch_size) + ';epoch=' + str(num_epochs) - - print("Train num_classes: ", num_of_classes) - model = 
Model(input_size_0,input_size_1, hidden_size, num_of_layers, num_of_classes).to(device) - # create data set - data_set = generate_label(logkey_path, window_length,num_of_classes) - # create data_loader - data_loader = DataLoader(dataset=data_set, batch_size=batch_size, shuffle=True, pin_memory=False) - writer = SummaryWriter(logdir=log_directory + log_template) - - # Loss and optimizer classify job - criterion = nn.CrossEntropyLoss() - optimizer = optim.Adam(model.parameters()) - - # Training - for epoch in range(num_epochs): - train_loss = 0 - for step, (seq, quan, label) in enumerate(data_loader): - seq = seq.clone().detach().view(-1, window_length, input_size_0).to(device) - quan = quan.clone().detach().view(-1, window_length, input_size_1).to(device) - output = model(seq,quan) - - loss = criterion(output, label.to(device)) - - # Backward and optimize - optimizer.zero_grad() - loss.backward() - train_loss += loss.item() - optimizer.step() - print('Epoch [{}/{}], training_loss: {:.6f}'.format(epoch + 1, num_epochs, train_loss / len(data_loader.dataset))) - if (epoch + 1) % 100 == 0: - if not os.path.isdir(model_output_directory): - os.makedirs(model_output_directory) - e_log = 'Adam_batch_size=' + str(batch_size) + ';epoch=' + str(epoch+1) - torch.save(model.state_dict(), model_output_directory + '/' + e_log + '.pt') - writer.close() - print('Training finished') - -if __name__=='__main__': - input_size_0 = 61 - input_size_1 = 61 - hidden_size = 30 - num_of_layers = 2 - num_of_classes = 61 - num_epochs = 100 - batch_size = 200 - window_length = 10 - train_logkey_path = '../../Data/FTTreeResult-HDFS/deeplog_files/logkey/logkey_train' - test_logkey_path = '../../Data/FTTreeResult-HDFS/deeplog_files/logkey/logkey_test' - train_root_path = '../../Data/FTTreeResult-HDFS/model_train/' - label_file_name = '../../Data/FTTreeResult-HDFS/deeplog_files/HDFS_abnormal_label.txt' - model_out_path = train_root_path + 'model_out/' - train_model(window_length, input_size_0,input_size_1, hidden_size, - num_of_layers, num_of_classes, num_epochs, batch_size, train_root_path, - model_out_path, train_logkey_path) \ No newline at end of file diff --git a/anomalydetection/robust/__pycache__/__init__.cpython-36.pyc b/anomalydetection/robust/__pycache__/__init__.cpython-36.pyc index 9ec1bcab631d6f9267c860993ef65bb36cc9f272..96ca0bf2b56b979fa771187f7bc7de2942910feb 100644 GIT binary patch delta 26 hcmZ3-xQ>z4n3tC;Vz4n3tC;Z6d1!r(0%DYH^HjrO!ma1OQuJ2cQ4| diff --git a/anomalydetection/robust/__pycache__/bi_lstm_att_predict.cpython-36.pyc b/anomalydetection/robust/__pycache__/bi_lstm_att_predict.cpython-36.pyc index 7fca6266b86e95905ad8d42b8cf807aaa6eb4d4d..b34a4baf061898e9124ed2557a487656f86f9311 100644 GIT binary patch literal 4306 zcmZ`+&2J<}6|d^A>G`t9_N+g4on;XrXu`&1iA17kNp{Jy;$zpCjip5+C!_XMd)(um z?%C=ZuWh<7OcHU45{_`(W8ec5Km(#`EK$x?YQP(c8YJKvcu#fKL_RyNL zd+fb83P;(>5tgt&V8RjZ2P}7n?vxEZ;Z6BHHpM$rUO2fmtSCEoFy@>+S%1tP!w5fR zkJ!&xZb9+NrslHG@j-g1bx__#$(B(mrns_lHng#~Gi9_Z-r}1bSG#I5jzevag0Xh@ z;xtg&&6JEV%Eob|a9*B##v~o<`QAHk?mRr+i{dat8;5~RqGYf$I)0kUY(E|E#OWYN zg7`R#vYiL%;O?v+B;wG zmG#7-<4ZCW&S%GAf;+c1Ue)}_9JH+FvHZMtJB~8dPeGs!mk{mG*)$qy(Q~7CxDTNk7aq-=hz}By=(7`f7is*b?S5+MRKaH~1WX$zJF3D;W0)ru29yq>h2> z572>~V5!{7ZNUkaAA3Up2M}~~cL=Bs-P|FdevCQJFy~EyHdPVc(4WCGcMTx7OY2&? 
diff --git a/anomalydetection/robust/__pycache__/bi_lstm_att_train.cpython-36.pyc b/anomalydetection/robust/__pycache__/bi_lstm_att_train.cpython-36.pyc index 22214b0493bcb0a57a8dbe6b3d15daccd19b4a02..54870d8617ed723d1e27985122e17b443cb10d6d 100644 GIT binary patch
zym~UW{b<#6AmxhiwrCZacODY21rl)&?6(_hXK`dCx(mC*a4mF&H8u^G@YP~%TV^vJ z 0.001: + filter.append(p) + return filter + + +def generate_robust_seq_label(file_path, sequence_length): + num_of_sessions = 0 + input_data, output_data, mask_data = [], [], [] + train_file = pd.read_csv(file_path) + i = 0 + while i < len(train_file): + num_of_sessions += 1 + line = [int(id) for id in train_file["Sequence"][i].split(' ')] + line = line[0:sequence_length] + if len(line) < sequence_length: + line.extend(list([0]) * (sequence_length - len(line))) + input_data.append(line) + output_data.append(int(train_file["label"][i])) + i += 1 + data_set = TensorDataset(torch.tensor(input_data), torch.tensor(output_data)) + return data_set + + +def get_batch_semantic(seq, pattern_vec_file): with open(pattern_vec_file, 'r') as pattern_file: - i = 0 - for line in pattern_file.readlines(): - pattern, vec = line.split('[:]') - pattern_vector = tuple(map(float, vec.strip().split(' '))) - vec_to_class_type[pattern_vector] = i - i = i + 1 + class_type_to_vec = json.load(pattern_file) + batch_data = [] + for s in seq: + semantic_line = [] + for event in s.numpy().tolist(): + if event == 0: + semantic_line.append([-1] * 300) + else: + semantic_line.append(class_type_to_vec[str(event)]) + batch_data.append(semantic_line) + return batch_data + + +def do_predict(input_size, hidden_size, num_layers, num_classes, sequence_length, model_path, test_file_path, batch_size, pattern_vec_json): sequential_model = load_sequential_model(input_size, hidden_size, num_layers, num_classes, model_path) @@ -49,44 +86,30 @@ def do_predict(input_size, hidden_size, num_layers, num_classes, window_length, FP = 0 TN = 0 FN = 0 - ALL = 0 - abnormal_loader = generate(test_file_path, window_length) - abnormal_label = [] - with open(anomaly_test_line_path) as f: - abnormal_label = [int(x) for x in f.readline().strip().split()] + + # create data set + sequence_data_set = generate_robust_seq_label(test_file_path, sequence_length) + # create data_loader + data_loader = DataLoader(dataset=sequence_data_set, batch_size=batch_size, shuffle=True, pin_memory=False) + print('predict start') with torch.no_grad(): - count_num = 0 - current_file_line = 0 - for line in abnormal_loader: - i = 0 - # first traverse [0, window_size) - while i < len(line) - window_length: - lineNum = current_file_line * 10 + i + window_length + 1 - count_num += 1 - seq = line[i:i + window_length] - label = line[i + window_length] - seq = torch.tensor(seq, dtype=torch.float).view(-1, window_length, input_size).to(device) - #label = torch.tensor(label).view(-1).to(device) - output = sequential_model(seq) - predicted = torch.argsort(output, 1)[0][-num_candidates:] - print('{} - predict result: {}, true label: {}'.format(count_num, predicted, vec_to_class_type[tuple(label)])) - if lineNum in abnormal_label: ## 若出现异常日志,则接下来的预测跳过异常日志,保证进行预测的日志均为正常日志 - i += window_length + 1 - else: - i += 1 - ALL += 1 - if vec_to_class_type[tuple(label)] not in predicted: - if lineNum in abnormal_label: - TN += 1 - else: - FN += 1 - else: - if lineNum in abnormal_label: - FP += 1 - else: - TP += 1 - current_file_line += 1 + count = 0 + for step, (seq, label) in enumerate(data_loader): + batch_data = get_batch_semantic(seq, pattern_vec_json) + seq = torch.tensor(batch_data) + seq = seq.view(-1, sequence_length, input_size).to(device) + output = sequential_model(seq)[:, 0].cpu().clone().detach().numpy() + predicted = (output > 0.2).astype(int) + label = np.array([y for y in label]) + TP += ((predicted == 1) * (label 
== 1)).sum() + FP += ((predicted == 1) * (label == 0)).sum() + FN += ((predicted == 0) * (label == 1)).sum() + TN += ((predicted == 0) * (label == 0)).sum() + count += 1 + if count > 100000: + break + ALL = TP + TN + FP + FN # Compute precision, recall and F1-measure if TP + FP == 0: P = 0 diff --git a/anomalydetection/robust/bi_lstm_att_train.py b/anomalydetection/robust/bi_lstm_att_train.py index 0416371..75509f1 100644 --- a/anomalydetection/robust/bi_lstm_att_train.py +++ b/anomalydetection/robust/bi_lstm_att_train.py @@ -1,5 +1,7 @@ # -*- coding: UTF-8 -*- +import json import torch +import pandas as pd import torch.nn as nn import torch.optim as optim import torch.nn.functional as F @@ -17,25 +19,32 @@ def __init__(self, input_size, hidden_size, num_of_layers, out_size, if_bidirect super(Model, self).__init__() self.hidden_size = hidden_size self.num_of_layers = num_of_layers - self.lstm = nn.LSTM(input_size, hidden_size, num_of_layers, batch_first=True, bidirectional=if_bidirectional) - self.fc = nn.Linear(hidden_size*2, out_size) - self.batch_size = batch_size + self.lstm = nn.LSTM(input_size, hidden_size, num_of_layers, batch_first=True, bidirectional=if_bidirectional, dropout=0.5) if if_bidirectional: self.num_of_directions = 2 else: self.num_of_directions = 1 + self.fc = nn.Linear(hidden_size*self.num_of_directions, out_size) + self.batch_size = batch_size self.att_weight = nn.Parameter(torch.randn(1, 1, self.hidden_size*self.num_of_directions)) # self.out = nn.Linear(in_features=in_features, out_features=out_features) +# att BiLSTM paper actually H is different from the paper in paper H = hf + hb def attention_net(self, H): - # print(lstm_output.size()) = (squence_length, batch_size, hidden_size*layer_size) + # print(H.size()) = [batch, numdirec*hidden, seqlen] M = F.tanh(H) a = F.softmax(torch.matmul(self.att_weight, M), 2) a = torch.transpose(a, 1, 2) return torch.bmm(H, a) + def robust_attention_net(self, H): + # print(H.size()) = [batch, numdirec*hidden, seqlen] + M = torch.matmul(self.att_weight, H) + a = torch.tanh(M) + a = torch.transpose(a, 1, 2) + return torch.bmm(H, a) def init_hidden(self, size): # size self.batch_size same @@ -52,12 +61,12 @@ def forward(self, input): # out shape [batch, seqlen, numdirec*hidden] out = torch.transpose(out, 1, 2) # out shape [batch, numdirec*hidden, seqlen] - att_out = self.attention_net(out) + att_out = self.robust_attention_net(out) out = self.fc(att_out[:, :, 0]) - # print('out[:, -1, :]:') - # print(out) - return out + # out shape[batch, num_of_class = 1] + # add sigmoid + return torch.sigmoid(out) def generate_seq_label(file_path, window_length, pattern_vec_file): @@ -75,8 +84,9 @@ def generate_seq_label(file_path, window_length, pattern_vec_file): for line in file.readlines(): num_of_sessions += 1 line = tuple(map(lambda n: tuple(map(float, n.strip().split())), [x for x in line.strip().split(',') if len(x) > 0])) - if len(line) < 10: - print(line) + if len(line) < window_length + 1: + # print(line) + continue for i in range(len(line) - window_length): input_data.append(line[i:i + window_length]) # line[i] is a list need to read file form a dic{vec:log_key} to get log key @@ -85,7 +95,31 @@ def generate_seq_label(file_path, window_length, pattern_vec_file): return data_set -def train_model(window_length, input_size, hidden_size, num_of_layers, num_of_classes, num_epochs, batch_size, root_path, model_output_directory, data_file, pattern_vec_file): +def generate_robust_seq_label(file_path, sequence_length, pattern_vec_file): + with 
open(pattern_vec_file, 'r') as pattern_file: + class_type_to_vec = json.load(pattern_file) + num_of_sessions = 0 + input_data, output_data = [], [] + train_file = pd.read_csv(file_path) + for i in range(len(train_file)): + num_of_sessions += 1 + line = [int(id) for id in train_file["Sequence"][i].split(' ')] + line = line[0:sequence_length] + if len(line) < sequence_length: + line.extend(list([0]) * (sequence_length - len(line))) + semantic_line = [] + for event in line: + if event == 0: + semantic_line.append([-1] * 300) + else: + semantic_line.append(class_type_to_vec[str(event)]) + input_data.append(semantic_line) + output_data.append(int(train_file["label"][i])) + data_set = TensorDataset(torch.tensor(input_data, dtype=torch.float), torch.tensor(output_data)) + return data_set + + +def train_model(sequence_length, input_size, hidden_size, num_of_layers, num_of_classes, num_epochs, batch_size, root_path, model_output_directory, data_file, pattern_vec_file): # log setting log_directory = root_path + 'log_out/' log_template = 'Adam_batch_size=' + str(batch_size) + ';epoch=' + str(num_epochs) @@ -93,23 +127,23 @@ def train_model(window_length, input_size, hidden_size, num_of_layers, num_of_cl print("Train num_classes: ", num_of_classes) model = Model(input_size, hidden_size, num_of_layers, num_of_classes, True, batch_size).to(device) # create data set - sequence_data_set = generate_seq_label(data_file, window_length, pattern_vec_file) + sequence_data_set = generate_robust_seq_label(data_file, sequence_length, pattern_vec_file) # create data_loader data_loader = DataLoader(dataset=sequence_data_set, batch_size=batch_size, shuffle=True, pin_memory=False) writer = SummaryWriter(logdir=log_directory + log_template) # Loss and optimizer classify job - criterion = nn.CrossEntropyLoss() + criterion = nn.BCELoss() optimizer = optim.Adam(model.parameters()) # Training for epoch in range(num_epochs): train_loss = 0 for step, (seq, label) in enumerate(data_loader): - seq = seq.clone().detach().view(-1, window_length, input_size).to(device) + seq = seq.clone().detach().view(-1, sequence_length, input_size).to(device) output = model(seq) - loss = criterion(output, label.to(device)) + loss = criterion(output.squeeze(-1), label.float().to(device)) # Backward and optimize optimizer.zero_grad() @@ -117,7 +151,7 @@ def train_model(window_length, input_size, hidden_size, num_of_layers, num_of_cl train_loss += loss.item() optimizer.step() print('Epoch [{}/{}], training_loss: {:.4f}'.format(epoch + 1, num_epochs, train_loss / len(data_loader.dataset))) - if (epoch + 1) % 100 == 0: + if (epoch + 1) % num_epochs == 0: if not os.path.isdir(model_output_directory): os.makedirs(model_output_directory) e_log = 'Adam_batch_size=' + str(batch_size) + ';epoch=' + str(epoch+1) diff --git a/anomalydetection/self_att_lstm/__init__.py b/anomalydetection/self_att_lstm/__init__.py new file mode 100644 index 0000000..9764abf --- /dev/null +++ b/anomalydetection/self_att_lstm/__init__.py @@ -0,0 +1 @@ +# -*- coding: UTF-8 -*- \ No newline at end of file diff --git a/anomalydetection/self_att_lstm/__pycache__/__init__.cpython-36.pyc b/anomalydetection/self_att_lstm/__pycache__/__init__.cpython-36.pyc new file mode 100644 index 0000000000000000000000000000000000000000..66927421169bd3547ca887f2883d4c72d610ebc2 GIT binary patch literal 181 zcmXr!<>g{qH7!n@fq~&M5W@i@kmUfx#VkM~g&~+hlhJP_LlH4|xXIhDnk#W6nl>5edVVqShOP&g&EBsIAt xGe0k;I5j6NKCz@EKBu@OHzqzlGcU6wK3=b&@)n0pZhlH>PO2Tq?qVQj004BZGzkCz literal 0 HcmV?d00001 diff --git 
a/anomalydetection/self_att_lstm/__pycache__/self_att_lstm_predict.cpython-36.pyc b/anomalydetection/self_att_lstm/__pycache__/self_att_lstm_predict.cpython-36.pyc new file mode 100644 index 0000000000000000000000000000000000000000..e405fa198859bb7c5166c5ce3250dc8e98e632c0 GIT binary patch
z^R2fJ+fkN}z!nhS?BZ>bEhJ${_3yuhN=8YxKHise5S7F&7C{*Aa1Xx~?r`7o_@Z^1 zA9M`N)(`i)PZptza7Mm-33u@th+;#oEQ1H9yA1!HS`!O=ue5+PKDEtTjqo?M zfC_sQ z&eR^d3ZVr`{n%HVoNYkr@{L}tQp`PwWnLJt$m5vaU`Jaj*Pe*)CH=VPREjwS?ZDbT zr>*|EXBBK3H>iBzWy~^D?=*+$<}3A{>T#r;i>`DV>)k*D%2X=x5^)%IqMvmewwu@r z@z_|;ChMW63)}GtA_c5Uahm;!;x{X{@HLO+ppk3|2tOwwQ zxRJrd5jPrPILgItO6^t{?(9Zs^~Lx^C5;i2LbPtre6$~@X&62cp~vMKzDq)uiA)p# z3JOs^Z_WFj?-_2NyoE0EH!`-__jj;43B&#(I+LG#{^7(C*2LYrQaVFEWtZ8V^AoT1 zU=KuB*rGP^m8}rJOMmE10Y%}Em9XFS(nV~qUtoym@Vbe+bNUv$1)F@pK4c#uYfc(l zY>QcJ(wwxgp7G1iznZl7{#v$YYkh{b8p8brn>1(@;fWe$N>P8nAValYW6CeXzGh8N z)k;9W&XqmSb$uQ}Z9iXhWp}90bJFq`Kf9Ek@Gl1(8^ys1|Ir4Sf z#S#c*MrGU>d+1D&Bd3(@;hHW2|Lz!h+a5(@?GDmBQph_>CU`5xX`&FXdG^Hq#_-y| zkVd0T5na4SOWV7051=S-FDadWXM0XY^ z9HxRJ3U=3#2&#Xvb{rDaC<{q!N+6cD*3W6a_l!`F8yC`~Q2l%~uEe4>Ja+KXj|+A< zk1ze%_$8oQ%oqQZ;x-R2Lq)3>Bd=sjUJta2%;^-M*h>BEL`Ra0{9a;qKtK!hODkb_1La+Ar}gx4>DlUgoP@{sFy@LNW!74sH_~{)xuS zVJQF~I_#e(&fbHg;IKDwm8EPd;2tn?+0q@lfHYhckfyNU!WmuTqK%ZJZy6Ug z+?k7>f#X|nMi6xe z9r+&3{%ayfDAsn8sS3pg-UMa&8zR3Ya)e@em0k(*JztW`%MBvGCvua>Eg~Nfd7sEf zL`bQMynvQI`Ts?y(I!r@8E)2&Q1c?8hV<_@LFhV#n>Nv)=4J5P)V5W`qQZHHAK(g;Buk4f_QUOjN51q*z z@YF0@nA3mACtc+a>r*zFSB+s4X{0q}bewustu)qCH$Hj|s>Od@riya&EXX<)_Sn1>&}I2O-0kw^-R7 zu8};DrEBIQtS@XF_y8>xG-nUGZ;NOYR_7rq_TD(Cy&jMA{`MOO$8J!mzD9w|91`N< z{MwZpJqJgm!=vYuaVVX)cLxIm#Kt(u!cjcR<$jL`9=S&%w_drl`k#NjcJfU`_EvSA z2}RtG_McQ~2QU7L)?NGflgZgnCU?%PnUexeAYqypSo)Lm>#q(bkNMdHdwr}9=8bgt zyf#3US!~DR(Ra9d&ube=Q@dkH9!2<+XNbH)grc6LY$quU$ORDHy@Xo0_!T zYBwLNWTYM18mM!4GAoQj>UxYFG5J-y{WM49N71Yz9NCrTtNKH%6nVQkPRi_pq^rqNFa8 zJ_-Y@T*SRomHVG2WvKkC&VQX*u9;a)H|X6MwUBqvE2zsnS0(`lOQ=NSl(1kDkU0p$ za=LEv52YgcI~vj^#8d}Kd=6Ul3YEqwMAELAB$%(xmVL-LOI|_Kv{1|7-wEyAH-BLS z^}cx`4eGktYTynQGZF?3V|HV6ImXH`XS}0bQ^shok`oyhb_(1W-L{?$Z_R1~Bhmm9 zm=hwie7<5eaJQ^3@7O+HuosXWmk`x_tBZFO&G=Eg>mc^6qUCe>0ONZrl@WY%E?>{G z3O3E^)yOYto#5DEhxNg3cKCNf<4Ifo6uqRRuC1yT+022Bj|>)dw;H{xlC)T-U8{mV zfi>t<-=bXwbt56^_*4PfLrb@kB8=`u3HAv7=Up>riwgP%@D#O;Do<^d$q6PM_!qEM gIE-ARBvar&Gn^k9Cj8TubnTY4VlABYSNysE0$z>yyZ`_I literal 0 HcmV?d00001 diff --git a/anomalydetection/self_att_lstm/self_att_lstm_predict.py b/anomalydetection/self_att_lstm/self_att_lstm_predict.py new file mode 100644 index 0000000..b62d7ed --- /dev/null +++ b/anomalydetection/self_att_lstm/self_att_lstm_predict.py @@ -0,0 +1,246 @@ +# -*- coding: UTF-8 -*- +# -*- coding: UTF-8 -*- +import torch +import os +import torch.nn as nn +import time +from anomalydetection.self_att_lstm.self_att_lstm_train import Model +import torch.nn.functional as F + +# use cuda if available otherwise use cpu +device = torch.device("cuda" if torch.cuda.is_available() else "cpu") + +# len(line) < window_length + +def generate(name, window_length): + log_keys_sequences = list() + with open(name, 'r') as f: + for line in f.readlines(): + line = tuple(map(lambda n: tuple(map(float, n.strip().split())), [x for x in line.strip().split(',') if len(x) > 0])) + # for i in range(len(line) - window_size): + # inputs.add(tuple(line[i:i+window_size])) + log_keys_sequences.append(tuple(line)) + return log_keys_sequences + +def generate_log_deep(name, window_length): + log_keys_sequences = {} + with open(name, 'r') as f: + for line in f.readlines(): + if len(line) < window_length + 1: + continue + ln = list(map(lambda n: n-1, map(int, line.strip().split()))) + # for i in range(len(line) - window_size): + # inputs.add(tuple(line[i:i+window_size])) + log_keys_sequences[tuple(ln)] = log_keys_sequences.get(tuple(ln), 0) + 1 + return log_keys_sequences + + +def load_sequential_model(input_size, hidden_size, 
num_layers, num_classes, model_path, window_size): + + model1 = Model(input_size, hidden_size, num_layers, num_classes, if_bidirectional=False, sequen_len=window_size).to(device) + model1.load_state_dict(torch.load(model_path, map_location='cpu')) + model1.eval() + print('model_path: {}'.format(model_path)) + return model1 + +def filter_small_top_k(predicted, output): + filter = [] + for p in predicted: + if output[0][p] > 0.001: + filter.append(p) + return filter + + +def do_predict(input_size, hidden_size, num_layers, num_classes, window_length, model_path, anomaly_test_line_path, test_file_path, num_candidates, pattern_vec_file): + vec_to_class_type = {} + with open(pattern_vec_file, 'r') as pattern_file: + i = 0 + for line in pattern_file.readlines(): + pattern, vec = line.split('[:]') + pattern_vector = tuple(map(float, vec.strip().split(' '))) + vec_to_class_type[pattern_vector] = i + i = i + 1 + + sequential_model = load_sequential_model(input_size, hidden_size, num_layers, num_classes, model_path, window_length) + + start_time = time.time() + TP = 0 + FP = 0 + TN = 0 + FN = 0 + ALL = 0 + abnormal_loader = generate(test_file_path, window_length) + with open(anomaly_test_line_path) as f: + abnormal_label = [int(x) for x in f.readline().strip().split()] + # for testing model using train set + # abnormal_label = [] + print('predict start') + with torch.no_grad(): + count_num = 0 + current_file_line = 0 + for line in abnormal_loader: + i = 0 + # first traverse [0, window_size) + while i < len(line) - window_length: + lineNum = current_file_line * 200 + i + window_length + 1 + input_abnormal = False + count_num += 1 + seq = line[i:i + window_length] + origin_seq = seq + label = line[i + window_length] + for n in range(len(seq)): + if current_file_line * 200 + i + n + 1 in abnormal_label: + input_abnormal = True + continue + seq = torch.tensor(seq, dtype=torch.float).view(-1, window_length, input_size).to(device) + #label = torch.tensor(label).view(-1).to(device) + output = sequential_model(seq) + output = F.softmax(output, 1) + # print(torch.sort(output, 1)) + predicted = torch.argsort(output, 1)[0][-num_candidates:] + predicted = filter_small_top_k(predicted, output) + # print(predicted) + # print('Fp {} - predict result: {}, true label: {}'.format(lineNum, predicted, vec_to_class_type[tuple(label)])) + '''if lineNum in abnormal_label or in: # 若出现异常日志,则接下来的预测跳过异常日志,保证进行预测的日志均为正常日志 + i += window_length + 1 + else: + i += 1''' + i += 1 + ALL += 1 + if vec_to_class_type[tuple(label)] not in predicted: + if lineNum in abnormal_label or input_abnormal: + TP += 1 + else: + FP += 1 + + else: + if lineNum in abnormal_label or input_abnormal: + print('FN {} - predict result: {}, true label: {}'.format(lineNum, predicted, vec_to_class_type[tuple(label)])) + print(torch.sort(output, 1)) + for l in origin_seq: + print(str(vec_to_class_type[tuple(l)]), end='') + print(',', end='') + print(str(vec_to_class_type[tuple(label)])) + FN += 1 + else: + TN += 1 + current_file_line += 1 + # Compute precision, recall and F1-measure + if TP + FP == 0: + P = 0 + else: + P = 100 * TP / (TP + FP) + + if TP + FN == 0: + R = 0 + else: + R = 100 * TP / (TP + FN) + + if P + R == 0: + F1 = 0 + else: + F1 = 2 * P * R / (P + R) + + Acc = (TP + TN) * 100 / ALL + FAR = FP * 100 / (FP+TN) + print('FP: {}, FN: {}, TP: {}, TN: {}'.format(FP, FN, TP, TN)) + print('Acc: {:.3f}, Precision: {:.3f}%, Recall: {:.3f}%, F1-measure: {:.3f}%, FAR: {:.3f}%'.format(Acc, P, R, F1, FAR)) + print('Finished Predicting') + elapsed_time = 
time.time() - start_time + print('elapsed_time: {}'.format(elapsed_time)) + + #draw_evaluation("Evaluations", ['Acc', 'Precision', 'Recall', 'F1-measure'], [Acc, P, R, F1], 'evaluations', '%') + + +def do_log_deep_predict(input_size, hidden_size, num_layers, num_classes, window_length, model_path, test_normal_file_path, test_abnormal_file_path, num_candidates, pattern_vec_file): + + sequential_model = load_sequential_model(input_size, hidden_size, num_layers, num_classes, model_path, window_length) + + start_time = time.time() + TP = 0 + FP = 0 + TN = 0 + FN = 0 + ALL = 0 + normal_loader = generate_log_deep(test_normal_file_path, window_length) + abnormal_loader = generate_log_deep(test_abnormal_file_path, window_length) + # for testing model using train set + # abnormal_label = [] + print('predict start') + with torch.no_grad(): + count_num = 0 + current_file_line = 0 + for line in normal_loader.keys(): + count_num += 1 + print(count_num) + if count_num > 6000: + break + i = 0 + # first traverse [0, window_size) + while i < len(line) - window_length: + seq = line[i:i + window_length] + label = line[i + window_length] + seq = torch.tensor(seq, dtype=torch.float).view(-1, window_length, input_size).to(device) + #label = torch.tensor(label).view(-1).to(device) + output = sequential_model(seq) + output = F.softmax(output, 1) + # print(torch.sort(output, 1)) + predicted = torch.argsort(output, 1)[0][-num_candidates:] + predicted = filter_small_top_k(predicted, output) + # print(predicted) + # print('Fp {} - predict result: {}, true label: {}'.format(lineNum, predicted, vec_to_class_type[tuple(label)])) + if label in predicted: + TN += normal_loader[line] + else: + FP += normal_loader[line] + i += 1 + with torch.no_grad(): + count_num = 0 + current_file_line = 0 + for line in abnormal_loader.keys(): + count_num += 1 + i = 0 + # first traverse [0, window_size) + while i < len(line) - window_length: + seq = line[i:i + window_length] + label = line[i + window_length] + seq = torch.tensor(seq, dtype=torch.float).view(-1, window_length, input_size).to(device) + #label = torch.tensor(label).view(-1).to(device) + output = sequential_model(seq) + output = F.softmax(output, 1) + # print(torch.sort(output, 1)) + predicted = torch.argsort(output, 1)[0][-num_candidates:] + predicted = filter_small_top_k(predicted, output) + # print(predicted) + # print('Fp {} - predict result: {}, true label: {}'.format(lineNum, predicted, vec_to_class_type[tuple(label)])) + if label in predicted: + FN += abnormal_loader[line] + else: + TP += abnormal_loader[line] + i += 1 + print(count_num) + + # Compute precision, recall and F1-measure + if TP + FP == 0: + P = 0 + else: + P = 100 * TP / (TP + FP) + + if TP + FN == 0: + R = 0 + else: + R = 100 * TP / (TP + FN) + + if P + R == 0: + F1 = 0 + else: + F1 = 2 * P * R / (P + R) + + Acc = (TP + TN) * 100 /(TP + TN + FN + FP) + print('FP: {}, FN: {}, TP: {}, TN: {}'.format(FP, FN, TP, TN)) + print('Acc: {:.3f}, Precision: {:.3f}%, Recall: {:.3f}%, F1-measure: {:.3f}%'.format(Acc, P, R, F1)) + print('Finished Predicting') + elapsed_time = time.time() - start_time + print('elapsed_time: {}'.format(elapsed_time)) + + #draw_evaluation("Evaluations", ['Acc', 'Precision', 'Recall', 'F1-measure'], [Acc, P, R, F1], 'evaluations', '%') \ No newline at end of file diff --git a/anomalydetection/self_att_lstm/self_att_lstm_train.py b/anomalydetection/self_att_lstm/self_att_lstm_train.py new file mode 100644 index 0000000..b90dfb5 --- /dev/null +++ 
b/anomalydetection/self_att_lstm/self_att_lstm_train.py @@ -0,0 +1,140 @@ +# -*- coding: UTF-8 -*- +# regularization waiting for heliren sparse +import torch +import torch.nn as nn +import torch.optim as optim +import torch.nn.functional as F +import os +from tensorboardX import SummaryWriter +from torch.utils.data import TensorDataset, DataLoader + +# use cuda if available otherwise use cpu +from torch.autograd import Variable + +device = torch.device("cuda" if torch.cuda.is_available() else "cpu") + +class Model(nn.Module): + def __init__(self, input_size, hidden_size, num_of_layers, out_size, if_bidirectional, sequence_len): + super(Model, self).__init__() + self.hidden_size = hidden_size + self.num_of_layers = num_of_layers + self.lstm = nn.LSTM(input_size, hidden_size, num_of_layers, batch_first=True, bidirectional=if_bidirectional, dropout=0.5) + if if_bidirectional: + self.num_of_directions = 2 + else: + self.num_of_directions = 1 + self.fc = nn.Linear(hidden_size*self.num_of_directions, out_size) + + self.att_weight = nn.Parameter(torch.randn(1, 1, self.hidden_size*self.num_of_directions)) + self.att_bias = nn.Parameter(torch.randn(1, 1, sequence_len)) + + # self.out = nn.Linear(in_features=in_features, out_features=out_features) + +# l1 regularization will add later + def attention_net(self, H): + # print(H.size()) = [batch, numdirec*hidden, seqlen] + a = F.softmax(torch.matmul(self.att_weight, H) + self.att_bias, 2) + a = torch.transpose(a, 1, 2) + return torch.bmm(H, a) + + def init_hidden(self, size): + # size self.batch_size same + h0 = torch.zeros(self.num_of_layers*self.num_of_directions, size, self.hidden_size).to(device) + c0 = torch.zeros(self.num_of_layers*self.num_of_directions, size, self.hidden_size).to(device) + return (h0, c0) + + def forward(self, input): + # h_n: hidden state h of last time step + # c_n: hidden state c of last time step + out, _ = self.lstm(input, self.init_hidden(input.size(0))) + + # out = torch.transpose(out, 0, 1) + # out shape [batch, seqlen, numdirec*hidden] + out = torch.transpose(out, 1, 2) + # out shape [batch, numdirec*hidden, seqlen] + att_out = self.attention_net(out) + # att_out shape[batch, num_direc*hidden_size, 1] + # att_out[:, :, 0] shape[batch, num_direc*hidden_size] + out = self.fc(att_out[:, :, 0]) + # out shape[batch, num_of_class] + return out + + +def generate_seq_label(file_path, window_length, pattern_vec_file): + vec_to_class_type = {} + with open(pattern_vec_file, 'r') as pattern_file: + i = 0 + for line in pattern_file.readlines(): + pattern, vec = line.split('[:]') + pattern_vector = tuple(map(float, vec.strip().split(' '))) + vec_to_class_type[pattern_vector] = i + i = i + 1 + num_of_sessions = 0 + input_data, output_data = [], [] + with open(file_path, 'r') as file: + for line in file.readlines(): + num_of_sessions += 1 + line = tuple(map(lambda n: tuple(map(float, n.strip().split())), [x for x in line.strip().split(',') if len(x) > 0])) + if len(line) < window_length + 1: + continue + for i in range(len(line) - window_length): + input_data.append(line[i:i + window_length]) + # line[i] is a list need to read file form a dic{vec:log_key} to get log key + output_data.append(vec_to_class_type[line[i + window_length]]) + data_set = TensorDataset(torch.tensor(input_data, dtype=torch.float), torch.tensor(output_data)) + return data_set + +def generate_logdeep_seq_label(file_path, window_length): + input_data, output_data = [], [] + with open(file_path, 'r') as file: + for line in file.readlines(): + line = 
tuple(map(lambda n: n-1, map(int, line.strip().split()))) + if len(line) < window_length + 1: + continue + for i in range(len(line) - window_length): + input_data.append(line[i:i + window_length]) + # line[i] is a list need to read file form a dic{vec:log_key} to get log key + output_data.append(line[i + window_length]) + data_set = TensorDataset(torch.tensor(input_data, dtype=torch.float), torch.tensor(output_data)) + return data_set + + +def train_model(window_length, input_size, hidden_size, num_of_layers, num_of_classes, num_epochs, batch_size, root_path, model_output_directory, data_file, pattern_vec_file): + # log setting + log_directory = root_path + 'log_out/' + log_template = 'Adam_batch_size=' + str(batch_size) + ';epoch=' + str(num_epochs) + + print("Train num_classes: ", num_of_classes) + model = Model(input_size, hidden_size, num_of_layers, num_of_classes, False, window_length).to(device) + # create data set + sequence_data_set = generate_seq_label(data_file, window_length, pattern_vec_file) + # create data_loader + data_loader = DataLoader(dataset=sequence_data_set, batch_size=batch_size, shuffle=True, pin_memory=False) + writer = SummaryWriter(logdir=log_directory + log_template) + + # Loss and optimizer classify job + criterion = nn.CrossEntropyLoss() + optimizer = optim.Adam(model.parameters(), weight_decay=0.0001) + + # Training + for epoch in range(num_epochs): + train_loss = 0 + for step, (seq, label) in enumerate(data_loader): + seq = seq.clone().detach().view(-1, window_length, input_size).to(device) + output = model(seq) + + loss = criterion(output, label.to(device)) + + # Backward and optimize + optimizer.zero_grad() + loss.backward() + train_loss += loss.item() + optimizer.step() + print('Epoch [{}/{}], training_loss: {:.4f}'.format(epoch + 1, num_epochs, train_loss / len(data_loader.dataset))) + if (epoch + 1) % num_epochs == 0: + if not os.path.isdir(model_output_directory): + os.makedirs(model_output_directory) + e_log = 'Adam_batch_size=' + str(batch_size) + ';epoch=' + str(epoch+1) + torch.save(model.state_dict(), model_output_directory + '/' + e_log + '.pt') + writer.close() + print('Training finished') \ No newline at end of file diff --git a/anomalydetection/self_att_lstm_supervised/__init__.py b/anomalydetection/self_att_lstm_supervised/__init__.py new file mode 100644 index 0000000..9764abf --- /dev/null +++ b/anomalydetection/self_att_lstm_supervised/__init__.py @@ -0,0 +1 @@ +# -*- coding: UTF-8 -*- \ No newline at end of file diff --git a/anomalydetection/self_att_lstm_supervised/__pycache__/__init__.cpython-36.pyc b/anomalydetection/self_att_lstm_supervised/__pycache__/__init__.cpython-36.pyc new file mode 100644 index 0000000000000000000000000000000000000000..66c3c532c0df3162809b72e358dbdb0b4afc30da GIT binary patch literal 192 zcmXr!<>hLfwu4|xXIhDnk#W6nl>5edVVqShOP&g&EBsIAt zGe0k;I5j6NKCz@EKBu@OH@>*EAhoD0vp6*+CO$qhFS8^*Uaz3?7Kcr4eoARhsvXD~ I#X!se0F042d;kCd literal 0 HcmV?d00001 diff --git a/anomalydetection/self_att_lstm_supervised/__pycache__/self_att_lstm_supervised_predict.cpython-36.pyc b/anomalydetection/self_att_lstm_supervised/__pycache__/self_att_lstm_supervised_predict.cpython-36.pyc new file mode 100644 index 0000000000000000000000000000000000000000..0d3c6e27348cadffa6222b8938a94c881529eac2 GIT binary patch literal 4198 zcmZ`+&5s;M6|d^<`Re)D+1;_d>uijXNXUY-#7YpsvK&IpDnj6xvrS??~@r%AY30d61;2yw|J-1r}mI3Z3Y4oI9<;sQqt34X77zp{$ERbQ`ORlj;4 zzk2;R=Y=#lWnYew*63M`B>eJb(~9WL+acc$3m)A 
z7$UkIH0cfePT?Zwt1VwFRBIOa9ERV>BZb9PE6fp~Axvu^MvQ2nty74O7fAXfQ^<@XHR*f2l8d-+DsM%q~=^;J?<%MYf*A)H8K z5~cn)9B1MPV0W0eZM^WetG-( zU2Xix-TWlWvF7{d_g);9kJ#CnwKtKoWi1xEcZZ0>^MjC!AuLiCy|0Kd>dKi2aE*=( z#aZe+M+I53peR5P!w8BbRqeIczjp=8C?}i9Xsm3~2exvY*5;ZIR6T?mbhx3ML6X4{ zBBhW4@?HC06y8T@jl?uW{y`L=>XeB1BLQyl_JhG4ik_8?4ypzj{1iZ#Yel zzuGh&*Tu6m@lR>4Ru#DIXRwubw1KK__4ZUoNxnx~to#jGaKBUa1yczH1YA(+Sy?${ zhYi!inB;j PU8`konk(nLZEy2GGSGA{ literal 0 HcmV?d00001 diff --git a/anomalydetection/self_att_lstm_supervised/self_att_lstm_supervised_predict.py b/anomalydetection/self_att_lstm_supervised/self_att_lstm_supervised_predict.py new file mode 100644 index 0000000..7414db8 --- /dev/null +++ b/anomalydetection/self_att_lstm_supervised/self_att_lstm_supervised_predict.py @@ -0,0 +1,131 @@ +# -*- coding: UTF-8 -*- +# -*- coding: UTF-8 -*- +import torch +import json +import pandas as pd +import numpy as np +import os +import torch.nn as nn +import time +import random +from torch.utils.data import TensorDataset, DataLoader +from anomalydetection.self_att_lstm_supervised.self_att_lstm_supervised_train import Model + +# use cuda if available otherwise use cpu +device = torch.device("cuda" if torch.cuda.is_available() else "cpu") + +# len(line) < window_length + +def generate(name, window_length): + log_keys_sequences = list() + with open(name, 'r') as f: + for line in f.readlines(): + line = tuple(map(lambda n: tuple(map(float, n.strip().split())), [x for x in line.strip().split(',') if len(x) > 0])) + # for i in range(len(line) - window_size): + # inputs.add(tuple(line[i:i+window_size])) + log_keys_sequences.append(tuple(line)) + return log_keys_sequences + + + +def load_sequential_model(input_size, hidden_size, num_layers, num_classes, model_path, batch_size, sequence_length): + + model1 = Model(input_size, hidden_size, num_layers, num_classes, if_bidirectional=False, batch_size=0, sequence_len=sequence_length).to(device) + model1.load_state_dict(torch.load(model_path, map_location='cpu')) + model1.eval() + print('model_path: {}'.format(model_path)) + return model1 + + +def filter_small_top_k(predicted, output): + filter = [] + for p in predicted: + if output[0][p] > 0.001: + filter.append(p) + return filter + + +def generate_robust_seq_label(file_path, sequence_length, pattern_vec_file): + with open(pattern_vec_file, 'r') as pattern_file: + class_type_to_vec = json.load(pattern_file) + num_of_sessions = 0 + input_data, output_data = [], [] + train_file = pd.read_csv(file_path) + i = 0 + while i < len(train_file): + num_of_sessions += 1 + line = [int(id) for id in train_file["Sequence"][i].split(' ')] + line = line[0:sequence_length] + if len(line) < sequence_length: + line.extend(list([0]) * (sequence_length - len(line))) + semantic_line = [] + for event in line: + if event == 0: + semantic_line.append([-1] * 300) + else: + semantic_line.append(class_type_to_vec[str(event - 1)]) + input_data.append(semantic_line) + output_data.append(int(train_file["label"][i])) + i += random.randint(6, 8) + data_set = TensorDataset(torch.tensor(input_data, dtype=torch.float), torch.tensor(output_data)) + return data_set + + +def do_predict(input_size, hidden_size, num_layers, num_classes, sequence_length, model_path, test_file_path, batch_size, pattern_vec_json): + + sequential_model = load_sequential_model(input_size, hidden_size, num_layers, num_classes, model_path, batch_size, sequence_length) + + start_time = time.time() + TP = 0 + FP = 0 + TN = 0 + FN = 0 + + # create data set + sequence_data_set = 
generate_robust_seq_label(test_file_path, sequence_length, pattern_vec_json) + # create data_loader + data_loader = DataLoader(dataset=sequence_data_set, batch_size=batch_size, shuffle=True, pin_memory=False) + + print('predict start') + with torch.no_grad(): + count = 0 + for step, (seq, label) in enumerate(data_loader): + # first traverse [0, window_size) + seq = seq.view(-1, sequence_length, input_size).to(device) + #label = torch.tensor(label).view(-1).to(device) + output = sequential_model(seq)[:, 0].clone().detach().numpy() + predicted = (output > 0.2).astype(int) + label = np.array([y for y in label]) + TP += ((predicted == 1) * (label == 1)).sum() + FP += ((predicted == 1) * (label == 0)).sum() + FN += ((predicted == 0) * (label == 1)).sum() + TN += ((predicted == 0) * (label == 0)).sum() + count += 1 + if count > 100000: + break + ALL = TP + TN + FP + FN + # Compute precision, recall and F1-measure + if TP + FP == 0: + P = 0 + else: + P = 100 * TP / (TP + FP) + + if TP + FN == 0: + R = 0 + else: + R = 100 * TP / (TP + FN) + + if P + R == 0: + F1 = 0 + else: + F1 = 2 * P * R / (P + R) + + Acc = (TP + TN) * 100 / ALL + + print('FP: {}, FN: {}, TP: {}, TN: {}'.format(FP, FN, TP, TN)) + print('Acc: {:.3f}, Precision: {:.3f}%, Recall: {:.3f}%, F1-measure: {:.3f}%'.format(Acc, P, R, F1)) + print('Finished Predicting') + elapsed_time = time.time() - start_time + print('elapsed_time: {}'.format(elapsed_time)) + + #draw_evaluation("Evaluations", ['Acc', 'Precision', 'Recall', 'F1-measure'], [Acc, P, R, F1], 'evaluations', '%') \ No newline at end of file diff --git a/anomalydetection/self_att_lstm_supervised/self_att_lstm_supervised_train.py b/anomalydetection/self_att_lstm_supervised/self_att_lstm_supervised_train.py new file mode 100644 index 0000000..219e7a1 --- /dev/null +++ b/anomalydetection/self_att_lstm_supervised/self_att_lstm_supervised_train.py @@ -0,0 +1,154 @@ +# -*- coding: UTF-8 -*- +# -*- coding: UTF-8 -*- +import json +import torch +import pandas as pd +import torch.nn as nn +import torch.optim as optim +import torch.nn.functional as F +import os +from tensorboardX import SummaryWriter +from torch.utils.data import TensorDataset, DataLoader + +# use cuda if available otherwise use cpu +from torch.autograd import Variable + +device = torch.device("cuda" if torch.cuda.is_available() else "cpu") + +class Model(nn.Module): + def __init__(self, input_size, hidden_size, num_of_layers, out_size, if_bidirectional, batch_size, sequence_len): + super(Model, self).__init__() + self.hidden_size = hidden_size + self.num_of_layers = num_of_layers + self.lstm = nn.LSTM(input_size, hidden_size, num_of_layers, batch_first=True, bidirectional=if_bidirectional, dropout=0.5) + if if_bidirectional: + self.num_of_directions = 2 + else: + self.num_of_directions = 1 + self.fc = nn.Linear(hidden_size*self.num_of_directions, out_size) + self.batch_size = batch_size + + self.att_weight = nn.Parameter(torch.randn(1, 1, self.hidden_size*self.num_of_directions)) + self.att_bias = nn.Parameter(torch.randn(1, 1, sequence_len)) + # self.out = nn.Linear(in_features=in_features, out_features=out_features) + + # l1 regularization will add later + def attention_net(self, H): + # print(H.size()) = [batch, numdirec*hidden, seqlen] + a = F.softmax(torch.matmul(self.att_weight, H) + self.att_bias, 2) + a = torch.transpose(a, 1, 2) + return torch.bmm(H, a) + + + def init_hidden(self, size): + # size self.batch_size same + h0 = torch.zeros(self.num_of_layers*self.num_of_directions, size, 
self.hidden_size).to(device) + c0 = torch.zeros(self.num_of_layers*self.num_of_directions, size, self.hidden_size).to(device) + return (h0, c0) + + def forward(self, input): + # h_n: hidden state h of last time step + # c_n: hidden state c of last time step + out, _ = self.lstm(input, self.init_hidden(input.size(0))) + + # out = torch.transpose(out, 0, 1) + # out shape [batch, seqlen, numdirec*hidden] + out = torch.transpose(out, 1, 2) + # out shape [batch, numdirec*hidden, seqlen] + att_out = self.attention_net(out) + + out = self.fc(att_out[:, :, 0]) + # out shape[batch, num_of_class = 1] + # add sigmoid + return torch.sigmoid(out) + + +def generate_seq_label(file_path, window_length, pattern_vec_file): + vec_to_class_type = {} + with open(pattern_vec_file, 'r') as pattern_file: + i = 0 + for line in pattern_file.readlines(): + pattern, vec = line.split('[:]') + pattern_vector = tuple(map(float, vec.strip().split(' '))) + vec_to_class_type[pattern_vector] = i + i = i + 1 + num_of_sessions = 0 + input_data, output_data = [], [] + with open(file_path, 'r') as file: + for line in file.readlines(): + num_of_sessions += 1 + line = tuple(map(lambda n: tuple(map(float, n.strip().split())), [x for x in line.strip().split(',') if len(x) > 0])) + if len(line) < window_length + 1: + # print(line) + continue + for i in range(len(line) - window_length): + input_data.append(line[i:i + window_length]) + # line[i] is a list need to read file form a dic{vec:log_key} to get log key + output_data.append(vec_to_class_type[line[i + window_length]]) + data_set = TensorDataset(torch.tensor(input_data, dtype=torch.float), torch.tensor(output_data)) + return data_set + + +def generate_robust_seq_label(file_path, sequence_length, pattern_vec_file): + with open(pattern_vec_file, 'r') as pattern_file: + class_type_to_vec = json.load(pattern_file) + num_of_sessions = 0 + input_data, output_data = [], [] + train_file = pd.read_csv(file_path) + for i in range(len(train_file)): + num_of_sessions += 1 + line = [int(id) for id in train_file["Sequence"][i].split(' ')] + line = line[0:sequence_length] + if len(line) < sequence_length: + line.extend(list([0]) * (sequence_length - len(line))) + semantic_line = [] + for event in line: + if event == 0: + semantic_line.append([-1] * 300) + else: + semantic_line.append(class_type_to_vec[str(event - 1)]) + input_data.append(semantic_line) + output_data.append(int(train_file["label"][i])) + data_set = TensorDataset(torch.tensor(input_data, dtype=torch.float), torch.tensor(output_data)) + return data_set + + +def train_model(sequence_length, input_size, hidden_size, num_of_layers, num_of_classes, num_epochs, batch_size, root_path, model_output_directory, data_file, pattern_vec_file): + # log setting + log_directory = root_path + 'log_out/' + log_template = 'Adam_batch_size=' + str(batch_size) + ';epoch=' + str(num_epochs) + + print("Train num_classes: ", num_of_classes) + model = Model(input_size, hidden_size, num_of_layers, num_of_classes, False, batch_size, sequence_length).to(device) + # create data set + sequence_data_set = generate_robust_seq_label(data_file, sequence_length, pattern_vec_file) + # create data_loader + data_loader = DataLoader(dataset=sequence_data_set, batch_size=batch_size, shuffle=True, pin_memory=False) + writer = SummaryWriter(logdir=log_directory + log_template) + + # Loss and optimizer classify job + criterion = nn.BCELoss() + optimizer = optim.Adam(model.parameters(), weight_decay=0.001) + + # Training + for epoch in range(num_epochs): + train_loss = 0 
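Note: attention_net above scores each time step with a learned vector, softmaxes the scores over the sequence dimension, and pools the LSTM outputs with a batched matrix product. A standalone shape check of that computation with toy sizes (not the sizes configured in the drivers below):

import torch
import torch.nn.functional as F

batch, hidden, seq_len = 2, 4, 3
H = torch.randn(batch, hidden, seq_len)               # LSTM output after transpose(1, 2)
att_weight = torch.randn(1, 1, hidden)                # same shape as self.att_weight
att_bias = torch.randn(1, 1, seq_len)                 # same shape as self.att_bias

a = F.softmax(torch.matmul(att_weight, H) + att_bias, dim=2)   # [batch, 1, seq_len], sums to 1 over time
a = torch.transpose(a, 1, 2)                                   # [batch, seq_len, 1]
context = torch.bmm(H, a)                                      # [batch, hidden, 1], weighted sum over time steps

print(context.shape)                                  # torch.Size([2, 4, 1]); context[:, :, 0] is what feeds the fc layer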
+ for step, (seq, label) in enumerate(data_loader): + seq = seq.clone().detach().view(-1, sequence_length, input_size).to(device) + output = model(seq) + + loss = criterion(output.squeeze(-1), label.float().to(device)) + + # Backward and optimize + optimizer.zero_grad() + loss.backward() + train_loss += loss.item() + optimizer.step() + print('Epoch [{}/{}], training_loss: {:.4f}'.format(epoch + 1, num_epochs, train_loss / len(data_loader.dataset))) + if (epoch + 1) % num_epochs == 0: + if not os.path.isdir(model_output_directory): + os.makedirs(model_output_directory) + e_log = 'Adam_batch_size=' + str(batch_size) + ';epoch=' + str(epoch+1) + torch.save(model.state_dict(), model_output_directory + '/' + e_log + '.pt') + writer.close() + print('Training finished') \ No newline at end of file diff --git a/deeplog_detection.py b/deeplog_detection.py index 4341b7b..508cd1c 100644 --- a/deeplog_detection.py +++ b/deeplog_detection.py @@ -1,75 +1,62 @@ import os -from logparsing.fttree import fttree -from extractfeature import hdfs_fs_deeplog_preprocessor +import sys +sys.path.append('./') +from logparsing.drain.HDFS_drain import get_hdfs_drain_clusters +from extractfeature.hdfs_deeplog_preprocessor import hdfs_preprocessor from anomalydetection.deeplog.Model1 import log_key_LSTM_train from anomalydetection.deeplog.Model2 import variable_LSTM_train from anomalydetection.deeplog import log_predict -# 原始日志文件 -log_file_dir = './Data/log/hdfs/' -log_file_name = 'HDFS_split' -log_file_abnormal_label = 'HDFS_split_anomaly' -# FT-tree -log_result = './Data/FTTreeResult-HDFS/' -log_fttree_out_dir = log_result+'clusters/' + # log_train,log_test,logkey,logvalue -log_preprocessor_dir = log_result+'deeplog_files/' -# model -model_dir = log_result+'deeplog_model_train/' +log = './Data/log/hdfs/HDFS_40w' +drain_out = './Data/Drain_HDFS/clusters/' +bin_dir = './HDFS_drain3_state.bin' +log_preprocessor_dir = './Data/Drain_HDFS/log_preprocessor' +model_dir = './Data/Drain_HDFS/deeplog_model_train/' + # train parameters window_length = 4 input_size = 1 hidden_size = 20 num_of_layers = 3 -model1_num_epochs = 300 +model1_num_epochs = 100 model1_batch_size = 200 model2_num_epochs = 50 model2_batch_size = 20 learning_rate = 0.01 num_candidates = 3 mse_threshold = 0.1 +# 是否使用模型二 +use_model2 = False -if not os.path.exists(log_result): - os.makedirs(log_result) -if not os.path.exists(log_fttree_out_dir): - os.makedirs(log_fttree_out_dir) -if not os.path.exists(log_preprocessor_dir): - os.makedirs(log_preprocessor_dir) if not os.path.exists(model_dir): os.makedirs(model_dir) -# FT-tree -def pattern_extract(): - fttree.pattern_extract(log_file_dir, log_file_name, log_fttree_out_dir, 5, 4, 2) - -# 将原日志文件分成训练集和测试集两部分 -def log_split(): - hdfs_fs_deeplog_preprocessor.log_split(log_file_dir,log_file_name,log_file_abnormal_label,log_preprocessor_dir) +def drain(): + get_hdfs_drain_clusters(log,drain_out,bin_dir) -# 生成log_key -def generate_log_key(): - hdfs_fs_deeplog_preprocessor.generate_log_key(log_file_dir,log_file_abnormal_label,log_preprocessor_dir,log_fttree_out_dir) - -# 提取并处理log_value -def generate_log_value(): - hdfs_fs_deeplog_preprocessor.generate_log_value(log_file_dir,log_file_name,log_file_abnormal_label,log_preprocessor_dir,log_fttree_out_dir) +def generate_logkey_and_value(): + hdfs_preprocessor() # 训练 +def train_model(): + train_model1() + if use_model2: + train_model2() + def train_model1(): - 
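Note: the loss line above needs squeeze(-1) and the float() cast because nn.BCELoss expects predictions and targets of the same shape and dtype. A toy single training step on random data; the two-layer Sequential model here is a stand-in for illustration, not the attention model defined in this file:

import torch
import torch.nn as nn

model = nn.Sequential(nn.Linear(8, 1), nn.Sigmoid())  # stand-in producing [batch, 1] probabilities
criterion = nn.BCELoss()
optimizer = torch.optim.Adam(model.parameters(), weight_decay=0.001)

seq = torch.randn(16, 8)                               # dummy batch
label = torch.randint(0, 2, (16,))                     # integer 0/1 labels, as loaded from the CSV

output = model(seq)                                    # shape [16, 1]
loss = criterion(output.squeeze(-1), label.float())    # both sides become shape [16], float

optimizer.zero_grad()
loss.backward()
optimizer.step()
print(float(loss))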
log_key_LSTM_train.train_model1(model_dir,log_preprocessor_dir,log_fttree_out_dir,model1_num_epochs,model1_batch_size,window_length,input_size,hidden_size,num_of_layers) + log_key_LSTM_train.train_model1(model_dir,log_preprocessor_dir,drain_out,model1_num_epochs,model1_batch_size,window_length,input_size,hidden_size,num_of_layers) def train_model2(): variable_LSTM_train.train_model2(model_dir,log_preprocessor_dir,model2_num_epochs,model2_batch_size,window_length,num_of_layers,learning_rate,hidden_size) # 测试 def test_model(): - log_predict.do_predict(log_preprocessor_dir,model_dir,window_length,input_size, hidden_size, num_of_layers,num_candidates,mse_threshold) - + model1_name = 'Adam_batch_size=' + str(model1_batch_size) + ';epoch=' + str(model1_num_epochs) + '.pt' + log_predict.do_predict(log_preprocessor_dir,drain_out,model_dir,model1_name,model2_num_epochs,window_length, input_size, hidden_size, num_of_layers, num_candidates, mse_threshold, use_model2) -# pattern_extract() -# log_split() -# generate_log_key() -# generate_log_value() -# train_model1() -# train_model2() -test_model() \ No newline at end of file +#drain() +generate_logkey_and_value() +# train_model() +#test_model() \ No newline at end of file diff --git a/ecoder_anomaly_detection.py b/ecoder_anomaly_detection.py new file mode 100644 index 0000000..78f9038 --- /dev/null +++ b/ecoder_anomaly_detection.py @@ -0,0 +1,71 @@ +# -*- coding: UTF-8 -*- +# -*- coding: UTF-8 -*- +# -*- coding: UTF-8 -*- + +import os +from logparsing.fttree import fttree +from extractfeature import hdfs_ft_preprocessor +from anomalydetection.loganomaly import log_anomaly_sequential_train +from anomalydetection.loganomaly import log_anomaly_sequential_predict +from anomalydetection.att_all_you_need import encoder_self_att_train +from anomalydetection.att_all_you_need import encoder_self_att_predict + +# parameters for early prepare +logparser_structed_file = './Data/logparser_result/Drain/HDFS.log_structured.csv' +logparser_event_file = './Data/logparser_result/Drain/HDFS.log_templates.csv' +anomaly_label_file = './Data/log/hdfs/anomaly_label.csv' +sequential_directory = './Data/DrainResult-HDFS/sequential_files/' +train_file_name = 'robust_train_file' +test_file_name = 'robust_test_file' +valid_file_name = 'robust_valid_file' +wordvec_file_path = './Data/pretrainedwordvec/crawl-300d-2M.vec(0.1M)' +pattern_vec_out_path = './Data/DrainResult-HDFS/pattern_vec' +variable_symbol = '<*> ' + +# my encoder +sequence_length = 50 +input_size = 300 +hidden_size = 256 +num_of_layers = 4 +# 1 using sigmoid, 2 using softmax +num_of_classes = 1 +num_epochs = 100 +batch_size = 1000 +# for robust attention bi +train_root_path = './Data/DrainResult-HDFS/att_all_you_need/' +model_out_path = train_root_path + 'model_out/' +train_file = sequential_directory + train_file_name +pattern_vec_json = pattern_vec_out_path +dropout = 0.5 +num_of_heads = 8 +pf_dim = 512 + + +# predict parameters +# log anomaly sequential model parameters + +if not os.path.exists(train_root_path): + os.makedirs(train_root_path) + + +def train_model(): + encoder_self_att_train.train_model(sequence_length, input_size, hidden_size, num_of_layers, num_of_classes, num_epochs, batch_size, train_root_path, model_out_path, train_file, pattern_vec_json, dropout, num_of_heads, pf_dim) + + +def test_model(): + # do something + encoder_self_att_predict.do_predict(input_size, hidden_size, num_of_layers, num_of_classes, sequence_length, model_out_path + 'Adam_batch_size=' + str(batch_size) + ';epoch=' + 
str(num_epochs) + '.pt', sequential_directory + valid_file_name, batch_size, pattern_vec_json, dropout, num_of_heads, pf_dim) + +#pattern_extract() +#extract_feature() +#train_model() +#train_model() +test_model() + +# deep log +# log_preprocessor.execute_process() +# value_extract.get_value() +# value_extract.value_deal() +# value_extract.value_extract() +# train predict + diff --git a/extractfeature/hdfs_deeplog_preprocessor.py b/extractfeature/hdfs_deeplog_preprocessor.py new file mode 100644 index 0000000..ed89416 --- /dev/null +++ b/extractfeature/hdfs_deeplog_preprocessor.py @@ -0,0 +1,261 @@ +import csv +import os +import random + +class hdfs_deeplog_preprocessor: + # 日志变量设置 + LOG_LINE = 400000 + NUM_OF_LOGKEY = 31 + VECTOR_DIMENSION = 10 + NORMAL_STAGE_TO_STAGE_SIZE = [2000, 1000, 1000] + ABNORMAL_STAGE_TO_STAGE_SIZE = [800, 200, 200] + + # 读入数据部分 + ANOMALY_LABEL = './Data/log/hdfs/anomaly_label.csv' + LOG_FILE = './Data/log/hdfs/HDFS_40w' + MOFIFIED_LOG_FILE = './Data/log/hdfs/modified_HDFS_40w' + WORD_VECTOR_FILE = './Data/log/hdfs/word2vec_HDFS_40w' + LOGKEY_DIR = './Data/Drain_HDFS/clusters/' + is_block_normal = {} + block_to_lines = {} + line_to_logkey = [] + word_to_vector = {} + modified_logs = [] + + # 输出数据部分 + OUTPUT_DIR_PREFIX = './Data/Drain_HDFS/log_preprocessor/' + STAGE_TO_OUTPUT_DIR_INFIX = ['train/','validate/','test/'] + normal_blocks = [] + abnormal_blocks = [] + normal_block_index_to_stage = [] + abnormal_block_index_to_stage = [] + + + + ''' + ----------------------------------------------- + 以下是load_data部分 + ----------------------------------------------- + ''' + + def load_normal_info(self): + NORMAL_WORD = 'Normal' + FIRST_LINE_BLOCK_NAME = 'BlockId' + + with open(self.ANOMALY_LABEL,'r') as f: + lines = csv.reader(f) + for line in lines: + block = line[0] + normal_word = line[1] + if normal_word == NORMAL_WORD: + normal_info = True + else: + normal_info = False + if block != FIRST_LINE_BLOCK_NAME: + self.is_block_normal[block] = normal_info + + def load_line_info(self): + with open(self.LOG_FILE,'r') as f: + for line_index in range(self.LOG_LINE): + line = f.readline() + block = self.get_blockid(line) + if block not in self.block_to_lines.keys(): + self.block_to_lines[block] = [] + self.block_to_lines[block].append(line_index) + # print(self.block_to_lines['blk_-1608999687919862906']) + + def load_logkey_info(self): + self.line_to_logkey = [0 for i in range(self.LOG_LINE)] + for logkey in range(1,self.NUM_OF_LOGKEY+1): + with open(self.LOGKEY_DIR+str(logkey),'r') as f: + print(self.LOGKEY_DIR+str(logkey)) + lines = f.readline().strip().split(' ') + for line in lines: + line_index = int(line) + if line_index>=self.LOG_LINE: + print('cluster文件中某行的行数过大') + print(line) + exit(2) + self.line_to_logkey[line_index] = logkey + + def load_word_vector(self): + with open(self.WORD_VECTOR_FILE, 'r') as r: + for line in r.readlines(): + list_line = line.split(' ') + value = list(map(float, list_line[1:])) + key = list_line[0] + self.word_to_vector[key] = value + + def load_modified_log(self): + with open(self.MOFIFIED_LOG_FILE, 'r') as file: + content_list = file.readlines() + self.modified_logs = [x.strip() for x in content_list] + + def generate_block_list(self): + for block in self.block_to_lines.keys(): + if self.is_block_normal[block]: + self.normal_blocks.append(block) + else: + self.abnormal_blocks.append(block) + + ''' + ----------------------------------------------- + 以下是一些辅助函数 + ----------------------------------------------- + ''' + + def get_blockid(self, 
line): + words = line.strip().split(' ') + for word in words: + if len(word)>4 and word[:4] == 'blk_': + return word + print('无法找到block_id') + print(line) + exit(1) + + + def get_sentence_vector(self, sentence): + words = sentence.split(' ') + old_vector = [0.0 for i in range(self.VECTOR_DIMENSION)] + for word in words: + # print(word) + if word not in self.word_to_vector.keys(): + another_vector = [0.0 for i in range(self.VECTOR_DIMENSION)] + else: + another_vector = self.word_to_vector[word] + new_vector = [] + for i, j in zip(old_vector, another_vector): + new_vector.append(i + j) + old_vector = new_vector + + word_count = len(words) + for idx, value in enumerate(old_vector): + old_vector[idx] = value / word_count + vector_str = list(map(str, old_vector)) + sentence_vector = ','.join(vector_str) + return sentence_vector + + def get_logkey_and_logvalue_for_session(self, lines): + logkeys = [] + logkey_to_logvalues = [[] for i in range(self.NUM_OF_LOGKEY+1)] + for line in lines: + logkey = self.line_to_logkey[line] + logkeys.append(logkey) + log = self.modified_logs[line] + vector = self.get_sentence_vector(log) + logkey_to_logvalues[logkey].append(vector) + return logkeys,logkey_to_logvalues + ''' + ----------------------------------------------- + 以下是output_logkey_and_logvalue部分 + ----------------------------------------------- + ''' + + def get_block_stage_info(self,total_length,stage_to_length): + if sum(stage_to_length) > total_length: + print('要输出的条目太大,大于数据集中存在的条目。') + print(total_length) + print(stage_to_length) + exit(3) + block_index_list = [i for i in range(total_length)] + random.shuffle(block_index_list) + table = [-1 for i in range(total_length)] + + used_block_count = 0 + for stage in range(len(stage_to_length)): + block_index_start = used_block_count + block_index_end = used_block_count + stage_to_length[stage] + for block_index in block_index_list[block_index_start:block_index_end]: + table[block_index] = stage + used_block_count = block_index_end + return table + + def output(self,stage,output_normal): + if output_normal: + OUTPUT_DIR_SUFFIXES = ['logkey/','logvalue/normal/'] + LOGKEY_FILE = 'normal' + blocks = self.normal_blocks + block_index_to_stage = self.normal_block_index_to_stage + else: + OUTPUT_DIR_SUFFIXES = ['logkey/', 'logvalue/abnormal/'] + LOGKEY_FILE = 'abnormal' + blocks = self.abnormal_blocks + block_index_to_stage = self.abnormal_block_index_to_stage + + LOGKEY_OUTPUT_DIR = self.OUTPUT_DIR_PREFIX + \ + self.STAGE_TO_OUTPUT_DIR_INFIX[stage] + OUTPUT_DIR_SUFFIXES[0] + LOGVALUE_OUTPUT_DIR = self.OUTPUT_DIR_PREFIX + \ + self.STAGE_TO_OUTPUT_DIR_INFIX[stage] + OUTPUT_DIR_SUFFIXES[1] + if not os.path.exists(LOGKEY_OUTPUT_DIR): + os.makedirs(LOGKEY_OUTPUT_DIR) + if not os.path.exists(LOGVALUE_OUTPUT_DIR): + os.makedirs(LOGVALUE_OUTPUT_DIR) + logkey_writelist = [] + logkey_to_logvalue_writelist = [[] for i in range(self.NUM_OF_LOGKEY+1)] + + for block_index,block in enumerate(blocks): + if block_index_to_stage[block_index] == stage: + lines = self.block_to_lines[block] + logkeys, logkey_to_logvalues = \ + self.get_logkey_and_logvalue_for_session(lines) + logkey_line = ' '.join(str(logkey) for logkey in logkeys) + logkey_writelist.append(logkey_line+'\n') + for logkey in range(1,self.NUM_OF_LOGKEY+1): + if len(logkey_to_logvalues[logkey]) == 0: + logvalue_line = '-1' + else: + logvalue_line = ' '.join(logkey_to_logvalues[logkey]) + logkey_to_logvalue_writelist[logkey].append(logvalue_line+'\n') + + with open(LOGKEY_OUTPUT_DIR + LOGKEY_FILE,'w') as f: + 
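Note: get_sentence_vector above is a plain average of per-word vectors, with an all-zero vector substituted for out-of-vocabulary words. A compact equivalent using toy 3-dimensional vectors (the real table is VECTOR_DIMENSION = 10 and is read from the word2vec file):

word_to_vector = {                                     # hypothetical tiny word-vector table
    "Receiving": [0.1, 0.2, 0.3],
    "block": [0.4, 0.5, 0.6],
}
dim = 3

def sentence_vector(sentence):
    words = sentence.split(' ')
    total = [0.0] * dim
    for word in words:
        vec = word_to_vector.get(word, [0.0] * dim)    # OOV words contribute zeros but still count in the average
        total = [t + v for t, v in zip(total, vec)]
    return [t / len(words) for t in total]

print(sentence_vector("Receiving block blk_123"))      # ~[0.167, 0.233, 0.3]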
f.writelines(logkey_writelist) + for logkey in range(1,self.NUM_OF_LOGKEY+1): + LOGVALUE_FILE = str(logkey) + with open(LOGVALUE_OUTPUT_DIR + LOGVALUE_FILE,'w') as f: + f.writelines(logkey_to_logvalue_writelist[logkey]) + + + ''' + ----------------------------------------------- + 以下是main函数部分 + ----------------------------------------------- + ''' + + + def load_data(self): + self.load_normal_info() + print('正常/异常标签加载成功') + self.load_line_info() + print('数据集block信息加载成功') + self.load_logkey_info() + print('从clusters取出logkey信息成功') + self.load_word_vector() + print('读入word vector信息成功') + self.load_modified_log() + print('读入log信息成功') + self.generate_block_list() + print('将block划分为正常/异常成功') + + def output_logkey_and_logvalue(self): + self.abnormal_block_index_to_stage = self.get_block_stage_info \ + (len(self.abnormal_blocks),self.ABNORMAL_STAGE_TO_STAGE_SIZE) + print('给异常block选择train validate test数据成功') + self.normal_block_index_to_stage = self.get_block_stage_info \ + (len(self.normal_blocks), self.NORMAL_STAGE_TO_STAGE_SIZE) + print('给正常block选择train validate test数据成功') + for stage in range(len(self.STAGE_TO_OUTPUT_DIR_INFIX)): + self.output(stage, output_normal=True) + print('给阶段' + str(stage) + '输出正常logkey和logvalue成功') + self.output(stage, output_normal=False) + print('给阶段' + str(stage) + '输出异常logkey和logvalue成功') + + def __init__(self): + self.load_data() + print('数据加载成功') + print('正常的session数:' + str(len(self.normal_blocks))) + print('异常的session数:' + str(len(self.abnormal_blocks))) + self.output_logkey_and_logvalue() + print('数据生成成功') + +def hdfs_preprocessor(): + hdfs_deeplog_preprocessor() diff --git a/extractfeature/hdfs_fs_deeplog_preprocessor.py b/extractfeature/hdfs_fs_deeplog_preprocessor.py index 0d94ff8..0044e0e 100644 --- a/extractfeature/hdfs_fs_deeplog_preprocessor.py +++ b/extractfeature/hdfs_fs_deeplog_preprocessor.py @@ -71,8 +71,18 @@ def generate_log_key(log_file_dir,log_file_abnormal_label,log_preprocessor_dir,l # 提取并处理log_value def generate_log_value(log_file_dir,log_file_name,log_file_abnormal_label,log_preprocessor_dir,log_fttree_out_dir): - log = log_file_dir+log_file_name + N_CLUSTER = 21 + WORD2VEC_FILE = 'word2vec' + STRING_VECTOR_FILE = 'string_vector' + + log_list = [] + word_vector = {} + + # log = log_file_dir+log_file_name + word2vec = log_file_dir+WORD2VEC_FILE + string_vector = log_file_dir+STRING_VECTOR_FILE in_abnormal = log_file_dir+log_file_abnormal_label + log_value_dir = ['logvalue_train/', 'logvalue_test/'] log_value_train_directory = log_preprocessor_dir+log_value_dir[0] log_value_test_directory = log_preprocessor_dir +log_value_dir[1] @@ -83,53 +93,58 @@ def generate_log_value(log_file_dir,log_file_name,log_file_abnormal_label,log_pr if not os.path.exists(log_value_test_directory): os.makedirs(log_value_test_directory) - log_list = [] - with open(log, 'r') as file: + with open(string_vector, 'r') as file: content_list = file.readlines() log_list = [x.strip() for x in content_list] + with open(word2vec, 'r') as r: + for line in r.readlines(): + list_line = line.split(' ') + value = list(map(float, list_line[1:])) + key = list_line[0] + word_vector[key] = value + abnormal = get_abnormal(in_abnormal) clusters = get_logkey(log_fttree_out_dir)[0] num = [0, 170000, 199999] - for i in range(0, 2): - for j in range(1, 62): + for i in range(len(log_value_dir)): + for j in range(N_CLUSTER): print("process:", i, j) - para1 = [] - para2 = [] - para3 = [] - out_path = log_preprocessor_dir + log_value_dir[i] + str(j) + ".txt" - for t in clusters[j - 1]: + 
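Note: get_block_stage_info, called twice in output_logkey_and_logvalue above, shuffles the block indices once and hands out contiguous slices of the shuffled order to the train/validate/test stages. A minimal sketch of that assignment with illustrative sizes (not the 2000/1000/1000 and 800/200/200 splits configured above):

import random

def assign_stages(total, stage_sizes):
    order = list(range(total))
    random.shuffle(order)                              # one global shuffle of the block indices
    table = [-1] * total                               # -1 means "not used in any stage"
    start = 0
    for stage, size in enumerate(stage_sizes):
        for idx in order[start:start + size]:
            table[idx] = stage
        start += size
    return table

print(assign_stages(10, [4, 3, 2]))                    # 4 zeros, 3 ones, 2 twos and one -1, in shuffled positions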
out_path = log_preprocessor_dir + log_value_dir[i] + str(j+1) + ".txt" + write_list = [] + for t in clusters[j]: s = int(t) - if (i != 1 and s not in abnormal and s >= num[i] and s < num[i + 1]) or ( - i == 1 and s >= num[i] and s < num[i + 1]): - templog = [] - for word in log_list[s].split(' '): - templog.append(word) - para1.append(int(templog[0])) - para2.append(int(templog[1])) - para3.append(int(templog[2])) + if (i != 1 and s not in abnormal and num[i] <= s < num[i + 1]) or ( + i == 1 and num[i] <= s < num[i + 1]): + output = calc_sentence_vector(log_list[s],word_vector) + write_list.append(output) elif s >= num[i + 1]: - break; - if len(para1) > 0: - para1 = preprocessing.scale(para1) - if len(para2) > 0: - para2 = preprocessing.scale(para2) - if len(para3) > 0: - para3 = preprocessing.scale(para3) + break with open(out_path, mode='w', encoding='utf-8') as f: - for w in range(0, len(para1)): - print(para1[w], file=f, end='') - print(' ', file=f, end='') - print(para2[w], file=f, end='') - print(' ', file=f, end='') - print(para3[w], file=f, end='') - print(' ', file=f, end='') - print(' ', file=f) - - - - - + f.write('\n'.join(write_list)) + +def calc_sentence_vector(sentence,word_vector): + VECTOR_DIMENSION = 10 + + words = sentence.split(' ') + old_vector = [0.0 for i in range(VECTOR_DIMENSION)] + for word in words: + # print(word) + if word not in word_vector.keys(): + another_vector = [0.0 for i in range(VECTOR_DIMENSION)] + else: + another_vector = word_vector[word] + new_vector = [] + for i,j in zip(old_vector,another_vector): + new_vector.append(i+j) + old_vector = new_vector + + word_count = len(words) + for idx,value in enumerate(old_vector): + old_vector[idx] = value/word_count + vector_str = list(map(str, old_vector)) + output = ','.join(vector_str) + return output diff --git a/extractfeature/hdfs_ft_preprocessor.py b/extractfeature/hdfs_ft_preprocessor.py index 52df723..981614f 100644 --- a/extractfeature/hdfs_ft_preprocessor.py +++ b/extractfeature/hdfs_ft_preprocessor.py @@ -124,7 +124,7 @@ def preprocessor_hdfs_ft(cluster_directory, anomaly_file_path, wordvec_path, out for f in log_cluster[i]: train_file_obj.write(str(f)) train_file_obj.write(' ') - if count % 10 == 0: + if count % 200 == 0: train_file_obj.write('\n') else: train_file_obj.write(', ') @@ -138,8 +138,60 @@ def preprocessor_hdfs_ft(cluster_directory, anomaly_file_path, wordvec_path, out for f in log_cluster[i]: test_file_obj.write(str(f)) test_file_obj.write(' ') - if count % 10 == 0: + if count % 200 == 0: test_file_obj.write('\n') else: test_file_obj.write(', ') - count = count + 1 \ No newline at end of file + count = count + 1 + + +def preprocessor_hdfs_ft_split_abnormal(cluster_directory, anomaly_file_path, wordvec_path, out_dic, train_out_file_name, + test_out_file_name, label_out_file_name, pattern_vec_out_path, degree, num_of_lines): + anomaly_log_lines = set() + with open(anomaly_file_path, 'r') as anomaly_file: + line = anomaly_file.readline() + lines_str = line.split(' ') + anomaly_log_lines.update([int(x) for x in lines_str if len(x) > 0]) + + pattern_vec = pattern_to_vec(cluster_directory, wordvec_path, pattern_vec_out_path) + + log_cluster = {} + file_names = os.listdir(cluster_directory) + for file_name in file_names: + with open(cluster_directory + file_name, 'r') as cluster: + lines = cluster.readlines() + line_numbers = [int(x) for x in lines[1].split(' ') if len(x) > 0] + for number in line_numbers: + if not (number in anomaly_log_lines and number < int(degree * num_of_lines)): + 
log_cluster[number] = pattern_vec[lines[0].strip()] + + with open(out_dic + train_out_file_name, 'w+') as train_file_obj, open(out_dic + test_out_file_name, + 'w+') as test_file_obj, open( + out_dic + label_out_file_name, 'w+') as label_file_obj: + count = 1 + last_i = 0 + for i in sorted(log_cluster): + if i < int(degree * num_of_lines): + if i - last_i > 1: + train_file_obj.write('\n') + else: + train_file_obj.write(', ') + for f in log_cluster[i]: + train_file_obj.write(str(f)) + train_file_obj.write(' ') + count = count + 1 + else: + if i == int(degree * num_of_lines): + count = 1 + if i in anomaly_log_lines: + label_file_obj.write(str(count)) + label_file_obj.write(' ') + for f in log_cluster[i]: + test_file_obj.write(str(f)) + test_file_obj.write(' ') + if count % 200 == 0: + test_file_obj.write('\n') + else: + test_file_obj.write(', ') + count = count + 1 + last_i = i \ No newline at end of file diff --git a/extractfeature/hdfs_robust_preprocessor.py b/extractfeature/hdfs_robust_preprocessor.py new file mode 100644 index 0000000..e6760a5 --- /dev/null +++ b/extractfeature/hdfs_robust_preprocessor.py @@ -0,0 +1,166 @@ +# -*- coding: UTF-8 -*- +import os +import io +import re +import random +import math +import json +import pandas as pd +import numpy as np +block_id_regex = r'blk_(|-)[0-9]+' +special_patterns = {'dfs.FSNamesystem:': ['dfs', 'FS', 'Name', 'system'], 'dfs.FSDataset:': ['dfs', 'FS', 'dataset']} + + +def get_anomaly_block_id_set(anomaly_label_file): + datafile = open(anomaly_label_file, 'r', encoding='UTF-8') + data = pd.read_csv(datafile) + + data = data[data['Label'].isin(['Anomaly'])] + # 16838 anomaly block right with the log anomaly paper + anomaly_block_set = set(data['BlockId']) + return anomaly_block_set + + +def get_log_template_dic(logparser_event_file): + dic = {} + datafile = open(logparser_event_file, 'r', encoding='UTF-8') + data = pd.read_csv(datafile) + for _, row in data.iterrows(): + dic[row['EventId']] = row['numberID'] + return dic + + +# log parser_file should be structed.csv +def generate_train_and_test_file(logparser_structed_file, logparser_event_file, anomaly_label_file, out_dic, train_out_file_name, validation_out_file_name, test_out_file_name, wordvec_path, pattern_vec_out_path, variable_symbol): + anomaly_block_set = get_anomaly_block_id_set(anomaly_label_file) + log_template_dic = get_log_template_dic(logparser_event_file) + session_dic = {} + logparser_result = pd.read_csv(logparser_structed_file, header=0) + normal_block_ids = set() + abnormal_block_ids = set() + for _, row in logparser_result.iterrows(): + key = row['EventTemplate'] + content = row['Content'] + block_id = re.search(block_id_regex, content).group() + session_dic.setdefault(block_id, []).append(log_template_dic[row['EventId']]) + if block_id in anomaly_block_set: + abnormal_block_ids.add(block_id) + else: + normal_block_ids.add(block_id) + abnormal_block_ids = list(abnormal_block_ids) + normal_block_ids = list(normal_block_ids) + random.shuffle(abnormal_block_ids) + random.shuffle(normal_block_ids) + with open(out_dic + train_out_file_name, 'w+') as train_file_obj, open(out_dic + test_out_file_name, + 'w+') as test_file_obj, open( + out_dic + validation_out_file_name, 'w+') as validation_file_obj: + train_file_obj.write('Sequence,label\n') + test_file_obj.write('Sequence,label\n') + validation_file_obj.write('Sequence,label\n') + for i in range(len(normal_block_ids)): + if i < 6000: + train_file_obj.write(' '.join([str(num_id) for num_id in session_dic[normal_block_ids[i]]])) 
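Note: the session building above hinges on the blk_ regular expression and dict.setdefault. A tiny runnable illustration on two fabricated log contents; the template numberIDs are invented stand-ins for the Drain parser output:

import re

block_id_regex = r'blk_(|-)[0-9]+'
rows = [
    ("Receiving block blk_-160899 src: /10.0.0.1", 5),            # (Content, numberID of its template)
    ("PacketResponder 1 for block blk_-160899 terminating", 9),
]

session_dic = {}
for content, template_id in rows:
    block_id = re.search(block_id_regex, content).group()          # first blk_ token in the line
    session_dic.setdefault(block_id, []).append(template_id)

print(session_dic)                                                  # {'blk_-160899': [5, 9]}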
+ train_file_obj.write(', 0\n') + elif i < 6000 + 50000: + validation_file_obj.write(' '.join([str(num_id) for num_id in session_dic[normal_block_ids[i]]])) + validation_file_obj.write(', 0\n') + else: + test_file_obj.write(' '.join([str(num_id) for num_id in session_dic[normal_block_ids[i]]])) + test_file_obj.write(', 0\n') + + for i in range(len(abnormal_block_ids)): + if i < 6000: + train_file_obj.write(' '.join([str(num_id) for num_id in session_dic[abnormal_block_ids[i]]])) + train_file_obj.write(', 1\n') + elif i < 6000 + 1000: + validation_file_obj.write(' '.join([str(num_id) for num_id in session_dic[abnormal_block_ids[i]]])) + validation_file_obj.write(', 1\n') + else: + test_file_obj.write(' '.join([str(num_id) for num_id in session_dic[abnormal_block_ids[i]]])) + test_file_obj.write(', 1\n') + + pattern_to_vec(logparser_event_file, wordvec_path, pattern_vec_out_path, variable_symbol) + + +def load_vectors(fname): + fin = io.open(fname, 'r', encoding='utf-8', newline='\n', errors='ignore') + data = {} + for line in fin: + tokens = line.rstrip().split(' ') + data[tokens[0]] = list(map(float, tokens[1:])) + return data + + +def get_lower_case_name(text): + word_list = [] + if text in special_patterns: + return + for index, char in enumerate(text): + if not char.isupper(): + break + else: + if index == len(text) - 1: + return [text] + lst = [] + for index, char in enumerate(text): + if char.isupper() and index != 0: + word_list.append("".join(lst)) + lst = [] + lst.append(char) + word_list.append("".join(lst)) + return word_list + + +def preprocess_pattern(log_pattern): + special_list = [] + if log_pattern.split(' ')[0] in special_patterns.keys(): + special_list = special_patterns[log_pattern.split(' ')[0]] + log_pattern = log_pattern[len(log_pattern.split(' ')[0]):] + pattern = r'\*|,|\.|/|;|\'|`|\[|\]|<|>|\?|:|"|\{|\}|\~|!|@|#|\$|%|\^|&|\(|\)|-|=|\_|\+|,|。|、|;|‘|’|【|】|·|!| |…|(|)' + result_list = [x for x in re.split(pattern, log_pattern) if len(x) > 0] + final_list = list(map(get_lower_case_name, result_list)) + final_list.append(special_list) + return [x for x in re.split(pattern, final_list.__str__()) if len(x) > 0] + + +def pattern_to_vec(logparser_event_file, wordvec_path, pattern_vec_out_path, variable_symbol): + data = load_vectors(wordvec_path) + pattern_to_words = {} + pattern_to_vectors = {} + datafile = open(logparser_event_file, 'r', encoding='UTF-8') + df = pd.read_csv(datafile) + pattern_num = len(df) + for _, row in df.iterrows(): + wd_list = preprocess_pattern(row['EventTemplate'].replace(variable_symbol, '').strip()) + pattern_to_words[row['EventTemplate'].replace(variable_symbol, '').strip()] = wd_list + print(pattern_to_words) + IDF = {} + for key in pattern_to_words.keys(): + wd_list = pattern_to_words[key] + pattern_vector = np.array([0.0 for _ in range(300)]) + word_used = 0 + for word in wd_list: + if not word in data.keys(): + print('out of 0.1m words', ' ', word) + else: + word_used = word_used + 1 + weight = wd_list.count(word)/1.0/len(pattern_to_words[key]) + if word in IDF.keys(): + pattern_vector = pattern_vector + weight * IDF[word] * np.array(data[word]) + else: + pattern_occur_num = 0 + for k in pattern_to_words.keys(): + if word in pattern_to_words[k]: + pattern_occur_num = pattern_occur_num + 1 + IDF[word] = math.log10(pattern_num/1.0/pattern_occur_num) + #print('tf', weight, 'idf', IDF[word], word) + #print(data[word]) + pattern_vector = pattern_vector + weight * IDF[word] * np.array(data[word]) + pattern_to_vectors[key] = pattern_vector / 
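Note: pattern_to_vec above builds each template vector as a TF-IDF-weighted combination of pretrained word vectors, divided by the number of in-vocabulary word occurrences. The essential arithmetic on a two-template toy vocabulary, using 2-dimensional vectors instead of 300-dimensional ones (all names and values invented for illustration):

import math
import numpy as np

word_vecs = {"receiving": np.array([1.0, 0.0]),
             "block": np.array([0.0, 1.0]),
             "terminating": np.array([1.0, 1.0])}
templates = {"T1": ["receiving", "block"],
             "T2": ["block", "terminating"]}

def template_vector(words, all_templates):
    vec = np.zeros(2)
    used = 0
    for w in words:
        if w not in word_vecs:
            continue
        used += 1
        tf = words.count(w) / len(words)                            # term frequency inside this template
        df = sum(1 for ws in all_templates.values() if w in ws)     # number of templates containing the word
        idf = math.log10(len(all_templates) / df)
        vec += tf * idf * word_vecs[w]
    return vec / used

print(template_vector(templates["T1"], templates))                  # "block" occurs in both templates, so its idf is 0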
word_used + numberid2vec = {} + for _, row in df.iterrows(): + numberid2vec[row['numberID']] = pattern_to_vectors[row['EventTemplate'].replace(variable_symbol, '').strip()].tolist() + json_str = json.dumps(numberid2vec) + with open(pattern_vec_out_path, 'w+') as file_obj: + file_obj.write(json_str) + return pattern_to_vectors \ No newline at end of file diff --git a/extractfeature/k8s/__pycache__/__init__.cpython-37.pyc b/extractfeature/k8s/__pycache__/__init__.cpython-37.pyc new file mode 100644 index 0000000000000000000000000000000000000000..5a8ef954f9635b7ac4e25d5ef679663622ae9860 GIT binary patch literal 173 zcmZ?b<>g`kf+OD?;z0Cc5CH>>K!yVl7qb9~6oz01O-8?!3`HPe1o11(*(%1jGA%PF zwHQd}q$U>SW#*;F+n{4hlT literal 0 HcmV?d00001 diff --git a/extractfeature/k8s/__pycache__/log_preprocessor.cpython-36.pyc b/extractfeature/k8s/__pycache__/log_preprocessor.cpython-36.pyc index c4f5232844f5ef1bc900b85d7093bf2bd56a4515..f9df3d5de99bc249ad3a972af9ece2d8a8b53f5c 100644 GIT binary patch delta 16 XcmeCO>#}1v=H=zmUo&kZJF`3hCJh8S delta 16 XcmeCO>#}1v=H=x|Ug5lvomn0LCBg(3 diff --git a/extractfeature/k8s/__pycache__/log_preprocessor.cpython-37.pyc b/extractfeature/k8s/__pycache__/log_preprocessor.cpython-37.pyc new file mode 100644 index 0000000000000000000000000000000000000000..9a0756f9daa54394623ff33c0a34ac5f8e832261 GIT binary patch literal 7916 zcmcIpU1%IxcCK4h-Cb4v-)c*;#`bD#%eHJ;lD#vtlfjvvwLLo_Ud3yB*G{cH?RMXi z+*1EYw@Tx7Qw?F|yo7O9fn*8$5Lyt1$*?S88A2ei%)=7G639N}ArxT=B!p22OUOeQ z{E~d!iJ<;65a$HW}JtZ&7NqP8*a9@y9)g$t&@&$PW zwWIQ=JcjmT@+J8dIStNN-^k1)1TYXu4wmu>Yn%JJ!ea}3!PWI#hBg_XgTFx@XmQo%#n#n&ATv(dDfEH>j&Ni zcj3CWEjB*5tzm@)nV8VL)AF30dum|sv-14D8rHyMy;lZT96|lDVZ>5t&z8 zn@zlprhIvqL~~c5=pLD0xKa(O3)k8!mm8aY;Hev)zu5@Ruf6IobPvrhc%491Yr(Qt z4K|gxz-)W7Rd2WE8|{^D`8!@MXe)mqs5d=XUtXRMIzhKQzu;Y=7=9rV^KPkvUHqo^*slx+t8VdN#|3w6)D0ULOxaDnS8Z&1{z8qu zclbwN_$D5|jKb4gNWQB};TlqhZW%ZqGjgugTi()U^MR)n_1`s6EX^h&t5RuIn_i_7 zIh9JYEjJrPXDXGA&1z%NlDYoYty@=bELE=k#?sX&v-It6e*Nl=n_T-dP315X7th|k z{My}bK3=XjJRk3dS5>WgYvpe9@gp4F!}j`Jd`Y!cZ9Mkt{#_ixTSNN!)4z+;r>>QC z<*loB&GY@XnqPmcQdsE}9^XV^2~#j%&LAM40HCGO?PFNtt>oc7%T>NP?HzfexuN+-mK$s_jryt6`#N zO1r0RoC&PH9qK#Kw$Kcm(72?9hIA&ho)ND}JvT7RD(sj{%gj@qX698hWUG2fnUnb^ zT9~|o^?J$9I8pasC4m&M-ojjwKED`=&Yp~@JO)2=E;Z_YP-{2WUw@M( zqnVRh_r!Wt`QDyd%wKA>Yt@GTI*4Ka*<56{*S%JBlqIAZK$lyUs+0<1vNtp`d@qP> z5Nziw%M`@@oO?45|M#<_rmXIT6Mb>LkQhGQ$ z4+GX4^&m2^=B(vf_2tTPUHL(js<$cuBsoga&S?4JoRu~2v72dB{h%^fB{FGOQJTtF zD}8kshj8GaqWpmqIY+0ph(d$(XG9XEDRSbNC^2OUT{yzRe^Mm%66hSBBY4z2#*8$D z3?n_PDgOc@(E890$RX*B==>ru0yBgpvQPM=rw55Z54BKdk4oC~r_dn*={I_bAPKEX zc78uJNTwmR--FbefwgL{I`FEiVG43?N)y^(Zd~J*G`IY9nCfSCw0>45u(Cy(2Mw@T z11x9&C}@BkCi}V2W)1k`+ggzC7s4cKS|TWh0-7dxZb5SoSJ|&68%N+RNH46VSS5r~ zMSL@hy&4Dgf0A%DfsxRbNt)^l?oLgi;oEqI(mfO3B?-sCoPVKwmm4pb(V(CQE{G%FF1s#__>e%2D9Ze=DG#L4xtdQu=hY5(t$r7m)+JoKc)R`>-Rq1ze7N#W$aOxso#ZK$}GU8P;Z7AnWNECpBTYk zuIJ^|Ld&!#uR3ylh5$Ur^6zjM>@cFZ3zaU z_vst>4kmipjZa7Geuz~eBXSboq^l42&m#2$vl!}6hO=OX44p-6@_6ho>6Q+R(A7T- zN6_7Ge2N`qwgk!)_LbN+)X8`rX~sPI?$Mmw8uO(_EwKb)?X3EgR(q9h-R>8l9~ru3 zlIdP4{sw7=B^tvgQi^ApfVrjhv0p>|-WTlmz!!$2al`dDK84(qCcuD`Caj3w81(1x zm*~$F^ryi3Qw+zR*P76V34dYfnV5<3ia5*Ge1L7bCL!|O|HfZ8$54L&xi2n6qT1Ex zrX%6J?WP8xht2*j!4B6X5cEu(eBZqr4f|)I=S7-u*0E!v3+YQ&ItpnFI*WDQ2RB8{a${oRus#5qo z56vSM_7aI@*CwyRj=^QLLOq#>{dgTWUE5R@>VocWo`Flg0KytdrLpX4q+1R;J7$u5SLs6x9T|~2ZVDH z^`t22)8I~m8VBw%;Zml!pM!bytpPknMgJ8(Qh-Xi+YD8fy|ALkYaeG6E(EEECW z$mn!BBcMQnGV($npc2Bm!)ufS0H`?VzNx{^ivX}HFkt1x_wV%d&h?P2-8%>i%+T0E zV6tWcAc;OeQ~*C95wejE2G*92RzN1!#u2}2ZOl<`@?!`H=JvxJtYph1>yM70!3hz> 
z?Dskhfl^D|>{5H0-28OCwZ0kb68_oc5bhGFuD<9*;?Zn6N~|j&c!m{GhAx<)0U;8; z?h#Ok(Z@-Q1>}ZsStJ)%5FxAU#N*Dy;_>Plc!)+J_YDAP79sUp;Ke9n6iITF5T&Vo zARK>%PCh{`O{Z&$VJ~5YX_3Q4WrVtqwi;Mbe-Trwy*6Aw&TS;3 zS|>3ZX>V#x0P|#mv`i0mo?3Wi0pDw_h%i&udf84Ru*i>MulIXa=T2yVx(*a*^R9DY zjQ^Au_qn22ibm3X2aYLp}nwW$yAsPtxJ@YdxiE!Hlg!`=MQ^G zp7tD>0^WKM+E~%;<98Rh6SfZr_OmpoSeaQ`Wx z{sRisRuQNLMVWi5;|`OkmqLViGXIot2-U}i^13~g z;Qs!g)XEa{R7Xlfmt$;AVFUVyLg3;3)-+5)kIPG+!v=AtB}OA^0h*#>vpP2&Ydhfz z(r-s{*CW0p8vIb=kCtO?~}8_O~~N!x(C9n z0&J|HAO8O1jRApC>B@VYs3b6?ZliH_JdO-Us6@RDZX6?WG)R^^wlR!9IF=-JS9ee# zr{M#^AlsoU*z0CBs6CAI*1DU3<*7c761;iWYF-8&=3>-Zrn7VOMG6*M7ZY9p0SmG=kjINgr3RWpR zb%?{Q1v3Zak<&8+qnW`hQ+Nn<5UmzYkFM(S}+5ButQ*fx3O%vw~~@qt>%K7lT|(v-vvSbHY@Rjyckv)IulzeL#I2 zIz9Wkw(&7kpNuemp@9Q*WSV{(2ppN&t3mD23ZszXH-+6<&h;M$mE&Ch@lc2Jr5gPP z@EWK*^Ir>*b>f<-eu#HrO~V^u|HcpZyA1|A#W>&NCIMfNf$NUN?}hkPLBhY@VkPO8 z4zvJNcZ<*SAZp$5=ULSUt+r}H1Pi#@*n3l62|kinA z5pprJ`6xF?qf~tO>zcPlf#vM>W)OdFWHE{Fi&8}4tD+1M!*3aOKpHcNd?pr9q%@R|;!Gt2 zwMeYRih4lV@?OWQZ3Z3^_nftV5mN4x{Ucam=}F*eO6|n|q+^uuq5P%A+Gi4yry3w0 zC57lI2-T(a@iRpe*Hx)>_4$SW=bPMjiK>akzx)z)HtP7@X$hB(kHs%47y0&F(}S^6 z9AG9u;l~RP&2H*p)qmKiKZv75vJI~D2oX~I(am}n*=Jhq1Qjz>w5j-QDqJe=QL#bA z5h^ZHaT mse_threshold: + FP += 1 + model2_success = True + break + if (model2_success): + break + if(FP==1): + print("predict result: abnormal") + else: + print("predict result: normal") + + + + + +generate_log_key_and_value() +log_predict(use_model2) + + + + + + + + diff --git a/java/detect_log/clusters/1 b/java/detect_log/clusters/1 new file mode 100644 index 0000000..f647c99 --- /dev/null +++ b/java/detect_log/clusters/1 @@ -0,0 +1 @@ +0 2 3 \ No newline at end of file diff --git a/java/detect_log/clusters/2 b/java/detect_log/clusters/2 new file mode 100644 index 0000000..7b57bd1 --- /dev/null +++ b/java/detect_log/clusters/2 @@ -0,0 +1 @@ +1 \ No newline at end of file diff --git a/java/detect_log/clusters/3 b/java/detect_log/clusters/3 new file mode 100644 index 0000000..b4fe77f --- /dev/null +++ b/java/detect_log/clusters/3 @@ -0,0 +1 @@ +4 5 8 \ No newline at end of file diff --git a/java/detect_log/clusters/4 b/java/detect_log/clusters/4 new file mode 100644 index 0000000..cea0e89 --- /dev/null +++ b/java/detect_log/clusters/4 @@ -0,0 +1 @@ +6 7 9 \ No newline at end of file diff --git a/java/detect_log/detect.log b/java/detect_log/detect.log new file mode 100644 index 0000000..2cefae8 --- /dev/null +++ b/java/detect_log/detect.log @@ -0,0 +1,10 @@ +081109 203518 143 INFO dfs.DataNode$DataXceiver: Receiving block blk_-1608999687919862906 src: /10.250.19.102:54106 dest: /10.250.19.102:50010 +081109 203518 35 INFO dfs.FSNamesystem: BLOCK* NameSystem.allocateBlock: /mnt/hadoop/mapred/system/job_200811092030_0001/job.jar. 
blk_-1608999687919862906 +081109 203519 143 INFO dfs.DataNode$DataXceiver: Receiving block blk_-1608999687919862906 src: /10.250.10.6:40524 dest: /10.250.10.6:50010 +081109 203519 145 INFO dfs.DataNode$DataXceiver: Receiving block blk_-1608999687919862906 src: /10.250.14.224:42420 dest: /10.250.14.224:50010 +081109 203519 145 INFO dfs.DataNode$PacketResponder: PacketResponder 1 for block blk_-1608999687919862906 terminating +081109 203519 145 INFO dfs.DataNode$PacketResponder: PacketResponder 2 for block blk_-1608999687919862906 terminating +081109 203519 145 INFO dfs.DataNode$PacketResponder: Received block blk_-1608999687919862906 of size 91178 from /10.250.10.6 +081109 203519 145 INFO dfs.DataNode$PacketResponder: Received block blk_-1608999687919862906 of size 91178 from /10.250.19.102 +081109 203519 147 INFO dfs.DataNode$PacketResponder: PacketResponder 0 for block blk_-1608999687919862906 terminating +081109 203519 147 INFO dfs.DataNode$PacketResponder: Received block blk_-1608999687919862906 of size 91178 from /10.250.14.224 \ No newline at end of file diff --git a/java/detect_log/logkey.txt b/java/detect_log/logkey.txt new file mode 100644 index 0000000..4b52ee1 --- /dev/null +++ b/java/detect_log/logkey.txt @@ -0,0 +1 @@ +1 2 1 1 3 3 4 4 3 4 diff --git a/java/detect_log/logvalue/1 b/java/detect_log/logvalue/1 new file mode 100644 index 0000000..f60f1ab --- /dev/null +++ b/java/detect_log/logvalue/1 @@ -0,0 +1 @@ +-0.27948891666666664,0.39378741666666667,0.4394363333333333,0.4158543333333334,0.9347174166666669,-0.08590000000000002,-0.5342015,-0.36786,-1.1335827499999998,-0.6025183333333334 -0.34474441666666666,0.46685858333333335,0.5180023333333333,0.5142760000000001,1.0520995,-0.20836200000000002,-0.4979293333333333,-0.19980916666666668,-1.1568605833333332,-0.6416392500000001 -0.5807055833333334,0.34877700000000006,0.5318064166666666,0.6473328333333334,1.0815334166666668,-0.37340983333333333,-0.2918375,-0.3758109166666667,-1.3126602499999998,-0.6551390833333334 diff --git a/java/detect_log/logvalue/10 b/java/detect_log/logvalue/10 new file mode 100644 index 0000000..3a2e3f4 --- /dev/null +++ b/java/detect_log/logvalue/10 @@ -0,0 +1 @@ +-1 diff --git a/java/detect_log/logvalue/11 b/java/detect_log/logvalue/11 new file mode 100644 index 0000000..3a2e3f4 --- /dev/null +++ b/java/detect_log/logvalue/11 @@ -0,0 +1 @@ +-1 diff --git a/java/detect_log/logvalue/12 b/java/detect_log/logvalue/12 new file mode 100644 index 0000000..3a2e3f4 --- /dev/null +++ b/java/detect_log/logvalue/12 @@ -0,0 +1 @@ +-1 diff --git a/java/detect_log/logvalue/13 b/java/detect_log/logvalue/13 new file mode 100644 index 0000000..3a2e3f4 --- /dev/null +++ b/java/detect_log/logvalue/13 @@ -0,0 +1 @@ +-1 diff --git a/java/detect_log/logvalue/14 b/java/detect_log/logvalue/14 new file mode 100644 index 0000000..3a2e3f4 --- /dev/null +++ b/java/detect_log/logvalue/14 @@ -0,0 +1 @@ +-1 diff --git a/java/detect_log/logvalue/15 b/java/detect_log/logvalue/15 new file mode 100644 index 0000000..3a2e3f4 --- /dev/null +++ b/java/detect_log/logvalue/15 @@ -0,0 +1 @@ +-1 diff --git a/java/detect_log/logvalue/16 b/java/detect_log/logvalue/16 new file mode 100644 index 0000000..3a2e3f4 --- /dev/null +++ b/java/detect_log/logvalue/16 @@ -0,0 +1 @@ +-1 diff --git a/java/detect_log/logvalue/17 b/java/detect_log/logvalue/17 new file mode 100644 index 0000000..3a2e3f4 --- /dev/null +++ b/java/detect_log/logvalue/17 @@ -0,0 +1 @@ +-1 diff --git a/java/detect_log/logvalue/18 b/java/detect_log/logvalue/18 new file mode 100644 index 
0000000..3a2e3f4 --- /dev/null +++ b/java/detect_log/logvalue/18 @@ -0,0 +1 @@ +-1 diff --git a/java/detect_log/logvalue/19 b/java/detect_log/logvalue/19 new file mode 100644 index 0000000..3a2e3f4 --- /dev/null +++ b/java/detect_log/logvalue/19 @@ -0,0 +1 @@ +-1 diff --git a/java/detect_log/logvalue/2 b/java/detect_log/logvalue/2 new file mode 100644 index 0000000..760d7e6 --- /dev/null +++ b/java/detect_log/logvalue/2 @@ -0,0 +1 @@ +0.7032797777777778,-0.2436938888888889,-0.16089766666666666,-0.024760222222222225,0.4287812222222222,0.505934,0.17868633333333334,0.4231786666666667,0.08776533333333335,-0.18805511111111112 diff --git a/java/detect_log/logvalue/20 b/java/detect_log/logvalue/20 new file mode 100644 index 0000000..3a2e3f4 --- /dev/null +++ b/java/detect_log/logvalue/20 @@ -0,0 +1 @@ +-1 diff --git a/java/detect_log/logvalue/21 b/java/detect_log/logvalue/21 new file mode 100644 index 0000000..3a2e3f4 --- /dev/null +++ b/java/detect_log/logvalue/21 @@ -0,0 +1 @@ +-1 diff --git a/java/detect_log/logvalue/22 b/java/detect_log/logvalue/22 new file mode 100644 index 0000000..3a2e3f4 --- /dev/null +++ b/java/detect_log/logvalue/22 @@ -0,0 +1 @@ +-1 diff --git a/java/detect_log/logvalue/23 b/java/detect_log/logvalue/23 new file mode 100644 index 0000000..3a2e3f4 --- /dev/null +++ b/java/detect_log/logvalue/23 @@ -0,0 +1 @@ +-1 diff --git a/java/detect_log/logvalue/24 b/java/detect_log/logvalue/24 new file mode 100644 index 0000000..3a2e3f4 --- /dev/null +++ b/java/detect_log/logvalue/24 @@ -0,0 +1 @@ +-1 diff --git a/java/detect_log/logvalue/25 b/java/detect_log/logvalue/25 new file mode 100644 index 0000000..3a2e3f4 --- /dev/null +++ b/java/detect_log/logvalue/25 @@ -0,0 +1 @@ +-1 diff --git a/java/detect_log/logvalue/26 b/java/detect_log/logvalue/26 new file mode 100644 index 0000000..3a2e3f4 --- /dev/null +++ b/java/detect_log/logvalue/26 @@ -0,0 +1 @@ +-1 diff --git a/java/detect_log/logvalue/27 b/java/detect_log/logvalue/27 new file mode 100644 index 0000000..3a2e3f4 --- /dev/null +++ b/java/detect_log/logvalue/27 @@ -0,0 +1 @@ +-1 diff --git a/java/detect_log/logvalue/28 b/java/detect_log/logvalue/28 new file mode 100644 index 0000000..3a2e3f4 --- /dev/null +++ b/java/detect_log/logvalue/28 @@ -0,0 +1 @@ +-1 diff --git a/java/detect_log/logvalue/29 b/java/detect_log/logvalue/29 new file mode 100644 index 0000000..3a2e3f4 --- /dev/null +++ b/java/detect_log/logvalue/29 @@ -0,0 +1 @@ +-1 diff --git a/java/detect_log/logvalue/3 b/java/detect_log/logvalue/3 new file mode 100644 index 0000000..79646df --- /dev/null +++ b/java/detect_log/logvalue/3 @@ -0,0 +1 @@ +0.0664019999999999,0.18280827272727274,0.28105936363636363,0.2908666363636364,1.4609825454545453,0.28252763636363637,0.3609693636363636,-0.17393918181818188,-1.6194566363636362,-0.6568330909090908 0.08106327272727269,0.17677836363636365,0.27736190909090913,0.28871454545454545,1.4653256363636364,0.29304009090909094,0.36668563636363627,-0.1619914545454546,-1.6175321818181816,-0.6542397272727273 -0.10534100000000005,0.09833072727272728,0.2820625454545454,0.3601499090909091,1.5713586363636365,0.22264509090909093,0.4250600909090909,-0.34029572727272733,-1.7507773636363633,-0.7001486363636363 diff --git a/java/detect_log/logvalue/30 b/java/detect_log/logvalue/30 new file mode 100644 index 0000000..3a2e3f4 --- /dev/null +++ b/java/detect_log/logvalue/30 @@ -0,0 +1 @@ +-1 diff --git a/java/detect_log/logvalue/31 b/java/detect_log/logvalue/31 new file mode 100644 index 0000000..3a2e3f4 --- /dev/null +++ b/java/detect_log/logvalue/31 @@ 
-0,0 +1 @@ +-1 diff --git a/java/detect_log/logvalue/4 b/java/detect_log/logvalue/4 new file mode 100644 index 0000000..2f33070 --- /dev/null +++ b/java/detect_log/logvalue/4 @@ -0,0 +1 @@ +-0.7186064615384615,0.4540127692307693,0.4793014615384616,0.5282222307692307,1.2471863846153846,-0.4967562307692308,-0.2032558461538462,-0.3366557692307693,-1.765158,-0.28286999999999995 -0.7186064615384615,0.4540127692307693,0.4793014615384616,0.5282222307692307,1.2471863846153846,-0.4967562307692308,-0.2032558461538462,-0.3366557692307693,-1.765158,-0.28286999999999995 -0.485389923076923,0.6849405384615385,0.4014096153846154,0.28411176923076914,1.465224923076923,-0.21241961538461537,-0.00538892307692312,-0.26430853846153846,-2.068485,-0.2633912307692307 diff --git a/java/detect_log/logvalue/5 b/java/detect_log/logvalue/5 new file mode 100644 index 0000000..3a2e3f4 --- /dev/null +++ b/java/detect_log/logvalue/5 @@ -0,0 +1 @@ +-1 diff --git a/java/detect_log/logvalue/6 b/java/detect_log/logvalue/6 new file mode 100644 index 0000000..3a2e3f4 --- /dev/null +++ b/java/detect_log/logvalue/6 @@ -0,0 +1 @@ +-1 diff --git a/java/detect_log/logvalue/7 b/java/detect_log/logvalue/7 new file mode 100644 index 0000000..3a2e3f4 --- /dev/null +++ b/java/detect_log/logvalue/7 @@ -0,0 +1 @@ +-1 diff --git a/java/detect_log/logvalue/8 b/java/detect_log/logvalue/8 new file mode 100644 index 0000000..3a2e3f4 --- /dev/null +++ b/java/detect_log/logvalue/8 @@ -0,0 +1 @@ +-1 diff --git a/java/detect_log/logvalue/9 b/java/detect_log/logvalue/9 new file mode 100644 index 0000000..3a2e3f4 --- /dev/null +++ b/java/detect_log/logvalue/9 @@ -0,0 +1 @@ +-1 diff --git a/java/java.iml b/java/java.iml new file mode 100644 index 0000000..c90834f --- /dev/null +++ b/java/java.iml @@ -0,0 +1,11 @@ + + + + + + + + + + + \ No newline at end of file diff --git a/java/out/production/java/deeplog.class b/java/out/production/java/deeplog.class new file mode 100644 index 0000000000000000000000000000000000000000..954dd5a4ddcf2437189affc301dea791ffb93ed7 GIT binary patch literal 1831 zcmZuyU0WMP7(Ek`YzPa5P(ISu8ft4vDbUuBN>Zx@p;E$!1gWu6TsD)iG}+CwyRqfQ zzr%ICfIjxoOE3H{{sr;OCV@rClkCpSdEfJX%$dnw|NQm`fE?;cM9`xlsw0NDz~H{| zvoTvY?Aq-5?!GjAf%qfKw*1EeJ(Jn(1QG(tiu4K1)SX%aNda-c7drYi4CuIo!E-Fd z1J9QYIy)_LjT8cY%bDG9E!!{pt~46?1X2Pwl8djU+43#BmT4aNdybv)OxJ4qnW|Nn z8QXa^lf-3=Xc*NohH-)63%CTdredz!3^v)07F5GfWX`J!t_b8i5f*Y~&u>)@%BEA1 z<$_atVjJ}X&+^KOyIhg7$s%{uWTttLx#jVj#5KI9;ku6ZkvU&$5Q817I&Q;YxpNm} z@FJ8{yT$ozXsMHD5;yRHhMPKW;X{F;pwys{&6e$34ar@oN&j>*G8rc6Eaw%=q>e1^ z2t*~@D{%XQl)|nXu478i%WJqRFxu_#e5+cOuB>cIqas}mGXm)@wrn?Botltu_(1=ILcY4f4LcqjG$m}Iw;n8KVg%25<&M(u34 zohzw=V;c2s!?o1Bvl#XFEKi`f(DBccpkY`%6{C}3 z1)d%Y;%1{lm(>5oNUTX&u+|XP6^cMa(T=wv?#+3vsKE80-GaHEo&Qj`Xp#vLU(^uA zVz@P?H+(_%0)6cpX-8cZJQ*!d?ljo0`U8Rfp!KUpGw6(lUE-&W_B2@CEeup}iqNX) zv|Ljz1(bTHk27lHFo88*o9Yt*Wr8@L8=TGZE%=VzIfi%>z~AtxU_iqrpL*K>MQm}_ zi*K<_5;}`tNThMiOKF8v@9MAUOAQ@iIF*k5j!3Czs#uENExyIowYQj^c|*3 z?=W3TM~^Ujj8Bg7SuQ4W@pSA6`BE$}h9z4CZP79V28hhZ4R=lw3FQf@qA46ZnB>oE%a7h#lJ5!RRNB z*d7>+0*;$7X~*l9Q7Rlo7#Vno*a=BA4OI=-y5D;mrZ@&hPFMl|@H!xbhNY((R-&lU OIZC=c{`NVJ;N`#A2fGgd literal 0 HcmV?d00001 diff --git a/java/src/deeplog.java b/java/src/deeplog.java new file mode 100644 index 0000000..3e496a9 --- /dev/null +++ b/java/src/deeplog.java @@ -0,0 +1,29 @@ +import java.io.BufferedInputStream; +import java.io.BufferedReader; +import java.io.InputStreamReader; +import java.util.Scanner; +import java.io.IOException; + +public class deeplog { + public static void main(String[] args) throws Exception { + String detect_file 
= "detect.log"; + String use_model2 = "1"; + System.out.println("\nExecuting python script file now."); + try { + String cmds = String.format("python C:\\study\\code\\LogAnalysis\\java\\deeplog_java.py %s %s", + detect_file,use_model2); + Process proc = Runtime.getRuntime().exec(cmds); + BufferedReader in = new BufferedReader(new InputStreamReader(proc.getInputStream())); + String line = null; + while ((line = in.readLine()) != null) { + System.out.println(line); + } + in.close(); + proc.waitFor(); + } catch (IOException e) { + e.printStackTrace(); + } catch (InterruptedException e) { + e.printStackTrace(); + } + } +} diff --git a/l_a_d_bi_lstm.py b/l_a_d_bi_lstm.py new file mode 100644 index 0000000..8ba6690 --- /dev/null +++ b/l_a_d_bi_lstm.py @@ -0,0 +1,91 @@ +# -*- coding: UTF-8 -*- +from extractfeature.k8s import log_preprocessor +from extractfeature.k8s import value_extract +import os +from logparsing.fttree import fttree +from extractfeature import hdfs_ft_preprocessor +from anomalydetection.loganomaly import log_anomaly_sequential_train +from anomalydetection.loganomaly import log_anomaly_sequential_predict +from anomalydetection.bi_lstm_only import bi_lstm_train +from anomalydetection.bi_lstm_only import bi_lstm_predict + +# parameters for early prepare +log_file_dir = './Data/log/hdfs/' +log_file_name = 'HDFS_split' +log_fttree_out_directory = './Data/FTTreeResult-HDFS/clusters/' +# anomaly file name used which is also used in ./Data/log/file_split +anomaly_line_file = './Data/log/hdfs/HDFs_split_anomaly' +wordvec_file_path = './Data/pretrainedwordvec/crawl-300d-2M.vec(0.1M)' +sequential_directory = './Data/FTTreeResult-HDFS/sequential_files/' +train_file_name = 'train_file' +test_file_name = 'test_file' +label_file_name = 'label_file' +pattern_vec_out_path = './Data/FTTreeResult-HDFS/pattern_vec' +split_degree = 0.2 +# log file line used which is also used in ./Data/log/file_split +log_line_num = 200000 + +# bi lstm only model parameters +window_length = 20 +input_size = 300 +hidden_size = 128 +num_of_layers = 2 +num_of_classes = 26 +num_epochs = 10 +batch_size = 1000 +# for bi lstm only +train_root_path = './Data/FTTreeResult-HDFS/bi_model_train/' +model_out_path = train_root_path + 'bi_model_out/' +data_file = sequential_directory + train_file_name +pattern_vec_file = pattern_vec_out_path + +# predict parameters + +# log anomaly sequential model parameters + +if not os.path.exists(log_fttree_out_directory): + os.makedirs(log_fttree_out_directory) +if not os.path.exists(sequential_directory): + os.makedirs(sequential_directory) +if not os.path.exists(train_root_path): + os.makedirs(train_root_path) + + +def pattern_extract(): + fttree.pattern_extract(log_file_dir, log_file_name, log_fttree_out_directory, 5, 4, 2) + +# 同时生成train file 和 test file好点 +def extract_feature(): + hdfs_ft_preprocessor.preprocessor_hdfs_ft(log_fttree_out_directory, anomaly_line_file, wordvec_file_path, sequential_directory, train_file_name, test_file_name, label_file_name, pattern_vec_out_path, split_degree, log_line_num) + + +def pattern_extract_test(): + fttree.pattern_extract(log_file_dir, log_file_name, log_fttree_out_directory, 5, 4, 2) + + +def extract_feature_test(): + hdfs_ft_preprocessor.preprocessor_hdfs_ft(log_fttree_out_directory, anomaly_line_file, wordvec_file_path, sequential_directory, 'train_file') + + +def train_model(): + #log_anomaly_sequential_train.train_model(window_length, input_size, hidden_size, num_of_layers, num_of_classes, num_epochs, batch_size, train_root_path, 
model_out_path, data_file, pattern_vec_file) + bi_lstm_train.train_model(window_length, input_size, hidden_size, num_of_layers, num_of_classes, num_epochs, batch_size, train_root_path, model_out_path, data_file, pattern_vec_file) + + +def test_model(): + # do something + #log_anomaly_sequential_predict.do_predict(input_size, hidden_size, num_of_layers, num_of_classes, window_length, model_out_path + 'Adam_batch_size=200;epoch=200.pt', sequential_directory + label_file_name, sequential_directory + test_file_name, 3, pattern_vec_file) + bi_lstm_predict.do_predict(input_size, hidden_size, num_of_layers, num_of_classes, window_length, model_out_path + 'Adam_batch_size=' + str(batch_size) + ';epoch=' + str(num_epochs) + '.pt', sequential_directory + label_file_name, sequential_directory + test_file_name, 10, pattern_vec_file) + + +#extract_feature() +#train_model() +test_model() + +# deep log +# log_preprocessor.execute_process() +# value_extract.get_value() +# value_extract.value_deal() +# value_extract.value_extract() +# train predict + diff --git a/log_anomaly_detection.py b/log_anomaly_detection.py index ddc34dd..6f710cf 100644 --- a/log_anomaly_detection.py +++ b/log_anomaly_detection.py @@ -19,18 +19,18 @@ test_file_name = 'test_file' label_file_name = 'label_file' pattern_vec_out_path = './Data/FTTreeResult-HDFS/pattern_vec' -split_degree = 0.2 +split_degree = 0.8 # log file line used which is also used in ./Data/log/file_split log_line_num = 200000 # log anomaly sequential model parameters some parameter maybe changed to train similar models -window_length = 4 +window_length = 20 input_size = 300 -hidden_size = 30 +hidden_size = 128 num_of_layers = 2 -num_of_classes = 61 -num_epochs = 200 -batch_size = 200 +num_of_classes = 26 +num_epochs = 10 +batch_size = 1000 # for log anomaly train_root_path = './Data/FTTreeResult-HDFS/model_train/' model_out_path = train_root_path + 'model_out/' @@ -41,7 +41,7 @@ pattern_vec_file = pattern_vec_out_path # predict parameters - +num_of_candidates = 10 # log anomaly sequential model parameters if not os.path.exists(log_fttree_out_directory): @@ -69,17 +69,17 @@ def extract_feature_test(): def train_model(): - #log_anomaly_sequential_train.train_model(window_length, input_size, hidden_size, num_of_layers, num_of_classes, num_epochs, batch_size, train_root_path, model_out_path, data_file, pattern_vec_file) - bi_lstm_att_train.train_model(window_length, input_size, hidden_size, num_of_layers, num_of_classes, num_epochs, batch_size, train_root_path, model_out_path, data_file, pattern_vec_file) + log_anomaly_sequential_train.train_model(window_length, input_size, hidden_size, num_of_layers, num_of_classes, num_epochs, batch_size, train_root_path, model_out_path, data_file, pattern_vec_file) + #bi_lstm_att_train.train_model(window_length, input_size, hidden_size, num_of_layers, num_of_classes, num_epochs, batch_size, train_root_path, model_out_path, data_file, pattern_vec_file) def test_model(): # do something - log_anomaly_sequential_predict.do_predict(input_size, hidden_size, num_of_layers, num_of_classes, window_length, model_out_path + 'Adam_batch_size=200;epoch=200.pt', sequential_directory + label_file_name, sequential_directory + test_file_name, 3, pattern_vec_file) - #bi_lstm_att_predict.do_predict(input_size, hidden_size, num_of_layers, num_of_classes, window_length, model_out_path + 'Adam_batch_size=200;epoch=200.pt', sequential_directory + label_file_name, sequential_directory + test_file_name, 3, pattern_vec_file) + 
log_anomaly_sequential_predict.do_predict(input_size, hidden_size, num_of_layers, num_of_classes, window_length, model_out_path + 'Adam_batch_size=' + str(batch_size) + ';epoch=' + str(num_epochs) + '.pt', sequential_directory + label_file_name, sequential_directory + test_file_name, 10, pattern_vec_file) + #bi_lstm_att_predict.do_predict(input_size, hidden_size, num_of_layers, num_of_classes, window_length, model_out_path + 'Adam_batch_size=' + str(batch_size) + ';epoch=' + str(num_epochs) + '.pt', sequential_directory + label_file_name, sequential_directory + train_file_name, num_of_candidates, pattern_vec_file) -pattern_extract() -extract_feature() +#pattern_extract() +#extract_feature() train_model() test_model() diff --git a/log_deep_data_anomaly.py b/log_deep_data_anomaly.py new file mode 100644 index 0000000..9db9090 --- /dev/null +++ b/log_deep_data_anomaly.py @@ -0,0 +1,69 @@ +# -*- coding: UTF-8 -*- +# -*- coding: UTF-8 -*- +from extractfeature.k8s import log_preprocessor +from extractfeature.k8s import value_extract +import os +import torch +from torch.utils.data import TensorDataset, DataLoader +from logparsing.fttree import fttree +from extractfeature import hdfs_ft_preprocessor +from anomalydetection.self_att_lstm import self_att_lstm_train +from anomalydetection.self_att_lstm import self_att_lstm_predict + +sequential_directory = './Data/logdeepdata/' +train_file_name = 'hdfs_train' +test_abnormal_name = 'hdfs_test_abnormal' +test_normal_name = 'hdfs_test_normal' +pattern_vec_out_path = './Data/FTTreeResult-HDFS/pattern_vec' + + +# lstm att model parameters +window_length = 10 +input_size = 1 +hidden_size = 128 +num_of_layers = 2 +num_of_classes = 28 +num_epochs = 20 +batch_size = 2000 +# for self att lstm +train_root_path = './Data/Logdeep_Result/self_att_lstm_model_train/' +model_out_path = train_root_path + 'sa_lstm_model_out/' +data_file = sequential_directory + train_file_name +pattern_vec_file = pattern_vec_out_path + +# predict parameters +num_of_candidates = 8 +# log anomaly sequential model parameters + +if not os.path.exists(sequential_directory): + os.makedirs(sequential_directory) +if not os.path.exists(train_root_path): + os.makedirs(train_root_path) + + + +def train_model(): + #log_anomaly_sequential_train.train_model(window_length, input_size, hidden_size, num_of_layers, num_of_classes, num_epochs, batch_size, train_root_path, model_out_path, data_file, pattern_vec_file) + self_att_lstm_train.train_model(window_length, input_size, hidden_size, num_of_layers, num_of_classes, num_epochs, batch_size, train_root_path, model_out_path, data_file, pattern_vec_file) + + +def test_model(): + # do something + #log_anomaly_sequential_predict.do_predict(input_size, hidden_size, num_of_layers, num_of_classes, window_length, model_out_path + 'Adam_batch_size=200;epoch=200.pt', sequential_directory + label_file_name, sequential_directory + test_file_name, 3, pattern_vec_file) + self_att_lstm_predict.do_log_deep_predict(input_size, hidden_size, num_of_layers, num_of_classes, window_length, model_out_path + 'Adam_batch_size=' + str(batch_size) + ';epoch=' + str(num_epochs) + '.pt', sequential_directory + test_normal_name, sequential_directory + test_abnormal_name, num_of_candidates, pattern_vec_file) + + +#pattern_extract() +#extract_feature_spilt_abnormal() +#train_model() +#get_label_sequentials('./Data/FTTreeResult-HDFS/pattern_sequntials') +test_model() + +# deep log +# log_preprocessor.execute_process() +# value_extract.get_value() +# value_extract.value_deal() +# 
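Note: the drivers above now derive the checkpoint file name from the same batch_size and num_epochs values used for training, instead of the hard-coded 'Adam_batch_size=200;epoch=200.pt', so the name used at predict time matches the one written by train_model. A purely illustrative one-liner of the convention:

batch_size, num_epochs = 1000, 10
checkpoint_name = 'Adam_batch_size=' + str(batch_size) + ';epoch=' + str(num_epochs) + '.pt'
print(checkpoint_name)                                 # Adam_batch_size=1000;epoch=10.pt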
value_extract.value_extract() +# train predict + +# -*- coding: UTF-8 -*- \ No newline at end of file diff --git a/log_predict.py b/log_predict.py new file mode 100644 index 0000000..8b5268d --- /dev/null +++ b/log_predict.py @@ -0,0 +1,305 @@ +#!/usr/bin/python +# -*- coding:utf-8 -*- +import torch +import time +from enum import Enum +from anomalydetection.deeplog.Model1.log_key_LSTM_train import Model as Model1 +from anomalydetection.deeplog.Model2.variable_LSTM_train import Model as Model2 +import torch.nn as nn +import os +import matplotlib.pyplot as plt +from collections import Counter + +# use cuda if available otherwise use cpu +device = torch.device("cuda" if torch.cuda.is_available() else "cpu") +# 记录每个 key 对应的 value 的长度 +value_length_of_key = [] + +# 继承枚举类 +class LineNumber(Enum): + PATTERN_LINE = 0 + NUMBERS_LINE = 3 + + + +def generate(name,window_length): + log_keys_sequences=list() + length=0 + with open(name, 'r') as f: + for line in f.readlines(): + line = list(map(lambda n: n, map(int, line.strip().split()))) + line = line + [-1] * (window_length + 1 - len(line)) + # for i in range(len(line) - window_size): + # inputs.add(tuple(line[i:i+window_size])) + # log_keys_sequences[tuple(line)] = log_keys_sequences.get(tuple(line), 0) + 1 + log_keys_sequences.append(tuple(line)) + length+=1 + return log_keys_sequences,length + + +def get_value_length(log_preprocessor_dir,log_fttree_out_dir): + global value_length_of_key + value_length_of_key = [10]*(len(os.listdir(log_fttree_out_dir)) + 1) + log_value_folder = log_preprocessor_dir + 'logvalue_train/' + file_names = os.listdir(log_value_folder) + # for i in range(len(file_names)): + # with open(log_value_folder + str(i+1), 'r') as f: + # x = f.readlines() + # if len(x) == 0 or x[0].strip('\n') == '-1': + # value_length_of_key.append(0) + # else: + # line = x[0].strip('\n') + # key_values = line.split(' ') + # value_length_of_key[i+1] = len(key_values[0].split(',')) + + +def load_model1(model_dir,model_name,input_size, hidden_size, num_layers): + num_classes = len(value_length_of_key) + # num_classes = 28 + print("Model1 num_classes: ", num_classes) + model1_dir = model_dir + 'model1/' + model_path = model1_dir + model_name + model1 = Model1(input_size, hidden_size, num_layers, num_classes).to(device) + model1.load_state_dict(torch.load(model_path, map_location='cpu')) + model1.eval() + print('model_path: {}'.format(model_path)) + return model1 + + +def load_model2(model_dir,epoch,input_size, hidden_size, num_layers): + model2_dir = model_dir+ 'model2/' + model2 = [] + for i in range(len(value_length_of_key)): + if value_length_of_key[i] == 0: + model2.append(None) + continue + input_size = value_length_of_key[i] + out_size = input_size + model_name = str(i+1) + '_epoch=' + str(epoch)+ '.pt' + model_path = model2_dir + str(i+1) + '/' + model_name + if not os.path.exists(model_path): + model2.append(None) + continue + model = Model2(input_size, hidden_size, num_layers, out_size).to(device) + model.load_state_dict(torch.load(model_path, map_location='cpu')) + model.eval() + print('model_path: {}'.format(model_path)) + model2.append(model) + return model2 + + +def draw_evaluation(title, indexs, values, xlabel, ylabel): + fig = plt.figure(figsize=(15,10)) + x = indexs + y = values + plt.bar(x, y, align='center', alpha=0.5, width=0.4) + plt.xticks(x, x) + plt.ylabel(ylabel) + plt.xlabel(xlabel) + plt.title(title) + plt.show() + + +def 
do_predict(log_preprocessor_dir,log_fttree_out_dir,model_dir,model1_name,model2_num_epochs,window_length,input_size, hidden_size, num_layers,num_candidates,mse_threshold,use_model2): + # abnormal_label_file = log_preprocessor_dir + 'HDFS_abnormal_label.txt' + + get_value_length(log_preprocessor_dir,log_fttree_out_dir) + + model1 = load_model1(model_dir, model1_name, input_size, hidden_size, num_layers) + + model2 = load_model2(model_dir,model2_num_epochs,10, hidden_size, num_layers) + + # for Model2's prediction, store which log currently predicts for each log_key. + # When model one predicts normal, model2 makes predictions. + # At this time, the forward few logs with the same log_key are needed to be predicted + # so the pattern_index is used to record the log_key to be predicted. + #pattern_index = [0]*len(pattern2value) + #pattern_index = [0] * 63 + start_time = time.time() + criterion = nn.MSELoss() + TP = 0 + FP = 0 + TN = 0 + FN = 0 + ALL = 0 + test_normal_loader, test_normal_length = generate(log_preprocessor_dir+ 'logkey/logkey_normal',window_length) + test_abnormal_loader, test_abnormal_length=generate(log_preprocessor_dir+'logkey/logkey_abnormal',window_length) + + + print('predict start') + + #normal test + with torch.no_grad(): + for line_num,line in enumerate(test_normal_loader): + model1_success=False + for i in range(len(line) - window_length-1): + seq0 = line[i:i + window_length] + label = line[i + window_length] + + + seq0 = torch.tensor(seq0, dtype=torch.float).view( + -1,window_length,input_size).to(device) + label = torch.tensor(label).view(-1).to(device) + output = model1(seq0) + predicted = torch.argsort(output, + 1)[0][-num_candidates:] + if label not in predicted: + FP += 1 + model1_success=True + break + if(model1_success): + continue + + + #如果模型二预测normal TN+1 否则FP+1 + + #现在有63个预测normal value 文件 对一个line 找对应的 value normal下的行 进行预测 + + # When model one predicts normal, model2 makes predictions. 
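+            # use_model2 branch: for each log key, read the value vectors recorded for this line
+            # from logvalue_normal/<key>, convert the comma-separated strings to floats, pad short
+            # value sequences, and feed a window of them to that key's Model2 (the parameter-value
+            # LSTM). The MSE between its prediction and the observed next value vector is compared
+            # with mse_threshold; a window whose error exceeds the threshold is treated as anomalous
+            # (counted as FP in this normal-data loop, as TP in the abnormal-data loop below).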
+ # values:all log's value vector belongs to log_key(whose id is pattern_id) + + # 是否使用模型二 + if use_model2: + + seq=[] #得到63个normal预测文件下的这个window的seq + for i in range(26): + with open(log_preprocessor_dir+'/logvalue_normal/'+str(i+1),'r')as f: + key_values=f.readlines() + key_values=key_values[line_num].strip('\n') + if(key_values=='-1'): + continue + seq.append(key_values.split(' ')) + #将字符串转为数字 + for k1 in range(len(seq)): + for k2 in range(len(seq[k1])): + seq[k1][k2]=seq[k1][k2].strip('\n') + seq[k1][k2]=seq[k1][k2].split(',') + for k3 in range(len(seq[k1][k2])): + if(seq[k1][k2][k3]!=''): + seq[k1][k2][k3]=float(seq[k1][k2][k3]) + + #补全 + for i in range(len(seq)): + if(len(seq[i]) mse_threshold: + FP+=1 + model2_success=True + break + if(model2_success): + break + + + #abnormal test + with torch.no_grad(): + for line in test_abnormal_loader: + model1_success=False + for i in range(len(line) - window_length): + seq0 = line[i:i + window_length] + label = line[i + window_length] + + seq0 = torch.tensor(seq0, dtype=torch.float).view( + -1, window_length, input_size).to(device) + + label = torch.tensor(label,).view(-1).to(device) + output = model1(seq0) + predicted = torch.argsort(output, + 1)[0][-num_candidates:] + if label not in predicted: + TP += 1 + model1_success=True + break + if(model1_success): + continue + + # 是否使用模型二 + if use_model2: + seq=[] #得到63个normal预测文件下的这个window的seq + for i in range(26): + with open(log_preprocessor_dir+'/logvalue_abnormal/'+str(i+1),'r')as f: + key_values=f.readlines() + key_values=key_values[line_num].strip('\n') + if(key_values=='-1'): + continue + seq.append(key_values.split(' ')) + #将字符串转为数字 + for k1 in range(len(seq)): + for k2 in range(len(seq[k1])): + seq[k1][k2]=seq[k1][k2].strip('\n') + seq[k1][k2]=seq[k1][k2].split(',') + for k3 in range(len(seq[k1][k2])): + if(seq[k1][k2][k3]!=''): + seq[k1][k2][k3]=float(seq[k1][k2][k3]) + + #补全 + for i in range(len(seq)): + if(len(seq[i]) mse_threshold: + TP+=1 + model2_success=True + break + if(model2_success): + break + + #现在有63个预测normal value 文件 对一个line 找对应的 value normal下的行 进行预测 + + + # Compute precision, recall and F1-measure + FN = test_abnormal_length - TP + TN=test_normal_length-FP + + print('FP: {}, FN: {}, TP: {}, TN: {}'.format(FP, FN, TP, TN)) + Acc = (TP + TN) * 100 /(TP+TN+FP+FN) + P = 100 * TP / (TP + FP) + R = 100 * TP / (TP + FN) + F1 = 2 * P * R / (P + R) + print('Finished Predicting') + elapsed_time = time.time() - start_time + print('elapsed_time: {}'.format(elapsed_time)) + + + print('FP: {}, FN: {}, TP: {}, TN: {}'.format(FP, FN, TP, TN)) + # print('Acc: {:.3f}, Precision: {:.3f}%, Recall: {:.3f}%, F1-measure: {:.3f}%'.format(Acc, P, R, F1)) + print('Finished Predicting') + elapsed_time = time.time() - start_time + print('elapsed_time: {}'.format(elapsed_time)) + + #draw_evaluation("Evaluations", ['Acc', 'Precision', 'Recall', 'F1-measure'],[Acc, P, R, F1], 'evaluations', '%') + + + + + + + diff --git a/logparsing/converter/__init__.py b/logparsing/converter/__init__.py new file mode 100644 index 0000000..9764abf --- /dev/null +++ b/logparsing/converter/__init__.py @@ -0,0 +1 @@ +# -*- coding: UTF-8 -*- \ No newline at end of file diff --git a/logparsing/converter/__pycache__/__init__.cpython-36.pyc b/logparsing/converter/__pycache__/__init__.cpython-36.pyc new file mode 100644 index 0000000000000000000000000000000000000000..36fcdb4469308b381ae6f8e1ed5c10e6b5726338 GIT binary patch literal 171 zcmXr!<>m6&bSO@ofq~&M5W@i@kmUfx#VkM~g&~+hlhJP_LlH4|xXIhDnk#W6nl>5ecqP`n_qs2HR;IX|x~ 
lwWuVuC?-BWGcU6wK3=b&@)n0pZhlH>PO2Tq#$q65006v%Froke literal 0 HcmV?d00001 diff --git a/logparsing/converter/__pycache__/eventid2number.cpython-36.pyc b/logparsing/converter/__pycache__/eventid2number.cpython-36.pyc new file mode 100644 index 0000000000000000000000000000000000000000..d1f0a5f7afeae6157c9bdd57e38e898cf35a08d3 GIT binary patch literal 496 zcmYjNy-ve05VoBr4G0PY0$!mD18fK(P+5?Gg{_JjnVwsM949)vfl6U2YZu;v7h>?r z#49jyE+BEzeZIf%etfjKx&GR`kJdP2AMBq9vd>8E4goOWnhiM!h+f!G0KaAZ&L?Hq z&6t=g35*oMm!|@^$)<~wS{jfRyX3B7tL6T|7A1UoQdTh2>NNbA4Zo6#Z^|5k AcK`qY literal 0 HcmV?d00001 diff --git a/logparsing/converter/eventid2number.py b/logparsing/converter/eventid2number.py new file mode 100644 index 0000000..ceba5e0 --- /dev/null +++ b/logparsing/converter/eventid2number.py @@ -0,0 +1,8 @@ +import pandas as pd + +def add_numberid(logparser_templates_file): + df = pd.read_csv(logparser_templates_file, header=0) + df['numberID'] = range(1, len(df) + 1) + print(df) + + df.to_csv(logparser_templates_file, columns=df.columns, index=0, header=1) \ No newline at end of file diff --git a/logparsing/converter/logparser2cluster.py b/logparsing/converter/logparser2cluster.py new file mode 100644 index 0000000..48b95ba --- /dev/null +++ b/logparsing/converter/logparser2cluster.py @@ -0,0 +1,25 @@ +# coding:utf-8 +import pandas as pd +import os + +# log parser_file should be structed.csv output should be './Data/FTTreeResult-HDFS/clusters/' +def logparser2cluster(logparser_file, output_dir): + if not os.path.exists(output_dir): + os.makedirs(output_dir) + logparser_result = pd.read_csv(logparser_file, header=0) + key_dict = {} + value_dict = {} + for _, row in logparser_result.iterrows(): + key = row['EventTemplate'] + if not key in key_dict: + key_dict[key] = [] + key_dict[key].append(str(row['LineId'])) + key_num = 1 + for key, lines in key_dict.items(): + with open(output_dir + "/" + str(key_num), 'w') as f: + f.write(key + "\n") + f.write(" ".join(lines)) + key_num += 1 + +if __name__ == "__main__": + logparser2cluster("Drain_result/HDFS.log_structured.csv", "clusters") diff --git a/logparsing/drain/.gitignore b/logparsing/drain/.gitignore new file mode 100644 index 0000000..546f7e3 --- /dev/null +++ b/logparsing/drain/.gitignore @@ -0,0 +1,9 @@ +**/__pycache__/* +MANIFEST +dist/* +venv/* +.idea/* +drain3.egg-info/* +snapshot.txt +examples/snapshot.txt +*.bin diff --git a/logparsing/drain/CONTRIBUTING.md b/logparsing/drain/CONTRIBUTING.md new file mode 100644 index 0000000..b54d7be --- /dev/null +++ b/logparsing/drain/CONTRIBUTING.md @@ -0,0 +1,48 @@ +All contributors must agree to the Developer Certificate of Origin Version 1.1. (DCO 1.1) by signing their commits with: + +``` +Signed-off-by: [NAME] <[EMAIL]> +``` + +This can be simply achieved with `git commit -s` when formatting your commit message. + +The full text of the DCO 1.1 is as follows: + +``` +Developer Certificate of Origin +Version 1.1 + +Copyright (C) 2004, 2006 The Linux Foundation and its contributors. +660 York Street, Suite 102, +San Francisco, CA 94110 USA + +Everyone is permitted to copy and distribute verbatim copies of this +license document, but changing it is not allowed. 
+ + +Developer's Certificate of Origin 1.1 + +By making a contribution to this project, I certify that: + +(a) The contribution was created in whole or in part by me and I +have the right to submit it under the open source license +indicated in the file; or + +(b) The contribution is based upon previous work that, to the best +of my knowledge, is covered under an appropriate open source +license and I have the right under that license to submit that +work with modifications, whether created in whole or in part +by me, under the same open source license (unless I am +permitted to submit under a different license), as indicated +in the file; or + +(c) The contribution was provided directly to me by some other +person who certified (a), (b) or (c) and I have not modified +it. + +(d) I understand and agree that this project and the contribution +are public and that a record of the contribution (including all +personal information I submit with it, including my sign-off) is +maintained indefinitely and may be redistributed consistent with +this project or the open source license(s) involved. +``` diff --git a/logparsing/drain/HDFS_drain.py b/logparsing/drain/HDFS_drain.py new file mode 100644 index 0000000..b14e226 --- /dev/null +++ b/logparsing/drain/HDFS_drain.py @@ -0,0 +1,34 @@ +import configparser +import json +import logging +import sys +import os +import shutil + +from logparsing.drain.drain3.template_miner import TemplateMiner +from logparsing.drain.drain3.file_persistence import FilePersistence + + +def get_hdfs_drain_clusters(log,drain_out,bin_dir): + persistence_type = "FILE" + config = configparser.ConfigParser() + config.read('drain3.ini') + logger = logging.getLogger(__name__) + logging.basicConfig(stream=sys.stdout, level=logging.INFO, format='%(message)s') + persistence = FilePersistence(bin_dir) + template_miner = TemplateMiner(persistence) + shutil.rmtree(drain_out) + os.makedirs(drain_out,exist_ok=True) + with open(log,'r') as file: + lineNum = 0 + for line in file.readlines(): + print(lineNum) + result = template_miner.add_log_message(line) + cluster_id = json.dumps(result["cluster_id"]) + cluster_id = int(cluster_id[2:-1]) + with open(drain_out+str(cluster_id),'a') as outfile: + outfile.write(str(lineNum) + " ") + lineNum += 1 + # print("Clusters:") + #for cluster in template_miner.drain.clusters: + #print(cluster) diff --git a/logparsing/drain/LICENSE.txt b/logparsing/drain/LICENSE.txt new file mode 100644 index 0000000..d152f60 --- /dev/null +++ b/logparsing/drain/LICENSE.txt @@ -0,0 +1,21 @@ +MIT License + +Copyright (c) 2020 International Business Machines + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. diff --git a/logparsing/drain/README.md b/logparsing/drain/README.md new file mode 100644 index 0000000..ec7d78a --- /dev/null +++ b/logparsing/drain/README.md @@ -0,0 +1,169 @@ +# Drain3 +## Introduction + +Drain3 is an online log template miner that can extract templates (clusters) from a stream of log messages +in a timely manner. It employs a parse tree with fixed depth to guide the log group search process, +which effectively avoids constructing a very deep and unbalanced tree. + +Drain3 continuously learns on-the-fly and automatically extracts "log templates" from raw log entries. + +#### Example: + +For the input: + +``` +connected to 10.0.0.1 +connected to 10.0.0.2 +connected to 10.0.0.3 +Hex number 0xDEADBEAF +Hex number 0x10000 +user davidoh logged in +user eranr logged in +``` + +Drain3 extracts the following templates: + +``` +A0001 (size 3): connected to +A0002 (size 2): Hex number +A0003 (size 2): user <*> logged in +``` + +This project is an upgrade of the original [Drain](https://github.com/logpai/logparser/blob/master/logparser/Drain) +project by LogPAI from Python 2.7 to Python 3.6 or later with some bug-fixes and additional features. + +Read more information about Drain from the following paper: + +- Pinjia He, Jieming Zhu, Zibin Zheng, and Michael R. Lyu. [Drain: An Online Log Parsing Approach with Fixed Depth Tree](http://jmzhu.logpai.com/pub/pjhe_icws2017.pdf), Proceedings of the 24th International Conference on Web Services (ICWS), 2017. + +A possible Drain3 use case in this blog post: [Use open source Drain3 log-template mining project to monitor for network outages](https://developer.ibm.com/blogs/how-mining-log-templates-can-help-ai-ops-in-cloud-scale-data-centers). + + +#### New features + +- **Persistence**. Save and load Drain state into an [Apache Kafka](https://kafka.apache.org) topic or a file. +- **Streaming**. Support feeding Drain with messages one-be-one. +- **Masking**. Replace some message parts (e.g numbers, IPs, emails) with wildcards. This improves the accuracy of template mining. +- **Packaging**. As a pip package. + +#### Expected Input and Output + +The input for Drain3 is the unstructured free-text portion log messages. It is recommended to extract +structured headers like timestamp, hostname. severity, etc.. from log messages before passing to Drain3, +in order to improve mining accuracy. + +The output is a dictionary with the following fields: +- `change_type`: indicates either if a new template was identified, an existing template was changed or message added to an existing cluster. 
+- `cluster_id`: Sequential ID of the cluster that the log belongs to, for example, `A0008` +- `cluster_size`: The size (message count) of the cluster that the log belongs to +- `cluster_count`: Count clusters seen so far +- `template_mined`: the last template of above cluster_id + +Templates may change over time based on input, for example: + +``` +aa aa aa +{"change_type": "cluster_created", "cluster_id": "A0001", "cluster_size": 1, "template_mined": "aa aa aa", "cluster_count": 1} + +aa aa ab +{"change_type": "cluster_template_changed", "cluster_id": "A0001", "cluster_size": 2, "template_mined": "aa aa <*>", "cluster_count": 1} +``` + +**Explanation:** *Drain3 learned that the third token is a parameter* + +## Configuration + +Drain3 is configured using [configparser](https://docs.python.org/3.4/library/configparser.html) using file `drain3.ini` available parameters are: +- `[DEFAULT]/snapshot_poll_timeout_sec` - maximum timeout for restoring snapshot from Kafka (default 60) +- `[DEFAULT]/sim_th` - recognition threshold (default 0.4) +- `[DEFAULT]/masking` - parameters masking - in json format (default "") +- `[DEFAULT]/snapshot_interval_minutes` - interval for new snapshots (default 1) +- `[DEFAULT]/compress_state` - whether to compress the state before saving it. This can be useful when using Kafka persistence. + +## Masking + +This feature allows masking of specific parameters in log message to specific keywords. Use a list of regular expression +dictionaries in the configuration file with the format {'regex_pattern', 'mask_with'} to set custom masking. + +In order to mask an IP address created the file `drain3.ini` : + +``` +[DEFAULT] +masking = [ + {"regex_pattern":"((?<=[^A-Za-z0-9])|^)(\\d{1,3}\\.\\d{1,3}\\.\\d{1,3}\\.\\d{1,3})((?=[^A-Za-z0-9])|$)", "mask_with": "IP"}, + ] +``` + +Now, Drain3 recognizes IP addresses in templates, for example with input such as: +``` +IP is 12.12.12.12 +{"change_type": "cluster_created", "cluster_id": "A0013", "cluster_size": 1, "template_mined": "IP is ", "cluster_count": 13} +``` + +Note: template parameters that do not match custom masking are output as <*> + +## Persistence +The persistence feature saves and loads a snapshot of Drain3 state in (compressed) json format. This feature adds restart resiliency +to Drain allowing continuation of activity and knowledge across restarts. + +Drain3 state includes the search tree and all the clusters that were identified up until snapshot time. + +The snapshot also persist number of occurrences per cluster, and the cluster_id. + +An example of a snapshot: +``` +{"clusters": [{"cluster_id": "A0001", "log_template_tokens": `["aa", "aa", "<\*>"]`, "py/object": "drain3_core.LogCluster", "size": 2}, {"cluster_id": "A0002", "log_template_tokens": `["My", "IP", "is", ""]`, "py/object": "drain3_core.LogCluster", "size": 1}]... +``` + +This example snapshot persist two clusters with the templates: + +> `["aa", "aa", "<\*>"]` - occurs twice +> +> `["My", "IP", "is", ""]` - occurs once + +Snapshots are created in the following events: + +- `cluster_created` - in any new template +- `cluster_template_changed` - in any update of a template +- `periodic` - after n minutes from the last snapshot. This is intended to save cluster sizes even if no new template was identified. + +Drain3 currently supports 3 persistence modes: + +- **Kafka** - The snapshot is saved in a dedicated topic used only for snapshots - the last message in this topic +is the last snapshot that will be loaded after restart. 
+For Kafka persistence, you need to provide: `topic_name` and `server_name`. + +- **File** - The snapshot is saved to a file. + +- **None** - No persistence. + +Drain3 persistence modes can be easily extended to another medium / database by +inheriting the [PersistenceHandler](drain3/persistence_handler.py) class. + + +## Installation + +Drain3 is available from [PyPI](https://pypi.org/project/drain3). To install use `pip`: + +```pip3 install drain3``` + + +## Examples + +Run [examples/drain_stdin_demo.py](examples/drain_stdin_demo.py) from the root folder of the repository by: + +``` +python -m examples.drain_stdin_demo +``` + +Use Drain3 with input from stdin and persist to either Kafka / file / no persistence. + +Enter several log lines using the command line. Press `q` to end execution. + +Change `persistence_type` variable in the example to change persistence mode. + +An example drain3.ini file with masking instructions exists in the `examples` folder. + +## Contributing + +Our project welcomes external contributions. Please refer to [CONTRIBUTING.md](CONTRIBUTING.md) for further details. diff --git a/logparsing/drain/__init__.py b/logparsing/drain/__init__.py deleted file mode 100644 index e69de29..0000000 diff --git a/logparsing/drain/drain3/__init__.py b/logparsing/drain/drain3/__init__.py new file mode 100644 index 0000000..3113a50 --- /dev/null +++ b/logparsing/drain/drain3/__init__.py @@ -0,0 +1,2 @@ +from logparsing.drain.drain3.template_miner import TemplateMiner + diff --git a/logparsing/drain/drain3/drain.py b/logparsing/drain/drain3/drain.py new file mode 100644 index 0000000..9b961d4 --- /dev/null +++ b/logparsing/drain/drain3/drain.py @@ -0,0 +1,258 @@ +""" +Description : This file implements the Drain algorithm for log parsing +Author : LogPAI team +Modified by : david.ohana@ibm.com, moshikh@il.ibm.com +License : MIT +""" + +param_str = '<*>' + + +class LogCluster: + def __init__(self, log_template_tokens: list, cluster_id): + self.log_template_tokens = log_template_tokens + self.cluster_id = cluster_id + self.size = 1 + + def get_template(self): + return ' '.join(self.log_template_tokens) + + def __str__(self): + return f"{self.cluster_id} (size {self.size}): {self.get_template()}" + + +class Node: + def __init__(self, key, depth): + self.depth = depth + self.key = key + self.key_to_child_node = {} + self.clusters = [] + + +class Drain: + + def __init__(self, depth=4, sim_th=0.4, max_children=100): + """ + Attributes + ---------- + depth : depth of all leaf nodes + sim_th : similarity threshold + max_children : max number of children of an internal node + """ + self.depth = depth - 2 + self.sim_th = sim_th + self.max_children = max_children + self.root_node = Node("(ROOT)", 0) + self.clusters = [] + + @staticmethod + def has_numbers(s): + return any(char.isdigit() for char in s) + + def tree_search(self, root_node: Node, tokens): + + token_count = len(tokens) + parent_node = root_node.key_to_child_node.get(token_count) + + # no template with same token count yet + if parent_node is None: + return None + + # handle case of empty log string + if token_count == 0: + return parent_node.clusters[0] + + cluster = None + current_depth = 1 + for token in tokens: + at_max_depth = current_depth == self.depth + is_last_token = current_depth == token_count + + if at_max_depth or is_last_token: + break + + key_to_child_node = parent_node.key_to_child_node + if token in key_to_child_node: + parent_node = key_to_child_node[token] + elif param_str in key_to_child_node: + parent_node = 
key_to_child_node[param_str] + else: + return cluster + current_depth += 1 + + cluster = self.fast_match(parent_node.clusters, tokens) + + return cluster + + def add_seq_to_prefix_tree(self, root_node, cluster: LogCluster): + token_count = len(cluster.log_template_tokens) + if token_count not in root_node.key_to_child_node: + first_layer_node = Node(key=token_count, depth=1) + root_node.key_to_child_node[token_count] = first_layer_node + else: + first_layer_node = root_node.key_to_child_node[token_count] + + parent_node = first_layer_node + + # handle case of empty log string + if len(cluster.log_template_tokens) == 0: + parent_node.clusters.append(cluster) + return + + current_depth = 1 + for token in cluster.log_template_tokens: + + # Add current log cluster to the leaf node + at_max_depth = current_depth == self.depth + is_last_token = current_depth == token_count + if at_max_depth or is_last_token: + parent_node.clusters.append(cluster) + break + + # If token not matched in this layer of existing tree. + if token not in parent_node.key_to_child_node: + if not self.has_numbers(token): + if param_str in parent_node.key_to_child_node: + if len(parent_node.key_to_child_node) < self.max_children: + new_node = Node(key=token, depth=current_depth + 1) + parent_node.key_to_child_node[token] = new_node + parent_node = new_node + else: + parent_node = parent_node.key_to_child_node[param_str] + else: + if len(parent_node.key_to_child_node) + 1 < self.max_children: + new_node = Node(key=token, depth=current_depth + 1) + parent_node.key_to_child_node[token] = new_node + parent_node = new_node + elif len(parent_node.key_to_child_node) + 1 == self.max_children: + new_node = Node(key=param_str, depth=current_depth + 1) + parent_node.key_to_child_node[param_str] = new_node + parent_node = new_node + else: + parent_node = parent_node.key_to_child_node[param_str] + + else: + if param_str not in parent_node.key_to_child_node: + new_node = Node(key=param_str, depth=current_depth + 1) + parent_node.key_to_child_node[param_str] = new_node + parent_node = new_node + else: + parent_node = parent_node.key_to_child_node[param_str] + + # If the token is matched + else: + parent_node = parent_node.key_to_child_node[token] + + current_depth += 1 + + # seq1 is template + @staticmethod + def get_seq_distance(seq1, seq2): + assert len(seq1) == len(seq2) + sim_tokens = 0 + param_count = 0 + + for token1, token2 in zip(seq1, seq2): + if token1 == param_str: + param_count += 1 + continue + if token1 == token2: + sim_tokens += 1 + + ret_val = float(sim_tokens) / len(seq1) + + return ret_val, param_count + + def fast_match(self, cluster_list: list, tokens): + match_cluster = None + + max_sim = -1 + max_param_count = -1 + max_cluster = None + + for cluster in cluster_list: + cur_sim, param_count = self.get_seq_distance(cluster.log_template_tokens, tokens) + if cur_sim > max_sim or (cur_sim == max_sim and param_count > max_param_count): + max_sim = cur_sim + max_param_count = param_count + max_cluster = cluster + + if max_sim >= self.sim_th: + match_cluster = max_cluster + + return match_cluster + + @staticmethod + def get_template(seq1, seq2): + assert len(seq1) == len(seq2) + ret_val = [] + + i = 0 + for word in seq1: + if word == seq2[i]: + ret_val.append(word) + else: + ret_val.append(param_str) + + i += 1 + + return ret_val + + def print_tree(self): + self.print_node(self.root_node, 0) + + def print_node(self, node, depth): + out_str = '' + for i in range(depth): + out_str += '\t' + + if node.depth == 0: + out_str += 
'Root' + elif node.depth == 1: + out_str += '<' + str(node.key) + '>' + else: + out_str += node.key + + print(out_str) + + if node.depth == self.depth: + return 1 + for child in node.key_to_child_node: + self.print_node(node.key_to_child_node[child], depth + 1) + + @staticmethod + def num_to_cluster_id(num): + cluster_id = "A{:04d}".format(num) + return cluster_id + + def add_log_message(self, content: str): + content = content.strip() + content_tokens = content.split() + match_cluster = self.tree_search(self.root_node, content_tokens) + + # Match no existing log cluster + if match_cluster is None: + cluster_num = len(self.clusters) + 1 + cluster_id = self.num_to_cluster_id(cluster_num) + match_cluster = LogCluster(content_tokens, cluster_id) + self.clusters.append(match_cluster) + self.add_seq_to_prefix_tree(self.root_node, match_cluster) + update_type = "cluster_created" + + # Add the new log message to the existing cluster + else: + new_template_tokens = self.get_template(content_tokens, match_cluster.log_template_tokens) + if ' '.join(new_template_tokens) != ' '.join(match_cluster.log_template_tokens): + match_cluster.log_template_tokens = new_template_tokens + update_type = "cluster_template_changed" + else: + update_type = "none" + match_cluster.size += 1 + + return match_cluster, update_type + + def get_total_cluster_size(self): + size = 0 + for c in self.clusters: + size += c.size + return size diff --git a/logparsing/drain/drain3/file_persistence.py b/logparsing/drain/drain3/file_persistence.py new file mode 100644 index 0000000..26faf66 --- /dev/null +++ b/logparsing/drain/drain3/file_persistence.py @@ -0,0 +1,25 @@ +""" +Description : This file implements the persist/restore from file +Author : Moshik Hershcovitch +Author_email: moshikh@il.ibm.com +License : MIT +""" + +import os +import pathlib + +from logparsing.drain.drain3.persistence_handler import PersistenceHandler + + +class FilePersistence(PersistenceHandler): + def __init__(self, file_path): + self.file_path = file_path + + def save_state(self, state): + pathlib.Path(self.file_path).write_bytes(state) + + def load_state(self): + if not os.path.exists(self.file_path): + return None + + return pathlib.Path(self.file_path).read_bytes() diff --git a/logparsing/drain/drain3/kafka_persistence.py b/logparsing/drain/drain3/kafka_persistence.py new file mode 100644 index 0000000..c5a05d4 --- /dev/null +++ b/logparsing/drain/drain3/kafka_persistence.py @@ -0,0 +1,45 @@ +""" +Author : Moshik Hershcovitch +Author : David Ohana, Moshik Hershcovitch, Eran Raichstein +Author_email: david.ohana@ibm.com, moshikh@il.ibm.com, eranra@il.ibm.com +License : MIT +""" +import configparser + +import kafka + +# logger = logging.getLogger(__name__) +from logparsing.drain.drain3.persistence_handler import PersistenceHandler + +config = configparser.ConfigParser() +config.read('drain3.ini') + + +class KafkaPersistence(PersistenceHandler): + def __init__(self, server_list, topic): + self.server_list = server_list + self.topic = topic + self.producer = kafka.KafkaProducer(bootstrap_servers=server_list) + + def save_state(self, state): + self.producer.send(self.topic, value=state) + + def load_state(self): + consumer = kafka.KafkaConsumer(bootstrap_servers=self.server_list) + partition = kafka.TopicPartition(self.topic, 0) + consumer.assign([partition]) + end_offsets = consumer.end_offsets([partition]) + end_offset = list(end_offsets.values())[0] + if end_offset > 0: + consumer.seek(partition, end_offset - 1) + snapshot_poll_timeout_ms = 
int(config.get('DEFAULT', 'snapshot_poll_timeout_sec', fallback=60)) * 1000 + records = consumer.poll(snapshot_poll_timeout_ms) + if not records: + raise RuntimeError(f"No message received from Kafka during restore even though end_offset>0") + last_msg = records[partition][0] + state = last_msg.value + else: + state = None + + consumer.close() + return state diff --git a/logparsing/drain/drain3/masking.py b/logparsing/drain/drain3/masking.py new file mode 100644 index 0000000..a57b9bb --- /dev/null +++ b/logparsing/drain/drain3/masking.py @@ -0,0 +1,65 @@ +""" +Description : This file implements the persist/restore from Kafka +Author : Moshik Hershcovitch +Author_email: moshikh@il.ibm.com +License : MIT +""" +import configparser +import json +import logging +import re +from typing import List + +logger = logging.getLogger(__name__) +config = configparser.ConfigParser() +config.read('drain3.ini') + + +class MaskingInstruction: + def __init__(self, regex_pattern: str, mask_with: str): + self.regex_pattern = regex_pattern + self.mask_with = mask_with + self.regex = re.compile(regex_pattern) + self.mask_with_wrapped = "<" + mask_with + ">" + + +class RegexMasker: + def __init__(self, masking_instructions: List[MaskingInstruction]): + self.masking_instructions = masking_instructions + + def mask(self, content: str): + for mi in self.masking_instructions: + content = re.sub(mi.regex, mi.mask_with_wrapped, content) + return content + + +# Some masking examples +# --------------------- +# +# masking_instances = [ +# MaskingInstruction(r'((?<=[^A-Za-z0-9])|^)(([0-9a-f]{2,}:){3,}([0-9a-f]{2,}))((?=[^A-Za-z0-9])|$)', "ID"), +# MaskingInstruction(r'((?<=[^A-Za-z0-9])|^)(\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3})((?=[^A-Za-z0-9])|$)', "IP"), +# MaskingInstruction(r'((?<=[^A-Za-z0-9])|^)([0-9a-f]{6,} ?){3,}((?=[^A-Za-z0-9])|$)', "SEQ"), +# MaskingInstruction(r'((?<=[^A-Za-z0-9])|^)([0-9A-F]{4} ?){4,}((?=[^A-Za-z0-9])|$)', "SEQ"), +# +# MaskingInstruction(r'((?<=[^A-Za-z0-9])|^)(0x[a-f0-9A-F]+)((?=[^A-Za-z0-9])|$)', "HEX"), +# MaskingInstruction(r'((?<=[^A-Za-z0-9])|^)([\-\+]?\d+)((?=[^A-Za-z0-9])|$)', "NUM"), +# MaskingInstruction(r'(?<=executed cmd )(".+?")', "CMD"), +# ] + + +class LogMasker: + def __init__(self): + masking_instances = [] + self.masker = None + m = json.loads(config.get('DEFAULT', 'masking', fallback="[]")) + for i in range(len(m)): + logger.info("Adding custom mask {0} --> {1}".format(str(m[i]['mask_with']), str(m[i]['regex_pattern']))) + masking_instances.append(MaskingInstruction(m[i]['regex_pattern'], m[i]['mask_with'])) + self.masker = RegexMasker(masking_instances) + + def mask(self, content: str): + if self.masker is not None: + return self.masker.mask(content) + else: + return content diff --git a/logparsing/drain/drain3/persistence_handler.py b/logparsing/drain/drain3/persistence_handler.py new file mode 100644 index 0000000..a1e5076 --- /dev/null +++ b/logparsing/drain/drain3/persistence_handler.py @@ -0,0 +1,18 @@ +""" +Description : This file implements an abstract class for implementing a Drain3 persistence handler +Author : David Ohana +Author_email: david.ohana@ibm.com +License : MIT +""" +from abc import ABC, abstractmethod + + +class PersistenceHandler(ABC): + + @abstractmethod + def save_state(self, state): + pass + + @abstractmethod + def load_state(self): + pass diff --git a/logparsing/drain/drain3/template_miner.py b/logparsing/drain/drain3/template_miner.py new file mode 100644 index 0000000..aeb4b79 --- /dev/null +++ b/logparsing/drain/drain3/template_miner.py @@ -0,0 
+1,98 @@ +""" +Description : This file implements wrapper of the Drain core algorithm - add persistent and recovery +Author : David Ohana, Moshik Hershcovitch, Eran Raichstein +Author_email: david.ohana@ibm.com, moshikh@il.ibm.com, eranra@il.ibm.com +License : MIT +""" +import base64 +import configparser +import logging +import time +import zlib + +import jsonpickle + +from logparsing.drain.drain3.drain import Drain +from logparsing.drain.drain3.masking import LogMasker +from logparsing.drain.drain3.persistence_handler import PersistenceHandler + +logger = logging.getLogger(__name__) +config = configparser.ConfigParser() +config.read('drain3.ini') + + +class TemplateMiner: + + def __init__(self, persistence_handler: PersistenceHandler): + logger.info("Starting Drain3 template miner") + self.compress_state = config.get('DEFAULT', 'compress_state', fallback=True) + self.persistence_handler = persistence_handler + self.snapshot_interval_seconds = int(config.get('DEFAULT', 'snapshot_interval_minutes', fallback=5)) * 60 + self.drain = Drain(sim_th=float(config.get('DEFAULT', 'sim_th', fallback=0.4))) + self.masker = LogMasker() + self.last_save_time = time.time() + if persistence_handler is not None: + self.load_state() + + def load_state(self): + logger.info("Checking for saved state") + + state = self.persistence_handler.load_state() + if state is None: + logger.info("Saved state not found") + return + + if self.compress_state: + state = zlib.decompress(base64.b64decode(state)) + + drain: Drain = jsonpickle.loads(state) + + # After loading, the keys of "parser.root_node.key_to_child" are string instead of int, + # so we have to cast them to int + keys = [] + for i in drain.root_node.key_to_child_node.keys(): + keys.append(i) + for key in keys: + drain.root_node.key_to_child_node[int(key)] = drain.root_node.key_to_child_node.pop(key) + + self.drain = drain + logger.info("Restored {0} clusters with {1} messages".format( + len(drain.clusters), drain.get_total_cluster_size())) + + def save_state(self, snapshot_reason): + state = jsonpickle.dumps(self.drain).encode('utf-8') + if self.compress_state: + state = base64.b64encode(zlib.compress(state)) + + logger.info(f"Saving state of {len(self.drain.clusters)} clusters " + f"with {self.drain.get_total_cluster_size()} messages, {len(state)} bytes, " + f"reason: {snapshot_reason}") + self.persistence_handler.save_state(state) + + def get_snapshot_reason(self, change_type): + if change_type != "none": + return change_type + + diff_time_sec = time.time() - self.last_save_time + if diff_time_sec >= self.snapshot_interval_seconds: + return "periodic" + + return None + + def add_log_message(self, log_message: str): + masked_content = self.masker.mask(log_message) + cluster, change_type = self.drain.add_log_message(masked_content) + result = { + "change_type": change_type, + "cluster_id": cluster.cluster_id, + "cluster_size": cluster.size, + "template_mined": cluster.get_template(), + "cluster_count": len(self.drain.clusters) + } + + if self.persistence_handler is not None: + snapshot_reason = self.get_snapshot_reason(change_type) + if snapshot_reason: + self.save_state(snapshot_reason) + self.last_save_time = time.time() + return result diff --git a/logparsing/drain/examples/drain3.ini b/logparsing/drain/examples/drain3.ini new file mode 100644 index 0000000..8cd0ec8 --- /dev/null +++ b/logparsing/drain/examples/drain3.ini @@ -0,0 +1,14 @@ +[DEFAULT] +sim_th = 0.4 +snapshot_interval_minutes = 10 +snapshot_poll_timeout_sec = 60 +masking = [ + 
{"regex_pattern":"((?<=[^A-Za-z0-9])|^)(([0-9a-f]{2,}:){3,}([0-9a-f]{2,}))((?=[^A-Za-z0-9])|$)", "mask_with": "ID"}, + {"regex_pattern":"((?<=[^A-Za-z0-9])|^)(\\d{1,3}\\.\\d{1,3}\\.\\d{1,3}\\.\\d{1,3})((?=[^A-Za-z0-9])|$)", "mask_with": "IP"}, + {"regex_pattern":"((?<=[^A-Za-z0-9])|^)([0-9a-f]{6,} ?){3,}((?=[^A-Za-z0-9])|$)", "mask_with": "SEQ"}, + {"regex_pattern":"((?<=[^A-Za-z0-9])|^)([0-9A-F]{4} ?){4,}((?=[^A-Za-z0-9])|$)", "mask_with": "SEQ"}, + {"regex_pattern":"((?<=[^A-Za-z0-9])|^)(0x[a-f0-9A-F]+)((?=[^A-Za-z0-9])|$)", "mask_with": "HEX"}, + {"regex_pattern":"((?<=[^A-Za-z0-9])|^)([\\-\\+]?\\d+)((?=[^A-Za-z0-9])|$)", "mask_with": "NUM"} + ] + + diff --git a/logparsing/drain/examples/drain_stdin_demo.py b/logparsing/drain/examples/drain_stdin_demo.py new file mode 100644 index 0000000..00b45de --- /dev/null +++ b/logparsing/drain/examples/drain_stdin_demo.py @@ -0,0 +1,36 @@ +""" +Description : Example of using Drain3 with Kafka persistence +Author : David Ohana, Moshik Hershcovitch, Eran Raichstein +Author_email: david.ohana@ibm.com, moshikh@il.ibm.com, eranra@il.ibm.com +License : MIT +""" +import configparser +import json +import logging +import sys +sys.path.append('../') + +from logparsing.drain.drain3.template_miner import TemplateMiner +from logparsing.drain.drain3.file_persistence import FilePersistence + +persistence_type = "FILE" + +config = configparser.ConfigParser() +config.read('drain3.ini') + +logger = logging.getLogger(__name__) +logging.basicConfig(stream=sys.stdout, level=logging.INFO, format='%(message)s') +persistence = FilePersistence("drain3_state.bin") +template_miner = TemplateMiner(persistence) +print(f"Drain3 started with '{persistence_type}' persistence, reading from std-in (input 'q' to finish)") +while True: + log_line = input() + if log_line == 'q': + break + result = template_miner.add_log_message(log_line) + result_json = json.dumps(result) + print(result_json) + +print("Clusters:") +for cluster in template_miner.drain.clusters: + print(cluster) diff --git a/logparsing/drain/requirements.txt b/logparsing/drain/requirements.txt new file mode 100644 index 0000000..6fa3443 --- /dev/null +++ b/logparsing/drain/requirements.txt @@ -0,0 +1,5 @@ +jsonpickle==1.3 +kafka==1.3.5 + + + diff --git a/logparsing/drain/setup.cfg b/logparsing/drain/setup.cfg new file mode 100644 index 0000000..b88034e --- /dev/null +++ b/logparsing/drain/setup.cfg @@ -0,0 +1,2 @@ +[metadata] +description-file = README.md diff --git a/logparsing/drain/setup.py b/logparsing/drain/setup.py new file mode 100644 index 0000000..cfb897e --- /dev/null +++ b/logparsing/drain/setup.py @@ -0,0 +1,32 @@ +from setuptools import setup +from os import path + +this_directory = path.abspath(path.dirname(__file__)) +with open(path.join(this_directory, 'README.md'), encoding='utf-8') as f: + long_description = f.read() + +setup( + name='drain3', + packages= ['drain3'], + version="0.7.2", + license='MIT', + description="persistent log parser", + long_description=long_description, + long_description_content_type="text/markdown", + author="IBM Research Haifa", + author_email="drain3@il.ibm.com", + url="https://github.com/IBM/Drain3", + download_url = 'https://github.com/IBM/Drain3/archive/v_01.tar.gz', + keywords = ['drain', 'log', 'parser', 'IBM'], + install_requires=[ + 'jsonpickle==1.3', + 'kafka==1.3.5' + ], + classifiers=[ + "Programming Language :: Python :: 3.6", + "Programming Language :: Python :: 3.7", + "License :: OSI Approved :: MIT License", + "Operating System :: OS Independent", + "Topic 
:: Software Development :: Libraries", + ], +) diff --git a/logparsing/fttree/__pycache__/__init__.cpython-36.pyc b/logparsing/fttree/__pycache__/__init__.cpython-36.pyc index aac5a61d3dace8d92be6fbf1239c3e15cc68e376..43201daff2aae4fd04596d139da61ee9bb6337c7 100644 GIT binary patch delta 26 hcmZ3%xPp<@n3tDp#za;J&a}*&)Z!T5%AARQQ2<|l2mk;8 delta 26 hcmZ3%xPp<@n3tDp`b1U-PPfdQ)Z!T5N}q{-Q2Eq%@^6COJPPHO41D-7zmQr?NP+I6g16 Md}6l_>t~R105zl&kN^Mx delta 102 zcmbQlxQCJ3iI3$JYBN<>Dpx=>dB7ICp)@hOch@2UH@eF rwr5+rU#y=ReTqC%@ipav{nBQ80) zmQ{+Gsc^&O0_J)~y~!V#<=OSZ8743nicJ<{2~}NJ!w)o86l4fcG1zFJ+`4dv8h((d z4M=6fo9&60f& z&^auHOD21Z=vmZ#zi7AM`q?IbB?mco0JC$Z);)=g$& zvt&O2bQDYBipfrFnT(qzuVIVlbj!?1EspW6^w})J{+Chi7EexodVCsCFg_)-=oT-W zmzS8EdTX*irx)X`$yJ>0JhzyEQny&j^NUh$O+L$c7$`iEYbB%9WNU6$Alb;Rz^c!{ Yz;J8wGHx?Q{mFN^a~M@8d+|5`0Mj0Ki2wiq diff --git a/logparsing/fttree/__pycache__/fttree.cpython-37.pyc b/logparsing/fttree/__pycache__/fttree.cpython-37.pyc index ab64d4f528b2da46ad7c4552f13830f38def4e80..693b5b2c63c70518db10c5cf77b64bad2b0edf39 100644 GIT binary patch delta 681 zcmZuv&ubG=5T4oJW;cmiHb2~m5rtq6Ihcc_=1{9%w1g_+wh|R_n-D0eYu37jx1|z9 z@RahP2k}P*74cAnUOb3@LC+%Iya(lR`fPRd%_EW>}h;Ts+h(x zkf8=0!!!7*)F#YV$*I2-_kqF^J?u%R;PQH8wIIr{%!b4g%*w-4;tHF?E>l#7oikCE zCgj3#wwuLumd*1!>ONWWU-LjNp}`-@j=huoVq}GNqB8ilT$(F0ThI{2*VMP%~)ibrAcIr@M!5+=w!@3tsKHlenYWan=Y`l=!qFuT?lk9E$j(& zW{Z=Sb?7gI<&hbM$YRchMApO;dgvLsct&6G_X-~72z+={;P^M?plM|P-k;$)^++u- z!uX4}|30kkH1gyBjV$1M{#dQpG&U;!Thr6C;OYFcTEZos(yIDfvPtou@w#@LUci0L z86zQ)<&vrA;5^JiU-e({H`?rhLG|QBqZ{1vmK)a>z4^xUV(Ucb+QM?T-M!kGzs=|L wX7VP|OB-PEi#WHo^%cZRTW5Ef5@xt-o~HQ7EaEtSXHHQ3Z7$$p-n1tF0Q_RCVE_OC delta 728 zcmZ`%&ui0g6o20&O=^>-j;(F7u3J?~5mB*KWOIi)oIgN@%CI}sUy!+VmP%|L8GKU* z9f(Y!xs9f(dzfdGC|=`MeL`ZEaas z2mF2?;C1HX-K*W&oH|cJb5AFU#JWg|c-aMVAM;Nniuq5{+b-!)Aq2`&4(v8-xWpS0 z8{ks1cpP}>hl1A-*)*5Kf4*^%doP@RNua(8sc@&@%7U$ofgPfuMUMKYqQer-vL>1+ z*{L*tKi)=zw!Vwo2Qms)MalooZC=hbm{j%0NlRFKt3GWJ7tx zvuQqjMBGtKgUA_j_z;=TfHnS)w#&9+4Pi}WANT}GvhRFPAen(GZ+0Tua?q?>i2Bh1 zS9RLOo)z}R*JuD|2Oaf}N|%E-*D^o@P8GHB<_xO}J?P4+5RKp#E=Djzs92&A$5AWk zGBkuevKao`A7UMz{b|+dw7B>im2Nj{t)A8X;~AUsbdegH_oRYIZO3+`8n}qz4dcUV zo&+f~QS6o8;lhIXoSha=9QH1%M!V5512jtaI<{BH65a%;VxUA@$$$YD;S!8M)nl%) zRdM9pkL!4p2K%yGPo8bPTy|dd%?I_(+3c}c 0])) + if len(line) < window_length: + continue + for i in range(len(line) - window_length): + label_line = [] + for j in range(window_length): + label_line.append(vec_to_class_type[line[i+j]]) + label_line.append(vec_to_class_type[line[i + window_length]]) + input_data.append(label_line) + return input_data + + +def get_label_sequentials(sequential_out_file): + vec_to_class_type = {} + with open(pattern_vec_file, 'r') as pattern_file: + i = 0 + for line in pattern_file.readlines(): + pattern, vec = line.split('[:]') + pattern_vector = tuple(map(float, vec.strip().split(' '))) + vec_to_class_type[pattern_vector] = i + i = i + 1 + with open(sequential_out_file, 'w+') as file: + sequence_data_set = generate_seq_label(data_file, window_length, pattern_vec_file) + for line in sequence_data_set: + for label in line: + file.write(str(label)) + file.write(',') + file.write('\n') + + +#pattern_extract() +#extract_feature_spilt_abnormal() +#train_model() +#get_label_sequentials('./Data/FTTreeResult-HDFS/pattern_sequntials') +test_model() + +# deep log +# log_preprocessor.execute_process() +# value_extract.get_value() +# value_extract.value_deal() +# value_extract.value_extract() +# train predict + +# -*- coding: UTF-8 -*- \ No newline at end of file diff --git a/self_att_supervised_detection.py 
b/self_att_supervised_detection.py new file mode 100644 index 0000000..df27674 --- /dev/null +++ b/self_att_supervised_detection.py @@ -0,0 +1,62 @@ +# -*- coding: UTF-8 -*- +# -*- coding: UTF-8 -*- + +import os +from logparsing.fttree import fttree +from extractfeature import hdfs_ft_preprocessor +from anomalydetection.loganomaly import log_anomaly_sequential_train +from anomalydetection.loganomaly import log_anomaly_sequential_predict +from anomalydetection.self_att_lstm_supervised import self_att_lstm_supervised_train +from anomalydetection.self_att_lstm_supervised import self_att_lstm_supervised_predict + +# parameters for early prepare + +temp_directory = './Data/logdeepdata/' +train_file_name = 'robust_log_train.csv' +test_file_name = 'robust_log_test.csv' +valid_file_name = 'robust_log_valid.csv' + +# log anomaly sequential model parameters some parameter maybe changed to train similar models +sequence_length = 50 +input_size = 300 +hidden_size = 128 +num_of_layers = 2 +# 1 using sigmoid, 2 using softmax +num_of_classes = 1 +num_epochs = 20 +batch_size = 1000 +# for robust attention bi +train_root_path = './Data/FTTreeResult-HDFS/self_att_supervised_model_train/' +model_out_path = train_root_path + 'model_out/' +train_file = temp_directory + train_file_name +pattern_vec_json = './Data/logdeepdata/event2semantic_vec.json' + + +# predict parameters +# log anomaly sequential model parameters + +if not os.path.exists(train_root_path): + os.makedirs(train_root_path) + + +def train_model(): + self_att_lstm_supervised_train.train_model(sequence_length, input_size, hidden_size, num_of_layers, num_of_classes, num_epochs, batch_size, train_root_path, model_out_path, train_file, pattern_vec_json) + + +def test_model(): + # do something + self_att_lstm_supervised_predict.do_predict(input_size, hidden_size, num_of_layers, num_of_classes, sequence_length, model_out_path + 'Adam_batch_size=' + str(batch_size) + ';epoch=' + str(num_epochs) + '.pt', temp_directory + test_file_name, batch_size, pattern_vec_json) + +#pattern_extract() +#extract_feature() +#train_model() +#train_model() +test_model() + +# deep log +# log_preprocessor.execute_process() +# value_extract.get_value() +# value_extract.value_deal() +# value_extract.value_extract() +# train predict +
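
The driver scripts in this patch share one convention: train_model() writes a checkpoint named
'<optimizer>_batch_size=<batch_size>;epoch=<num_epochs>.pt' under model_out_path, and test_model()
rebuilds exactly that name to load it. A minimal sketch of guarding prediction on that hand-off,
reusing the names defined in self_att_supervised_detection.py above (model_out_path, batch_size,
num_epochs, train_model, test_model), might look like:

```
import os

# Rebuild the checkpoint name the same way test_model() does.
model_file = model_out_path + 'Adam_batch_size=' + str(batch_size) + ';epoch=' + str(num_epochs) + '.pt'

if not os.path.exists(model_file):
    # No trained model on disk yet: run training first so prediction has something to load.
    train_model()
test_model()
```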
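
The Drain3 wrapper added under logparsing/drain is driven through TemplateMiner, as
examples/drain_stdin_demo.py and HDFS_drain.py do. A short sketch of that flow, assuming the
logparsing.drain.drain3 package from this patch is importable, jsonpickle is installed (see
requirements.txt), and the snapshot path drain3_state.bin is writable:

```
import json

from logparsing.drain.drain3.template_miner import TemplateMiner
from logparsing.drain.drain3.file_persistence import FilePersistence

# Persist the miner state (parse tree + clusters) to a local snapshot file so that
# cluster ids survive restarts, per the README's Persistence section.
persistence = FilePersistence("drain3_state.bin")
template_miner = TemplateMiner(persistence)

for line in ["connected to 10.0.0.1",
             "connected to 10.0.0.2",
             "user davidoh logged in"]:
    result = template_miner.add_log_message(line)
    # result holds change_type, cluster_id, cluster_size, template_mined and cluster_count.
    print(json.dumps(result))
```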