Skip to content

Commit f551e56

Browse files
committed
here you go
1 parent 572036c commit f551e56

4 files changed

Lines changed: 237 additions & 0 deletions

File tree

README.txt

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1 +1,17 @@
11
Hello!
2+
3+
split-file.py to split the build log and service log into multiple files based on the test case (namespace)
4+
5+
rmtime.sh removes the time info from the service log file so that only the log messages remain for training
6+
7+
clean.py to remove all the special chars like :;\/
8+
9+
# pipeline to train the language model
10+
1. combine all success test case service logs
11+
2. run rmtime.sh
12+
3. run clean.py
13+
4. run fasttext skipgram -input test.log -output model/k8s
14+
15+
16+
# generate vectors for each line of log
17+
cat log.file | fasttext print-sentence-vectors model/k8s.bin

clean.py

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,15 @@
1+
import re
2+
import argparse
3+
import os
4+
5+
# Separators stripped from the log text: the two-character sequence "| " and
# the single characters : ; , * " ' = [ ] ( ) / { }
# NOTE(review): the README says backslashes are removed as well, but this
# pattern (same matching behaviour as the original) never matches "\" --
# confirm the intent before changing it.
SEPARATORS = re.compile(r"""\| |[:;,*"'=\[\]()/{}]""")


def clean_text(content):
    """Return *content* with every separator replaced by a single space."""
    return " ".join(SEPARATORS.split(content))


def main():
    """Clean the log file named on the command line.

    The cleaned text is written next to the input as ``<log_file>.clean``.
    """
    parser = argparse.ArgumentParser(description='log file .')
    parser.add_argument('log_file', metavar='log_file', type=str, help='log_file file')
    args = parser.parse_args()
    log_file = args.log_file

    # Context managers make sure both handles are closed, even on error.
    with open(log_file, 'r') as src:
        content = src.read()
    with open(log_file + ".clean", "w") as f:
        f.write(clean_text(content))


if __name__ == "__main__":
    main()
15+

rmtime.sh

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
#!/bin/sh
# Strip the time-info prefix from a service log: squeeze runs of spaces, then
# keep only field 8 onward of every line. The result is written to "<log>.rmtime".
# Usage: rmtime.sh <service_log>
: "${1:?usage: rmtime.sh <service_log>}"
# "$1" is quoted so paths containing spaces or glob characters work.
tr -s ' ' < "$1" | cut -d ' ' -f 8- > "$1.rmtime"

split-file.py

Lines changed: 205 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,205 @@
1+
#!/usr/bin/env python
2+
3+
import json
4+
import logging
5+
import os
6+
import subprocess
7+
import sys
8+
import time
9+
import argparse
10+
import re
11+
import gzip
12+
13+
from os import listdir
14+
from os.path import isfile, isdir, join
15+
16+
17+
# given build log file and service log file, split the two into each test run based on namespace
18+
# build log contains if the test run is success or not, we'll put the service log splits into ok/failed directories
19+
# Command line: the build log (which records whether each test run passed)
# and the service log that should be split to match it.
parser = argparse.ArgumentParser(description='split logs for each test case.')
parser.add_argument('build_log', metavar='build_log', type=str, help='build log file')
parser.add_argument('service_log', metavar='service_log', type=str, help='service log file')

args = parser.parse_args()

build_log = args.build_log
service_log = args.service_log

logger = logging.getLogger(__name__)
logging.basicConfig(stream=sys.stdout, level=logging.INFO, format='%(message)s')

# Validate the input before touching the filesystem, so a bad path does not
# leave empty ".ok"/".err" directories behind, and exit with a non-zero status.
if not os.path.isfile(build_log):
    logger.error("build log file not exist %s", build_log)
    sys.exit(1)

# Per-test-case build-log splits go to "<build_log>.ok" or "<build_log>.err".
build_log_out_dir_ok = build_log + ".ok"
build_log_out_dir_err = build_log + ".err"
os.makedirs(build_log_out_dir_ok, exist_ok=True)
os.makedirs(build_log_out_dir_err, exist_ok=True)
41+
42+
#############################################################
43+
# split the build log based on regex patterns
44+
#############################################################
45+
# summary = re.compile(".*Passed.*Failed.*Pending.*Skipped.*")
46+
# start
47+
# [BeforeEach]
48+
# end:
49+
# STEP: Destroying namespace "container-runtime-7928" for this suite.
50+
# continue to next [BeforeEach], set as end
51+
# extract namespace
52+
# write start/end to separate file
53+
# continue
54+
# OR
55+
# [SKIPPING]
56+
# discard tmp data
57+
# continue
58+
# Patterns used to carve the build log into test cases. Raw strings keep the
# regex escapes (\d, \[, \.) from being treated as string escapes.
start = re.compile(r"^(.\d{4} \d{2}:\d{2}:\d{2}\.\d{3}\] )?\[BeforeEach\].*")  # start of test case
end = re.compile(r'.* Destroying namespace "(.*)" for this suite.*')  # end of test case
endNamespace = re.compile(r".*Waiting for namespaces \[(.*)\] to vanish.*")  # namespace in end of test case
skip = re.compile(r".*\[SKIPPING\].*")  # test case skipped
summary = re.compile(r".*m(\d+) Passed.* \| .*m(\d+) Failed.* \| .*m(\d+) Pending.* \| .*m(\d+) Skipped.*")  # summary of the whole build
failure = re.compile(r".*Failure \[\d+\.\d+ .*\].*")  # failure information
namespaces = []  # namespace is used to split the logs because each test case is using a new namespace
results = {}  # namespace -> service-log lines attributed to it
failedCases = {}  # namespace -> True for every test case that reported a failure
67+
68+
def _write_case(case_lines, case_namespace, case_failed):
    """Write one test case's build-log lines to a file named after its namespace.

    The file goes into the ".ok" directory, or into the ".err" directory when
    the case failed; failed namespaces are also recorded in failedCases.
    """
    outdir = build_log_out_dir_ok
    if case_failed:
        failedCases[case_namespace] = True
        outdir = build_log_out_dir_err
    with open(join(outdir, case_namespace), 'w') as pf:
        pf.writelines(case_lines)


# Walk the build log once. A test case starts at a [BeforeEach] line and ends
# at its "Destroying namespace" / "Waiting for namespaces" line; the lines up
# to the next [BeforeEach] still belong to it (that is where a failure report
# is looked for). The collected lines are written out when the next case
# starts or when the summary line is reached.
with open(build_log) as bf:
    store = []           # log lines collected for the current test case
    namespace = ""       # namespace used for the current test case
    matching = False     # when true, means already found a start
    matchingEnd = False  # when true, means found an end
    failed = False

    for line in bf:
        if start.match(line):
            if matchingEnd:
                # the previous case is complete: write it to a file with the
                # namespace as the file name, then reset the per-case state
                _write_case(store, namespace, failed)
                store = []
                namespace = ""
                matchingEnd = False
                failed = False

            matching = True
            store.append(line)
        elif summary.match(line):
            # build_log parsing finishing, write out the data for last test case.
            # Without a namespace there is no file name to write to, so skip.
            if namespace:
                _write_case(store, namespace, failed)
                print("found match for namespace at the end", namespace, "failed: ", failed)
            break
        elif skip.match(line):
            # test case skipped: discard what was collected so far
            matching = False
            store = []
        else:
            m = end.match(line) or endNamespace.match(line)
            if m:
                store.append(line)
                # if already found end, and another end coming, means there are
                # multiple ns to be destroyed, we just pick the first one which
                # should have the common prefix
                if not matchingEnd:
                    namespace = m.group(1)
                    namespaces.append(namespace)
                    # now we found the destroying line, continue to the next
                    # BeforeEach, then write the data
                    matchingEnd = True
            else:
                if matching:
                    store.append(line)
                # NOTE(review): applied whenever an end marker has been seen,
                # independent of `matching` -- confirm against the original
                # indentation.
                if matchingEnd and failure.match(line):
                    failed = True
134+
135+
136+
# construct a regex contains all the namespaces, to filter out logs from service log
137+
# so if a log line does not contain a namespace, that line is not selected at all, this could be an issue
138+
# the namespace is like node-port-7890, or node-port-1234-7890, because there are many namespaces
139+
# we only concatenate the alphabetic part of all of them, then add a digit suffix, to speed up the match process
140+
# Unique namespace prefixes: each namespace with its purely numeric
# "-"-separated parts removed. A dict keeps first-seen order, which fixes the
# order of the regex alternatives.
nsprefix = dict.fromkeys(
    ("-".join(part for part in n.split("-") if not part.isdecimal()) for n in namespaces),
    "",
)

# group(1) of `rep` is "<prefix>-<1 to 4 digits>". Prefixes are escaped so any
# regex metacharacter in a namespace is matched literally.
prefix = "(" + "|".join(re.escape(p) for p in nsprefix) + ")"
sufix = r"((-\d{1,4}))"
rep = re.compile(".*(" + prefix + sufix + ").*")
149+
150+
# teststr = "I0331 18:34:44.120761 10 eventhandlers.go:279] \"Delete event for scheduled pod\" pod=\"csi-mock-volumes-1570-6273/csi-mockplugin-0\""
151+
# m = rep.match(teststr)
152+
# if m:
153+
# print(m.group(1))
154+
155+
156+
# art_dir = join(log_dir,"artifacts")
157+
158+
159+
# dirs = [f for f in listdir(art_dir) if isdir(join(art_dir, f))]
160+
# for d in dirs:
161+
# dir=join(art_dir, d)
162+
# files = [f for f in listdir(dir) if isfile(join(dir, f))]
163+
164+
# for root, dirs, files in os.walk(art_dir):
165+
# for file in files:
166+
167+
# if not "log" in file:
168+
# continue
169+
#
170+
171+
# if file.endswith(".log"):
172+
# bf = open(join(root,file))
173+
# else:
174+
# bf = gzip.open(join(root, file), "rt")
175+
# try:
176+
# bf.readline()
177+
# except gzip.BadGzipFile:
178+
# print(file, "is not a gzip file")
179+
# bf = open(join(root,file))
180+
181+
# One bucket of service-log lines per namespace found in the build log.
for n in namespaces:
    results[n] = []

# Attribute every service-log line that mentions a known namespace to it.
# The context manager closes the service log once it has been read.
with open(service_log) as sf:
    for line in sf:
        m = rep.match(line)
        if not m:
            continue
        ns = m.group(1)
        # this ns may not be the exact one, so only keep it when it is known
        if ns in results:
            results[ns].append(line)

# Write each non-empty bucket to "<service_log>.err/<namespace>" for failed
# test cases and "<service_log>.ok/<namespace>" otherwise. Iterating over
# `results` visits every namespace once even if it was recorded repeatedly.
for n, ns_lines in results.items():
    if not ns_lines:
        continue
    outdir = service_log + (".err" if n in failedCases else ".ok")
    os.makedirs(outdir, exist_ok=True)
    with open(join(outdir, n), "w") as f:
        f.writelines(ns_lines)
204+
205+

0 commit comments

Comments
 (0)