-
Notifications
You must be signed in to change notification settings - Fork 44
Expand file tree
/
Copy pathmatch_features.py
More file actions
43 lines (34 loc) · 1.74 KB
/
match_features.py
File metadata and controls
43 lines (34 loc) · 1.74 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
#!/usr/bin/python3
"""
This script reads two JSON files: the file passed as the first command-line
argument, which supplies the names of the features, and feature_weights.json
(written by tensorflow_learn.py), which supplies the feature weights from a
trained ML model. It then matches the feature weights to their human-readable
names and prints them out, sorted by weight. So the first feature listed is the
one most indicative of maliciousness, and the first one in the second list is
the one most indicative of being benign (the lists are just mirror images of
each other).
"""
import json
import sys
def _rank_features(names, weights):
    """Return ``(name, weight)`` pairs ordered from highest to lowest weight.

    ``names[i]`` is paired with ``weights[i]``. Because ``sorted`` is stable,
    features with equal weights keep their original relative order.
    """
    order = sorted(range(len(weights)), key=weights.__getitem__, reverse=True)
    return [(names[index], weights[index]) for index in order]


def main():
    """Print every feature ranked by its malicious and by its benign weight.

    Feature names come from the ``'features'`` key of the JSON file named by
    the first command-line argument. Weights come from ``feature_weights.json``
    in the current working directory; each entry is indexed as
    ``[malicious_weight, benign_weight]``.

    Output is two lists on stdout, each line formatted as
    ``<rank>. <name> <weight>`` with ranks counted from 0.

    Raises:
        SystemExit: if no feature-names file was given on the command line.
    """
    if len(sys.argv) < 2:
        # Fail with a readable message instead of a bare IndexError traceback.
        sys.exit('usage: match_features.py FEATURES_JSON')

    # JSON text is UTF-8; do not rely on the platform's default encoding.
    with open(sys.argv[1], encoding='utf-8') as vectors:
        # Dataset of feature names that were used in the model
        feature_names = json.load(vectors)['features']
    with open('feature_weights.json', encoding='utf-8') as weights:
        # Tensorflow model calculated weights for every feature
        feature_weights = json.load(weights)

    # Separate malicious and benign weights
    malicious_weights = [weight[0] for weight in feature_weights]
    benign_weights = [weight[1] for weight in feature_weights]

    # Build both rankings before printing, so bad input fails before any
    # partial report has been written.
    malicious_ranking = _rank_features(feature_names, malicious_weights)
    benign_ranking = _rank_features(feature_names, benign_weights)

    # Prints the rank of each feature, the feature name, and its weight
    print('MALICIOUS FEATURE RANKINGS:\n')
    for rank, (name, weight) in enumerate(malicious_ranking):
        print('{}. {} {}'.format(rank, name, weight))
    print('\n\n\n\n\nBENIGN FEATURE RANKINGS:\n')
    for rank, (name, weight) in enumerate(benign_ranking):
        print('{}. {} {}'.format(rank, name, weight))
# Produce the report only when run as a script, not when imported as a module.
if __name__ == '__main__':
    main()