-
Notifications
You must be signed in to change notification settings - Fork 44
Expand file tree
/
Copy pathparse_ssdeep.py
More file actions
executable file
·70 lines (64 loc) · 3 KB
/
parse_ssdeep.py
File metadata and controls
executable file
·70 lines (64 loc) · 3 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
#!/usr/bin/python3
"""
This script reads the CSV output from ssdeep in the malicious_apk
and benign_apk folders and writes a similarity score vector and a
classification for each sample to a JSON file for later analysis.
The output data format is as follows:
{"features": ["similarity_limit_0", "similarity_limit_0.2", ...],
"apps": {"999eca2457729e371355aea5faa38e14.apk": {"vector": [0,0,0,1], "malicious": [0,1]}, ...}}
"""
import os
import json
import glob
import random
import numpy
import ssdeep
__author__='mwleeds'
def main():
all_hashes = {'malicious': [], 'benign': []}
app_malicious_map = {} # mapping from android app names to 1 or 0 for malware or goodware
similarity_buckets = ['similarity_limit_0', 'similarity_limit_0.2', 'similarity_limit_0.4', 'similarity_limit_0.6', 'similarity_limit_0.8', 'similarity_limit_1.0']
root_dir = os.getcwd()
for i, directory in enumerate(['benign_apk', 'malicious_apk']):
os.chdir(directory)
with open(directory.split('_')[0] + '_apk_ssdeep.csv') as hashes:
for j, line in enumerate(hashes):
if j == 0: continue
b64hash = line.split(',')[0]
app_name = line.split(',')[-1].split('/')[-1][:-2]
app_malicious_map[app_name] = [1,0] if i else [0,1]
all_hashes['malicious' if i else 'benign'].append((app_name, b64hash))
os.chdir(root_dir)
all_apps = {} # mapping from each app to its similarity score and classification
num_zero = {}
num_each = {}
for category in all_hashes:
num_zero[category] = 0
num_each[category] = 0
for app_and_hash in all_hashes[category]:
similarity_scores = []
this_score = app_and_hash[1]
for i in range(1000):
other_score = random.choice(all_hashes[category])[1]
similarity_scores.append(ssdeep.compare(this_score, other_score))
score = numpy.mean(similarity_scores)
num_each[category] += 1
if score == 0: num_zero[category] += 1
bit_vector = []
last_limit = -0.01
for limit in similarity_buckets:
float_limit = float(limit.split('_')[-1])
if score <= float_limit and score > last_limit:
bit_vector.append(1)
else:
bit_vector.append(0)
last_limit = float_limit
if not any(bit_vector): # score > 1
bit_vector[-1] = 1
all_apps[app_and_hash[0]] = {'vector': bit_vector, 'malicious': app_malicious_map[app_and_hash[0]]}
with open('app_hash_vectors.json', 'w') as outfile:
json.dump({'features': similarity_buckets, 'apps': all_apps}, outfile)
print('{} of {} malicious apps and {} of {} benign apps had zero similarity found'.format(num_zero['malicious'], num_each['malicious'], num_zero['benign'], num_zero['benign']))
print('Wrote data on ' + str(len(all_apps)) + ' apps to a file.')
# Run the analysis only when this file is executed as a script, not on import.
if __name__ == '__main__':
    main()