From 6d1a5ac936b973b7d3516ebe666fb401b17da295 Mon Sep 17 00:00:00 2001 From: William Gayde Date: Sun, 30 Sep 2018 18:17:02 -0500 Subject: [PATCH 1/8] starting csv parse for sbserver --- cmd/sbserver/parse-csv.py | 35 +++++++++++++++++++++++++++++++++++ 1 file changed, 35 insertions(+) create mode 100644 cmd/sbserver/parse-csv.py diff --git a/cmd/sbserver/parse-csv.py b/cmd/sbserver/parse-csv.py new file mode 100644 index 0000000..9882d61 --- /dev/null +++ b/cmd/sbserver/parse-csv.py @@ -0,0 +1,35 @@ +import requests +import json +import csv + +url = "https://safebrowsing.googleapis.com/v4/threatMatches:find?key=" + +x = { + 'threatInfo': { + 'threatTypes': ['ANY_TYPE'], #TODO check this + 'threadEntries': [] + } +} + +entries = [ + {'url':'google.com'}, +] + +#print (entries[0]['url']) + +with open('blacklist-entries.csv') as csvfile: + spamreader = csv.reader(csvfile, delimiter=',') + next(spamreader) + for row in spamreader: + if(row[0][:2] == '//'): + newEntry = {'url':row[0][2:]} + + else: + newEntry = {'url':row[0]} + + #print(newEntry) + entries.append(newEntry) +#print(entries) + +x['threatInfo']['threadEntries'] = entries +print (json.dumps(x)) \ No newline at end of file From d503247af1c5faf20f9f813007cfa50c0ab89c0a Mon Sep 17 00:00:00 2001 From: William Gayde Date: Fri, 5 Oct 2018 11:13:14 -0500 Subject: [PATCH 2/8] working on python --- cmd/sbserver/parse-csv.py | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) diff --git a/cmd/sbserver/parse-csv.py b/cmd/sbserver/parse-csv.py index 9882d61..8f2cec7 100644 --- a/cmd/sbserver/parse-csv.py +++ b/cmd/sbserver/parse-csv.py @@ -2,7 +2,7 @@ import json import csv -url = "https://safebrowsing.googleapis.com/v4/threatMatches:find?key=" +url = "127.0.0.1:8080/v4/threatMatches:find" x = { 'threatInfo': { @@ -16,11 +16,13 @@ ] #print (entries[0]['url']) - +i = 0; with open('blacklist-entries.csv') as csvfile: - spamreader = csv.reader(csvfile, delimiter=',') - next(spamreader) - for row in spamreader: + spamreader = csv.reader(csvfile, delimiter=',') + next(spamreader) + for row in spamreader: + if (i == 10): + break if(row[0][:2] == '//'): newEntry = {'url':row[0][2:]} @@ -29,7 +31,10 @@ #print(newEntry) entries.append(newEntry) + i = i + 1; #print(entries) x['threatInfo']['threadEntries'] = entries -print (json.dumps(x)) \ No newline at end of file +request = requests.post(url, params=x) +print request.txt +#print (json.dumps(x)) \ No newline at end of file From 6159410e6997b3c65347a69983e12cfb920f1a00 Mon Sep 17 00:00:00 2001 From: gayde2 Date: Fri, 5 Oct 2018 17:03:01 -0500 Subject: [PATCH 3/8] working on python --- cmd/sbserver/parse-csv.py | 35 ++++++++++++++++++++++++++--------- 1 file changed, 26 insertions(+), 9 deletions(-) diff --git a/cmd/sbserver/parse-csv.py b/cmd/sbserver/parse-csv.py index 8f2cec7..aa71e60 100644 --- a/cmd/sbserver/parse-csv.py +++ b/cmd/sbserver/parse-csv.py @@ -2,12 +2,26 @@ import json import csv -url = "127.0.0.1:8080/v4/threatMatches:find" +url = "https://safebrowsing.googleapis.com/v4/fullHashes:find?key=AIzaSyC4EjprVfp6YX6aSlHqFZhaUrmbgRMoi7w" x = { + + 'client':{ + 'clientId': 'IllinoisNSRG', + 'clientVersion': '1.0', + }, 'threatInfo': { - 'threatTypes': ['ANY_TYPE'], #TODO check this - 'threadEntries': [] + 'threatTypes': ['THREAT_TYPE_UNSPECIFIED'], + 'platformTypes': ['ANY_PLATFORM'], + 'threatEntryTypes':['URL'], + 'threatEntries': [ + {"hash": "WwuJdQ=="}, + {"hash": "771MOg=="}, + {"hash": "5eOrwQ=="} ] + }, + 'apiClient':{ + 'clientId': 'IllinoisNSRG', + 'clientVersion': '1.0', } } @@ -25,16 +39,19 @@ break if(row[0][:2] == '//'): newEntry = {'url':row[0][2:]} - + else: newEntry = {'url':row[0]} - #print(newEntry) + #print(newEntry) entries.append(newEntry) i = i + 1; #print(entries) -x['threatInfo']['threadEntries'] = entries -request = requests.post(url, params=x) -print request.txt -#print (json.dumps(x)) \ No newline at end of file + +#x['threatInfo']['threatEntries'] = entries +#print(x) +response = requests.post(url, json=x) +print (response.text) +#print (response.json()) +print (json.dumps(x)) From 40884b587bfff977edbc64be53d6544e87e11eeb Mon Sep 17 00:00:00 2001 From: gayde2 Date: Mon, 8 Oct 2018 16:16:50 -0500 Subject: [PATCH 4/8] work on csv parser and hashing --- cmd/sbserver/parse-csv.py | 217 +++++++++++++++++++++++++++++--------- 1 file changed, 165 insertions(+), 52 deletions(-) diff --git a/cmd/sbserver/parse-csv.py b/cmd/sbserver/parse-csv.py index aa71e60..0af833f 100644 --- a/cmd/sbserver/parse-csv.py +++ b/cmd/sbserver/parse-csv.py @@ -1,57 +1,170 @@ +from functools import wraps + +try: + import urllib, urlparse +except ImportError: + import urllib.parse as urllib + from urllib import parse as urlparse + +import struct +import time +import posixpath +import re +import hashlib +import socket +import random +import base64 import requests import json import csv -url = "https://safebrowsing.googleapis.com/v4/fullHashes:find?key=AIzaSyC4EjprVfp6YX6aSlHqFZhaUrmbgRMoi7w" - -x = { - - 'client':{ - 'clientId': 'IllinoisNSRG', - 'clientVersion': '1.0', - }, - 'threatInfo': { - 'threatTypes': ['THREAT_TYPE_UNSPECIFIED'], - 'platformTypes': ['ANY_PLATFORM'], - 'threatEntryTypes':['URL'], - 'threatEntries': [ - {"hash": "WwuJdQ=="}, - {"hash": "771MOg=="}, - {"hash": "5eOrwQ=="} ] - }, - 'apiClient':{ - 'clientId': 'IllinoisNSRG', - 'clientVersion': '1.0', +def full_unescape(u): + uu = urllib.unquote(u) + if uu == u: + return uu + else: + return full_unescape(uu) + +def quote(s): + safe_chars = '!"$&\'()*+,-./:;<=>?@[\\]^_`{|}~' + return urllib.quote(s, safe=safe_chars) + +def canonical(s): + url = s.strip() + url = url.replace('\n', '').replace('\r', '').replace('\t', '') + url = url.split('#', 1)[0] + if url.startswith('//'): + url = 'http:' + url + if len(url.split('://')) <= 1: + url = 'http://' + url + url = quote(full_unescape(url)) + url_parts = urlparse.urlsplit(url) + if not url_parts[0]: + url = 'http://%s' % url + url_parts = urlparse.urlsplit(url) + protocol = url_parts.scheme + host = full_unescape(url_parts.hostname) + path = full_unescape(url_parts.path) + query = url_parts.query + if not query and '?' not in url: + query = None + if not path: + path = '/' + has_trailing_slash = (path[-1] == '/') + path = posixpath.normpath(path).replace('//', '/') + if has_trailing_slash and path[-1] != '/': + path = path + '/' + port = url_parts.port + host = host.strip('.') + host = re.sub(r'\.+', '.', host).lower() + if host.isdigit(): + try: + host = socket.inet_ntoa(struct.pack("!I", int(host))) + except: + pass + if host.startswith('0x') and '.' not in host: + try: + host = socket.inet_ntoa(struct.pack("!I", int(host, 16))) + except: + pass + quoted_path = quote(path) + quoted_host = quote(host) + if port is not None: + quoted_host = '%s:%s' % (quoted_host, port) + canonical_url = '%s://%s%s' % (protocol, quoted_host, quoted_path) + if query is not None: + canonical_url = '%s?%s' % (canonical_url, query) +# print("canonical url is " + canonical_url) + return canonical_url + +def url_host_permutations(host): + if re.match(r'\d+\.\d+\.\d+\.\d+', host): + yield host + return + parts = host.split('.') + l = min(len(parts),5) + if l > 4: + yield host + for i in range(l-1): + yield '.'.join(parts[i-l:]) + +def url_path_permutations(path): + yield path + query = None + if '?' in path: + path, query = path.split('?', 1) + if query is not None: + yield path + path_parts = path.split('/')[0:-1] + curr_path = '' + for i in range(min(4, len(path_parts) )): + curr_path = curr_path + path_parts[i] + '/' + yield curr_path + +def url_permutations(url): +# print("in url_permutations, url is " + url) + protocol, address_str = urllib.splittype(url) + host, path = urllib.splithost(address_str) + user, host = urllib.splituser(str(host)) + host, port = urllib.splitport(host) + host = host.strip('/') + seen_permutations = set() + for h in url_host_permutations(host): + for p in url_path_permutations(path): + u = '%s%s' % (h, p) + if u not in seen_permutations: + yield u + + seen_permutations.add(u) + +def digest(url): + return hashlib.sha256(url.encode('utf-8')).digest() + +def hashes(url): +# print("in hash function, url is " + url) + url_hash = digest(url) + yield url_hash + + +if __name__ == '__main__': + url = "https://safebrowsing.googleapis.com/v4/fullHashes:find?key=" + + x = { + 'client':{}, + 'clientStates':[], + 'threatInfo': { + 'threatTypes': ['THREAT_TYPE_UNSPECIFIED'], + 'platformTypes': ['ANY_PLATFORM'], + 'threatEntryTypes':['URL'], + 'threatEntries': [] + }, + 'apiClient':{}, } -} - -entries = [ - {'url':'google.com'}, -] - -#print (entries[0]['url']) -i = 0; -with open('blacklist-entries.csv') as csvfile: - spamreader = csv.reader(csvfile, delimiter=',') - next(spamreader) - for row in spamreader: - if (i == 10): - break - if(row[0][:2] == '//'): - newEntry = {'url':row[0][2:]} - - else: - newEntry = {'url':row[0]} - - #print(newEntry) - entries.append(newEntry) - i = i + 1; -#print(entries) - - -#x['threatInfo']['threatEntries'] = entries -#print(x) -response = requests.post(url, json=x) -print (response.text) -#print (response.json()) -print (json.dumps(x)) + + entries = [ ] + + i = 0; + with open('blacklist-entries.csv') as csvfile: + spamreader = csv.reader(csvfile, delimiter=',') + next(spamreader) + for row in spamreader: + if (i == 475): + break + if(row[0][:2] == '//'): + inputURL = row[0][2:] + else: + inputURL = row[0] + for permutations in url_permutations(canonical(inputURL)): + for hashed in hashes(permutations): + hashValue = base64.b64encode(hashed[0:4]) + hashValue.replace('\n', '') + newEntry = {'hash': hashValue} + entries.append(newEntry) + i = i + 1; + x['threatInfo']['threatEntries'] = entries + request = requests.post(url, json=x) + print (request.text) + + + + From ff889ebcc85bda6b638c9be62dce6153d52a1675 Mon Sep 17 00:00:00 2001 From: gayde2 Date: Fri, 12 Oct 2018 14:09:42 -0500 Subject: [PATCH 5/8] python script now does all entries in file in blocks of size 496 --- cmd/sbserver/parse-csv.py | 24 +++++++++++++++++------- 1 file changed, 17 insertions(+), 7 deletions(-) diff --git a/cmd/sbserver/parse-csv.py b/cmd/sbserver/parse-csv.py index 0af833f..621918a 100644 --- a/cmd/sbserver/parse-csv.py +++ b/cmd/sbserver/parse-csv.py @@ -74,7 +74,7 @@ def canonical(s): canonical_url = '%s://%s%s' % (protocol, quoted_host, quoted_path) if query is not None: canonical_url = '%s?%s' % (canonical_url, query) -# print("canonical url is " + canonical_url) + #print("canonical url is " + canonical_url) return canonical_url def url_host_permutations(host): @@ -127,7 +127,7 @@ def hashes(url): if __name__ == '__main__': - url = "https://safebrowsing.googleapis.com/v4/fullHashes:find?key=" + url = "https://safebrowsing.googleapis.com/v4/fullHashes:find?key=AIzaSyBU6G2w4ItQUaWMTQCAzgViEX2mN-a1sxc" x = { 'client':{}, @@ -143,27 +143,37 @@ def hashes(url): entries = [ ] + + #print (entries[0]['url']) i = 0; + send = 0 with open('blacklist-entries.csv') as csvfile: spamreader = csv.reader(csvfile, delimiter=',') next(spamreader) for row in spamreader: - if (i == 475): - break + if (i % 496 == 0): + send = 1 if(row[0][:2] == '//'): inputURL = row[0][2:] else: inputURL = row[0] for permutations in url_permutations(canonical(inputURL)): for hashed in hashes(permutations): + #print (hashed[0:4]) hashValue = base64.b64encode(hashed[0:4]) hashValue.replace('\n', '') + #print(hashValue) newEntry = {'hash': hashValue} entries.append(newEntry) i = i + 1; - x['threatInfo']['threatEntries'] = entries - request = requests.post(url, json=x) - print (request.text) + + if send == 1: + x['threatInfo']['threatEntries'] = entries + request = requests.post(url, json=x) +# print(json.dumps(x)) + print (request.text) + send = 0 + entries = [] From de39e09def49498732f10acede8556575b6daa9f Mon Sep 17 00:00:00 2001 From: gayde2 Date: Fri, 12 Oct 2018 14:34:57 -0500 Subject: [PATCH 6/8] parse python function now returns results --- cmd/sbserver/parse-csv.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/cmd/sbserver/parse-csv.py b/cmd/sbserver/parse-csv.py index 621918a..83b36d7 100644 --- a/cmd/sbserver/parse-csv.py +++ b/cmd/sbserver/parse-csv.py @@ -127,13 +127,16 @@ def hashes(url): if __name__ == '__main__': - url = "https://safebrowsing.googleapis.com/v4/fullHashes:find?key=AIzaSyBU6G2w4ItQUaWMTQCAzgViEX2mN-a1sxc" + url = "https://safebrowsing.googleapis.com/v4/fullHashes:find?key=" x = { - 'client':{}, + 'client':{ + 'clientId': 'NSRG', + 'clientVersion': '1.0' + }, 'clientStates':[], 'threatInfo': { - 'threatTypes': ['THREAT_TYPE_UNSPECIFIED'], + 'threatTypes': ['MALWARE', 'SOCIAL_ENGINEERING', 'UNWANTED_SOFTWARE', 'POTENTIALLY_HARMFUL_APPLICATION', 'THREAT_TYPE_UNSPECIFIED'], 'platformTypes': ['ANY_PLATFORM'], 'threatEntryTypes':['URL'], 'threatEntries': [] From 17e0c5154e471fc2cfb2ff9f8410c0df0499f53c Mon Sep 17 00:00:00 2001 From: gayde2 Date: Mon, 15 Oct 2018 14:26:17 -0500 Subject: [PATCH 7/8] progress on csv parser --- cmd/sbserver/parse-csv.py | 74 +++++++++++++++++++++++++++++++-------- 1 file changed, 59 insertions(+), 15 deletions(-) diff --git a/cmd/sbserver/parse-csv.py b/cmd/sbserver/parse-csv.py index 83b36d7..48691b6 100644 --- a/cmd/sbserver/parse-csv.py +++ b/cmd/sbserver/parse-csv.py @@ -1,3 +1,5 @@ +#Usage python parse-csv.py api-key inputCSV outputCSV + from functools import wraps try: @@ -17,6 +19,11 @@ import requests import json import csv +import datetime +import sys + +urlLookup = dict() +partialHashLookup = dict() def full_unescape(u): uu = urllib.unquote(u) @@ -118,16 +125,17 @@ def url_permutations(url): seen_permutations.add(u) def digest(url): - return hashlib.sha256(url.encode('utf-8')).digest() + digest = hashlib.sha256(url.encode('utf-8')).digest() + urlLookup[base64.b64encode(digest)] = url + return digest def hashes(url): -# print("in hash function, url is " + url) url_hash = digest(url) yield url_hash if __name__ == '__main__': - url = "https://safebrowsing.googleapis.com/v4/fullHashes:find?key=" + url = "https://safebrowsing.googleapis.com/v4/fullHashes:find?key=" + sys.argv[1] x = { 'client':{ @@ -147,36 +155,72 @@ def hashes(url): entries = [ ] - #print (entries[0]['url']) i = 0; send = 0 - with open('blacklist-entries.csv') as csvfile: - spamreader = csv.reader(csvfile, delimiter=',') - next(spamreader) - for row in spamreader: - if (i % 496 == 0): - send = 1 + with open(sys.argv[2]) as csvfile: + with open(sys.argv[3], "w+") as outfile: + blacklistWriter = csv.writer(outfile, delimiter=',') + blacklistReader = csv.reader(csvfile, delimiter=',') + blacklistWriter.writerow(['URL', 'Full Hash', 'Partial Hash', 'UTC Time Stamp', 'Match Type', 'Match Metadata', 'Platform']) + next(blacklistReader) + for row in blacklistReader: + if (i % 200 == 0): + send = 1 if(row[0][:2] == '//'): inputURL = row[0][2:] else: inputURL = row[0] for permutations in url_permutations(canonical(inputURL)): for hashed in hashes(permutations): - #print (hashed[0:4]) hashValue = base64.b64encode(hashed[0:4]) + partialHashLookup[base64.b64encode(hashed)] = hashValue hashValue.replace('\n', '') - #print(hashValue) newEntry = {'hash': hashValue} entries.append(newEntry) i = i + 1; if send == 1: x['threatInfo']['threatEntries'] = entries - request = requests.post(url, json=x) -# print(json.dumps(x)) - print (request.text) + response = requests.post(url, json=x) + responseJSON = json.loads(response.text) + if 'matches' in responseJSON: + for hits in responseJSON['matches']: + if 'threatType' in hits: + threatType = hits['threatType'] + if 'platformType' in hits: + platformType = hits['platformType'] + currentHash = '' + if 'threat' in hits: + currentHash = hits['threat']['hash'] + malwareType = '' + timestamp = datetime.datetime.utcnow() + if 'threatEntryMetadata' in hits: + if 'entries' in hits['threatEntryMetadata']: + malwareTypeHash = hits['threatEntryMetadata']['entries'][0]['value'] + if 'TEFORElORw' in malwareTypeHash: + malwareType = 'MALWARE LANDING' + if 'RElTVFJJQlVUSU9O' in malwareTypeHash: + malwareType = 'MALWARE DISTRIBUTION' + + if currentHash in urlLookup: + currentURL = urlLookup[currentHash] + urlLookup.pop(currentHash) + else: + currentURL = "" + + if currentHash in partialHashLookup: + partialHash = partialHashLookup[currentHash] + partialHashLookup.pop(currentHash) + else: + partialHash = "" + blacklistWriter.writerow([currentURL, currentHash, partialHash, timestamp, threatType, malwareType, platformType]) send = 0 entries = [] + leftovers = urlLookup.items() + for extra in leftovers: + blacklistWriter.writerow([extra[1], extra[0], partialHashLookup[extra[0]], timestamp, "", "", ""]) + + From bd6ac636a9d2bf339ffb19b587f6c79908dd200c Mon Sep 17 00:00:00 2001 From: gayde2 Date: Mon, 15 Oct 2018 16:16:03 -0500 Subject: [PATCH 8/8] fixed variable names and useless lines --- cmd/sbserver/parse-csv.py | 151 ++++++++++++++++++++------------------ 1 file changed, 79 insertions(+), 72 deletions(-) diff --git a/cmd/sbserver/parse-csv.py b/cmd/sbserver/parse-csv.py index 48691b6..eaf78a6 100644 --- a/cmd/sbserver/parse-csv.py +++ b/cmd/sbserver/parse-csv.py @@ -22,8 +22,8 @@ import datetime import sys -urlLookup = dict() -partialHashLookup = dict() +URL_lookup = dict() #K:Hash, V: URL +partial_hash_lookup = dict() #K:Hash, V:partial_hash def full_unescape(u): uu = urllib.unquote(u) @@ -120,13 +120,12 @@ def url_permutations(url): for p in url_path_permutations(path): u = '%s%s' % (h, p) if u not in seen_permutations: - yield u - + yield u seen_permutations.add(u) def digest(url): digest = hashlib.sha256(url.encode('utf-8')).digest() - urlLookup[base64.b64encode(digest)] = url + URL_lookup[base64.b64encode(digest)] = url return digest def hashes(url): @@ -155,73 +154,81 @@ def hashes(url): entries = [ ] - i = 0; - send = 0 - with open(sys.argv[2]) as csvfile: - with open(sys.argv[3], "w+") as outfile: - blacklistWriter = csv.writer(outfile, delimiter=',') - blacklistReader = csv.reader(csvfile, delimiter=',') - blacklistWriter.writerow(['URL', 'Full Hash', 'Partial Hash', 'UTC Time Stamp', 'Match Type', 'Match Metadata', 'Platform']) - next(blacklistReader) - for row in blacklistReader: - if (i % 200 == 0): - send = 1 - if(row[0][:2] == '//'): - inputURL = row[0][2:] - else: - inputURL = row[0] - for permutations in url_permutations(canonical(inputURL)): - for hashed in hashes(permutations): - hashValue = base64.b64encode(hashed[0:4]) - partialHashLookup[base64.b64encode(hashed)] = hashValue - hashValue.replace('\n', '') - newEntry = {'hash': hashValue} - entries.append(newEntry) - i = i + 1; - - if send == 1: - x['threatInfo']['threatEntries'] = entries - response = requests.post(url, json=x) - responseJSON = json.loads(response.text) - if 'matches' in responseJSON: - for hits in responseJSON['matches']: - if 'threatType' in hits: - threatType = hits['threatType'] - if 'platformType' in hits: - platformType = hits['platformType'] - currentHash = '' - if 'threat' in hits: - currentHash = hits['threat']['hash'] - malwareType = '' - timestamp = datetime.datetime.utcnow() - if 'threatEntryMetadata' in hits: - if 'entries' in hits['threatEntryMetadata']: - malwareTypeHash = hits['threatEntryMetadata']['entries'][0]['value'] - if 'TEFORElORw' in malwareTypeHash: - malwareType = 'MALWARE LANDING' - if 'RElTVFJJQlVUSU9O' in malwareTypeHash: - malwareType = 'MALWARE DISTRIBUTION' - - if currentHash in urlLookup: - currentURL = urlLookup[currentHash] - urlLookup.pop(currentHash) - else: - currentURL = "" - - if currentHash in partialHashLookup: - partialHash = partialHashLookup[currentHash] - partialHashLookup.pop(currentHash) - else: - partialHash = "" - blacklistWriter.writerow([currentURL, currentHash, partialHash, timestamp, threatType, malwareType, platformType]) - send = 0 - entries = [] - leftovers = urlLookup.items() - for extra in leftovers: - blacklistWriter.writerow([extra[1], extra[0], partialHashLookup[extra[0]], timestamp, "", "", ""]) - - - +i = 0; +send = 0 +with open(sys.argv[2]) as csvfile: + with open(sys.argv[3], "w+") as outfile: + csv_writer = csv.writer(outfile, delimiter=',') + blacklist_reader = csv.reader(csvfile, delimiter=',') + csv_writer.writerow(['URL', 'Full Hash', 'Partial Hash', 'UTC Time Stamp', 'Match Type', 'Match Metadata', 'Platform']) + next(blacklist_reader) + for row in blacklist_reader: + if (i % 200 == 0): + send = 1 + if(row[0][:2] == '//'): + input_URL = row[0][2:] + else: + input_URL = row[0] + + seen_input_hashes = set() + for permutation in url_permutations(canonical(input_URL)): + sha256_hash = digest(permutation) + if sha256_hash not in seen_input_hashes: + partial_hash = base64.b64encode(sha256_hash[0:4]) + partial_hash_lookup[base64.b64encode(sha256_hash)] = partial_hash + new_entry = {'hash': partial_hash} + entries.append(new_entry) + seen_input_hashes.add(sha256_hash) + i = i + 1; + + if send == 1: + x['threatInfo']['threatEntries'] = entries + response = requests.post(url, json=x) + response_JSON = json.loads(response.text) + seen_output_hashes = set() + if 'matches' in response_JSON: + for hits in response_JSON['matches']: + if 'threatType' in hits: + threat_type = hits['threatType'] + if 'platformType' in hits: + platform_type = hits['platformType'] + current_hash = '' + if 'threat' in hits: + current_hash = hits['threat']['hash'] + timestamp = datetime.datetime.utcnow() + malware_type = '' + if 'threatEntryMetadata' in hits: + if 'entries' in hits['threatEntryMetadata']: + malware_type_hash = hits['threatEntryMetadata']['entries'][0]['value'] + if 'TEFORElORw' in malware_type_hash: + malware_type = 'MALWARE LANDING' + if 'RElTVFJJQlVUSU9O' in malware_type_hash: + malware_type = 'MALWARE DISTRIBUTION' + + if current_hash in URL_lookup: + current_URL = URL_lookup[current_hash] + URL_lookup.pop(current_hash) + else: + current_URL = "" + + if current_hash in partial_hash_lookup: + partial_hash = partial_hash_lookup[current_hash] + partial_hash_lookup.pop(current_hash) + else: + decoded_hash = base64.b64decode(current_hash)[0:4] + partial_hash = base64.b64encode(decoded_hash) + new_row = [current_URL, current_hash, partial_hash, timestamp, threat_type, malware_type, platform_type] + if new_row[1] not in seen_output_hashes: + csv_writer.writerow(new_row) + seen_output_hashes.add(new_row[1]) + send = 0 + entries = [] + leftovers = URL_lookup.items() + seen_leftover_hashes = set() + for extra in leftovers: + if extra[0] not in seen_leftover_hashes: + csv_writer.writerow([extra[1], extra[0], partial_hash_lookup[extra[0]], timestamp, "", "", ""]) + seen_leftover_hashes.add(extra[0])