-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathhash_comp.py
More file actions
109 lines (99 loc) · 3.08 KB
/
hash_comp.py
File metadata and controls
109 lines (99 loc) · 3.08 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
import hashlib
import os
import sys
from os import listdir
from os.path import isfile, join
from pprint import pprint
from tqdm.auto import tqdm
# Directory containing this script; target_extensions.txt is looked up next to it.
script_dir = os.path.dirname(os.path.realpath(__file__))
try:
    # Load the extensions to check, one per line. Entries are compared against
    # os.path.splitext() output further down, so they include the leading dot.
    with open(os.path.join(script_dir, 'target_extensions.txt')) as f:
        # Drop *every* blank line (not just the first one): a leftover ''
        # entry would match all files that have no extension at all.
        stripped_lines = (line.strip() for line in f)
        target_ext = [entry for entry in stripped_lines if entry]
    print('Targeting: ', target_ext)
except (OSError, UnicodeError) as e:
    # No usable filter file: None means "do not filter by extension".
    print(f'Error reading target_extensions.txt: {e}')
    target_ext = None
def calc_hash(filename, hasher=None):
    """Return the hex digest of the contents of *filename*.

    Args:
        filename: Path of the file to hash; it is opened in binary mode.
        hasher: A hashlib-style object providing ``update()`` and
            ``hexdigest()``. When omitted, a fresh ``hashlib.sha1()`` is
            created for this call.

    Returns:
        The hexadecimal digest string reported by the hasher after it has
        been fed the whole file.

    Raises:
        OSError: If the file cannot be opened or read.
    """
    # A hasher built in the signature would be created once and shared by
    # every call, so digests would accumulate data from earlier files.
    if hasher is None:
        hasher = hashlib.sha1()
    with open(filename, 'rb') as file:
        # Read in fixed-size chunks so large files are never fully in memory.
        for chunk in iter(lambda: file.read(4096), b''):
            hasher.update(chunk)
    return hasher.hexdigest()
def _scan_directories(dirs, hashes):
    """Hash every targeted file below each directory in *dirs*.

    Results are added to *hashes* (digest -> list of file paths) in place
    rather than returned, so whatever was collected before an interruption
    is still available to the caller.
    """
    # Arguments may overlap (e.g. "a" and "a/sub"); hashing a file twice
    # would report it as a duplicate of itself.
    seen_paths = set()
    t = len(dirs)
    for n, d in enumerate(dirs, start=1):
        if not os.path.isdir(d):
            # One bad argument should not abort the remaining directories.
            print(f'Skipping {d}: not a directory')
            continue
        for root, _subdirs, files in os.walk(d):
            desc = f'[{n}/{t}] Checking {root}'
            with tqdm(total=len(files), desc=desc, unit='File', unit_scale=0) as progress_bar:
                for name in files:
                    progress_bar.update(1)
                    file = os.path.join(root, name)
                    ext = os.path.splitext(file)[1]
                    if target_ext is not None and ext not in target_ext:
                        continue
                    if file in seen_paths:
                        continue
                    seen_paths.add(file)
                    try:
                        hv = calc_hash(file, hashlib.blake2b())
                    except OSError as e:
                        print(f'Unable to process {file}: {e}')
                        continue
                    group = hashes.setdefault(hv, [])
                    group.append(file)
                    if len(group) > 1:
                        # Show a shortened digest next to the matching paths.
                        dots = '...' if len(hv) > 16 else ''
                        try:
                            print(f'{hv[:16]}{dots}', group)
                        except Exception as e:
                            # Best effort: a console that cannot display the
                            # paths must not stop the scan.
                            print(f'Error processing {file}: {e}')


def _unused_output_name(base, ext):
    """Return '<base>.<ext>', or '<base> (N).<ext>' for the first unused N."""
    candidate = f'{base}.{ext}'
    suffix = 0
    while os.path.exists(candidate):
        suffix += 1
        candidate = f'{base} ({suffix}).{ext}'
    return candidate


def _write_duplicates(hashes, out_file):
    """Write one line to *out_file* per digest shared by more than one file."""
    bar = tqdm(total=len(hashes), desc='Writing entries', unit='Entry', unit_scale=0)
    with open(out_file, 'w') as f, bar as progress_bar:
        for key, value in hashes.items():
            progress_bar.update(1)
            if len(value) <= 1:
                continue
            # Build the whole line first and write it in a single call so a
            # failure cannot leave a half-written entry in the report.
            line = f'[{len(value)}] {key}: ' + ''.join(f'{v}, ' for v in value) + '\n'
            try:
                f.write(line)
            except (OSError, UnicodeError) as e:
                print(f'Error writing entry {key}: {e}')


if __name__ == "__main__":
    # Directories come from the command line; default to the current one.
    requested = sys.argv[1:] if len(sys.argv) > 1 else [os.getcwd()]
    # Resolve every argument against the launch directory up front. The
    # working directory is never changed, so relative arguments cannot be
    # misinterpreted relative to a previously scanned directory, and the
    # report is written where the script was started.
    dirs = [os.path.realpath(d) for d in requested]

    hashes = {}  # digest -> list of absolute paths of files with that digest
    try:
        _scan_directories(dirs, hashes)
    except KeyboardInterrupt:
        print('Ctrl-C... stopping & saving')
    except Exception as e:
        print('!'*80)
        print(f'Error checking hashes: {e}')
        print('!'*80)

    if hashes:
        out_file = _unused_output_name('hash_res', 'txt')
        print(f'Saving results to {out_file}')
        _write_duplicates(hashes, out_file)
    else:
        print('No hashes found. Exiting')