-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathfind_duplicates.py
More file actions
executable file
·32 lines (21 loc) · 877 Bytes
/
find_duplicates.py
File metadata and controls
executable file
·32 lines (21 loc) · 877 Bytes
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
import argparse
import json
import sql_wrapper as sql
def main() -> None:
    """Dump every group of duplicate files from the file DB to a JSON log.

    Command-line arguments:
        --filedb   value passed to ``sql.FileDBWrapper`` (required).
        --dup_log  path of the JSON file to write (required). The file is
                   opened in write mode, so existing content is replaced.

    The output is a JSON list with one object per hash returned by
    ``get_duplicate_sha384()``. Each object stores the hash under
    ``"sha384"`` and the result of ``get_files_for_sha384hash()`` for that
    hash under ``"files"``.
    """
    parser = argparse.ArgumentParser(
        description="Find duplicate files recorded in a file DB",
        add_help=True,
    )
    parser.add_argument(
        "--filedb",
        dest="filedb",
        type=str,
        help="File DB with hashes",
        required=True,
    )
    parser.add_argument(
        "--dup_log",
        dest="dup_log",
        type=str,
        help="Where to record duplicate files.",
        required=True,
    )
    args = parser.parse_args()

    fdb = sql.FileDBWrapper(args.filedb)

    # One entry per duplicated hash, in the order the DB wrapper returns them.
    all_duplicates = [
        {
            "sha384": sha384,
            "files": fdb.get_files_for_sha384hash(sha384=sha384),
        }
        for sha384 in fdb.get_duplicate_sha384()
    ]

    # ensure_ascii=False writes non-ASCII characters verbatim, so the file is
    # opened explicitly as UTF-8 instead of relying on the platform default
    # encoding, which may be unable to encode them.
    with open(args.dup_log, "wt", encoding="utf-8") as f:
        json.dump(all_duplicates, f, ensure_ascii=False, indent=4)
# Run the command-line entry point only when this file is executed as a
# script, not when it is imported as a module.
if __name__ == "__main__":
    main()