-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathutils.py
More file actions
92 lines (77 loc) · 2.2 KB
/
utils.py
File metadata and controls
92 lines (77 loc) · 2.2 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
import os
from datetime import datetime
import filecmp
import exifread
# Module-level memo of path -> size in bytes, filled lazily by get_file_size().
_size_cache = {}


def get_file_size(f):
    """Return the size of file *f* in bytes, memoised per path.

    The first call for a given path performs an ``os.stat``; later calls
    return the remembered value without touching the filesystem, so a file
    that changes size afterwards still reports its first observed size.
    """
    size = _size_cache.get(f)
    if size is None:
        # Not seen yet (st_size is always an int, never None).
        size = os.stat(f).st_size
        _size_cache[f] = size
    return size
def is_skipped(path, skip_path):
    """Tell whether *path* contains any of the fragments in *skip_path*.

    Matching is a plain substring test (``fragment in path``). A falsy
    *skip_path* (``None`` or an empty collection) never skips anything.
    """
    if not skip_path:
        return False
    return any(fragment in path for fragment in skip_path)
def _get_recursive(parent_dir, skip_path=None):
    """Collect the full paths of all files found under *parent_dir*.

    The tree is traversed with ``os.walk``. Files in any directory for which
    ``is_skipped(dirpath, skip_path)`` is true are left out, and that
    directory is reported on stdout. The root being scanned is printed first.
    """
    print(parent_dir)
    collected = []
    for dirpath, _subdirs, filenames in os.walk(parent_dir):
        if is_skipped(dirpath, skip_path):
            print('Skipped:', dirpath)
            continue
        collected.extend(os.path.join(dirpath, name) for name in filenames)
    return collected
def get_files_recursive(parent_dir, skip_path=None):
    """Return the paths of all files under one or several root directories.

    Args:
        parent_dir: Either a single root (``str``, ``bytes`` or any
            ``os.PathLike`` such as ``pathlib.Path``) or an iterable of
            such roots.
        skip_path: Optional collection of path fragments, forwarded
            unchanged to ``_get_recursive`` for every root.

    Returns:
        A list with the files of every root, in the order the roots were
        given. An empty iterable of roots yields an empty list.
    """
    # A lone path must not be iterated: a Path is not iterable at all and
    # iterating bytes would yield integers, so test for every path-like type
    # rather than only str.
    if isinstance(parent_dir, (str, bytes, os.PathLike)):
        return _get_recursive(parent_dir, skip_path)

    found = []
    for root in parent_dir:
        found.extend(_get_recursive(root, skip_path))
    return found
def get_exif_date(filename):
    """Return the EXIF ``DateTimeOriginal`` of *filename* as a datetime.

    Args:
        filename: Path of the file to inspect; it is opened in binary mode.

    Returns:
        A naive ``datetime`` parsed with the ``%Y:%m:%d %H:%M:%S`` format,
        or ``None`` when the tag is absent or its value does not match that
        format.
    """
    with open(filename, 'rb') as f:
        tags = exifread.process_file(f)

    # The file is no longer needed once the tags have been extracted.
    date_tag = 'EXIF DateTimeOriginal'
    if date_tag not in tags:
        return None

    try:
        return datetime.strptime(str(tags[date_tag]), '%Y:%m:%d %H:%M:%S')
    except ValueError:
        # A malformed timestamp (e.g. blank or all zeros) is treated like a
        # missing one instead of aborting the caller.
        return None
def myfilecmp(f1, f2, shallow=True):
    """Decide whether *f1* and *f2* look like the same file.

    The files must have the same non-zero size. They must additionally agree
    on a date: either both carry an equal EXIF date (``get_exif_date``) or,
    failing that, their ``st_mtime`` values are equal. When *shallow* is
    false, a positive result is confirmed with a byte-by-byte
    ``filecmp.cmp(..., shallow=False)``.

    Returns:
        ``True`` if the files are considered equal, ``False`` otherwise.
    """
    first_size = get_file_size(f1)
    second_size = get_file_size(f2)
    # Empty files and files of different length never match.
    if not first_size or first_size != second_size:
        return False

    first_taken = get_exif_date(f1)
    second_taken = get_exif_date(f2)
    same_date = bool(
        first_taken and second_taken and first_taken == second_taken
    )
    if not same_date:
        # No usable EXIF agreement: fall back to modification times.
        same_date = os.stat(f1).st_mtime == os.stat(f2).st_mtime

    if same_date and not shallow:
        return filecmp.cmp(f1, f2, shallow=False)
    return same_date
def build_file_size_map(files):
    """Group *files* by their size.

    Returns:
        A dict mapping each size (as reported by ``get_file_size``) to the
        list of paths having that size, in the order they were encountered.
    """
    by_size = {}
    for path in files:
        by_size.setdefault(get_file_size(path), []).append(path)
    return by_size
def get_file_size_map(parent_dir):
    """Return a size -> paths mapping for every file under *parent_dir*.

    Convenience wrapper: lists the files with ``get_files_recursive`` (no
    skip list) and groups them with ``build_file_size_map``.
    """
    return build_file_size_map(get_files_recursive(parent_dir))
def list_chunks(lst, n):
    """Lazily produce consecutive slices of *lst*, each at most *n* long.

    Every slice holds *n* items except possibly the last, which holds the
    remainder. *lst* must support ``len()`` and slicing.
    """
    starts = range(0, len(lst), n)
    yield from (lst[begin:begin + n] for begin in starts)