def save_images(db_path, out_dir):
    """Decode every image stored in an lmdb database and re-encode it as JPEG.

    Each record's value is decoded with OpenCV and written to
    ``<out_dir>/<key>.jpg``. All files go into ``out_dir`` directly; no
    subfolders are created.

    Args:
        db_path: Path to the lmdb database folder.
        out_dir: Destination folder; created if it does not exist.
    """
    print('Saving', db_path, 'to', out_dir)
    env = lmdb.open(db_path, map_size=1099511627776,
                    max_readers=100, readonly=True)
    count = 0
    try:
        # The destination is the same for every record, so prepare it once
        # instead of re-checking the filesystem on each iteration.
        if not exists(out_dir):
            os.makedirs(out_dir)
        with env.begin(write=False) as txn:
            for key, val in txn.cursor():
                # Keys come back as bytes on Python 3; normalise to the native
                # str type so they can be joined with the '.jpg' suffix.
                if not isinstance(key, str):
                    key = key.decode('ascii')
                # frombuffer wraps the record without copying and replaces the
                # deprecated numpy.fromstring. Flag 1 asks OpenCV for a
                # 3-channel colour image.
                img = cv2.imdecode(
                    numpy.frombuffer(val, dtype=numpy.uint8), 1)
                if img is None:
                    # imdecode signals failure by returning None; passing that
                    # to imwrite would abort the whole run on one bad record.
                    print('Skipping', key, '(could not decode image data)')
                    continue
                cv2.imwrite(join(out_dir, key + '.jpg'), img)
                count += 1
                if count % 1000 == 0:
                    print('Finished', count, 'images')
    finally:
        # Release the reader slot and the memory map even if a write fails.
        env.close()
' 'Support multiple database paths.') @@ -83,6 +104,8 @@ def main(): view(lmdb_path) elif command == 'export': export_images(lmdb_path, args.out_dir, args.flat) + elif command == 'save': + save_images(lmdb_path, args.out_dir) if __name__ == '__main__': From 43c2d58a8e641779c1da70b1eaff3182a66194b8 Mon Sep 17 00:00:00 2001 From: John Hany Date: Sun, 19 Mar 2017 13:53:14 +0800 Subject: [PATCH 2/3] Add usage for 'save' command --- README.md | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/README.md b/README.md index 6e34169..df89c09 100644 --- a/README.md +++ b/README.md @@ -57,6 +57,12 @@ Export the images to a folder python2.7 data.py export --out_dir ``` +Save the images as jpg files to a folder + +```bash +python2.7 data.py save --out_dir +``` + ### Example: Export all the images in valuation sets in the current folder to a From 05694ec7ffce2e06236c85b1c2d3db60858c74a3 Mon Sep 17 00:00:00 2001 From: John Hany Date: Tue, 21 Mar 2017 15:34:15 +0800 Subject: [PATCH 3/3] Add save_images_fast() with lmdb port Change exported file format from *.webp to *.jpg in export_images(), and we get save_images_fast(). It's a lot faster than cv2.imwrite() when exporting jpeg files. 
def save_images_fast(db_path, out_dir, flat=False, limit=-1):
    """Dump the raw records of an lmdb database to ``<key>.jpg`` files.

    The stored bytes are written to disk unchanged: nothing is decoded or
    re-encoded, and the ``.jpg`` suffix is applied unconditionally. The
    resulting files are therefore only real JPEGs if the database itself
    holds JPEG data.

    Args:
        db_path: Path to the lmdb database folder.
        out_dir: Destination root folder.
        flat: When False, each file is placed in a nested subfolder built
            from the first six characters of its key (one level per
            character). When True, all files go directly into ``out_dir``.
        limit: Stop after this many records. The default of -1 never
            matches the running count, so every record is written.
    """
    print('Saving', db_path, 'to', out_dir)
    env = lmdb.open(db_path, map_size=1099511627776,
                    max_readers=100, readonly=True)
    count = 0
    try:
        with env.begin(write=False) as txn:
            for key, val in txn.cursor():
                # Keys come back as bytes on Python 3; normalise to the native
                # str type so slicing yields characters (not ints) and the
                # '.jpg' suffix can be appended.
                if not isinstance(key, str):
                    key = key.decode('ascii')
                if flat:
                    image_out_dir = out_dir
                else:
                    image_out_dir = join(out_dir, '/'.join(key[:6]))
                if not exists(image_out_dir):
                    os.makedirs(image_out_dir)
                image_out_path = join(image_out_dir, key + '.jpg')
                # Binary mode is required: the payload is image data, and a
                # text-mode handle would translate newlines on Windows and
                # reject bytes outright on Python 3.
                with open(image_out_path, 'wb') as fp:
                    fp.write(val)
                count += 1
                if count == limit:
                    break
                if count % 1000 == 0:
                    print('Finished', count, 'images')
    finally:
        # Release the reader slot and the memory map even if a write fails.
        env.close()