-
Notifications
You must be signed in to change notification settings - Fork 1
Expand file tree
/
Copy pathget_dataset.py
More file actions
44 lines (30 loc) · 1.11 KB
/
get_dataset.py
File metadata and controls
44 lines (30 loc) · 1.11 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
import os
import shutil
import zipfile
import requests
from config import config
"""
Fetch the dataset from the COCO repository and extract it to data/images.
Currently fetches the 2017 Validation dataset, containing 5000 images and totalling 788MB.
"""
# Script body: download the COCO archive, unpack it into the configured image
# directory, flatten the "val2017" sub-folder into that directory, and remove
# the temporary zip file.

print("Creating directories...")
# Resolve the target directory once and reuse it for creation and extraction.
# NOTE(review): assumes config.get_path returns the same path for the same
# argument on every call — confirm against the config module.
image_dir = config.get_path(config.config["datasets"]["images"])
os.makedirs(image_dir, exist_ok=True)

COCO_DATASET_URL = "http://images.cocodataset.org/zips/val2017.zip"
# Temporary archive, written to the current working directory.
ARCHIVE_PATH = "data.zip"
# 1 MiB chunks: the archive is large, so tiny chunks mean millions of writes.
DOWNLOAD_CHUNK_SIZE = 1024 * 1024
# Seconds to wait for the connection / for each read before giving up, so a
# stalled server cannot hang the script indefinitely.
REQUEST_TIMEOUT = 30

print("Downloading dataset...")
# The context manager releases the streamed connection even on failure.
with requests.get(COCO_DATASET_URL, stream=True, timeout=REQUEST_TIMEOUT) as r:
    # Fail fast on HTTP errors instead of saving an error page as "data.zip".
    r.raise_for_status()
    with open(ARCHIVE_PATH, "wb") as fd:
        for chunk in r.iter_content(chunk_size=DOWNLOAD_CHUNK_SIZE):
            fd.write(chunk)

print("Extracting dataset...")
# Close the archive handle before the zip file is deleted below.
with zipfile.ZipFile(ARCHIVE_PATH, "r") as z:
    z.extractall(image_dir)

# The script expects the archive contents under "val2017/"; move every file
# one level up (into the parent of the directory it was found in).
extracted_dir = os.path.join(image_dir, "val2017")
for root, dirs, files in os.walk(config.get_path(extracted_dir)):
    for file in files:
        shutil.move(
            os.path.join(root, file), config.get_path(os.path.join(root, "..", file))
        )
# os.rmdir only succeeds when the directory is empty, i.e. all files were moved.
os.rmdir(extracted_dir)

print("Deleting zip file...")
os.remove(ARCHIVE_PATH)
print("Done!")