-
Notifications
You must be signed in to change notification settings - Fork 5
/
get_data.py
77 lines (51 loc) · 2.16 KB
/
get_data.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
import requests
import os
import zipfile
# Local directory that receives the downloaded archives and their extracted
# contents.
data_dir = 'data'
# exist_ok=True makes the creation idempotent and avoids the race between an
# os.path.exists() check and a later os.mkdir() call.
os.makedirs(data_dir, exist_ok=True)
# File name of the image archive as it is looked up inside data_dir.
image_zip_file = 'WIDER_train.zip'
# Location of the bounding-box annotation archive fetched over plain HTTP.
annotation_url = 'http://mmlab.ie.cuhk.edu.hk/projects/WIDERFace/support/bbx_annotation/wider_face_split.zip'
def download_file_from_google_drive(id, destination):
    """Download a Google Drive file and write it to a local path.

    The file is requested once; if the response carries a confirmation
    token (see ``get_confirm_token``), the request is repeated with that
    token before the body is saved.

    Args:
        id: Google Drive file identifier, sent as the ``id`` query
            parameter. The name shadows the builtin but is kept so existing
            callers keep working.
        destination: Path of the local file to write.

    Raises:
        requests.HTTPError: If the final response has an error status.
    """
    URL = "https://docs.google.com/uc?export=download"

    # Closing the session on exit releases its pooled connections even when
    # the download fails part-way through.
    with requests.Session() as session:
        response = session.get(URL, params={'id': id}, stream=True)
        token = get_confirm_token(response)

        if token:
            # Repeat the request, this time passing the token back as the
            # ``confirm`` parameter.
            params = {'id': id, 'confirm': token}
            response = session.get(URL, params=params, stream=True)

        # Fail loudly instead of saving an HTTP error page as the archive.
        response.raise_for_status()
        # The body is streamed, so it must be consumed while the session is
        # still open.
        save_response_content(response, destination)
def download_file_from_web_server(url, destination):
    """Download ``url`` into the directory ``destination``.

    The local file is named after the last ``/``-separated component of the
    URL.

    Args:
        url: Address of the file to download.
        destination: Directory in which the downloaded file is written.

    Returns:
        The file name (not the full path) used for the downloaded file.

    Raises:
        requests.HTTPError: If the server answers with an error status.
    """
    local_filename = url.split('/')[-1]
    # stream=True defers fetching the body so it can be written chunk by
    # chunk; the context manager releases the connection afterwards, even if
    # writing fails.
    with requests.get(url, stream=True) as response:
        # Fail loudly instead of saving an HTTP error page as the archive.
        response.raise_for_status()
        save_response_content(response,
                              os.path.join(destination, local_filename))
    return local_filename
# TODO Add progress bar
def get_confirm_token(response):
    """Return the value of the first ``download_warning*`` cookie, if any.

    Args:
        response: Object whose ``cookies`` attribute offers ``items()``
            yielding ``(name, value)`` pairs.

    Returns:
        The value of the first cookie whose name starts with
        ``download_warning``, or ``None`` when no cookie matches.
    """
    matching_values = (
        cookie_value
        for cookie_name, cookie_value in response.cookies.items()
        if cookie_name.startswith('download_warning')
    )
    # next() with a default stops at the first match and yields None when
    # the generator is empty.
    return next(matching_values, None)
def save_response_content(response, destination, chunk_size=32768):
    """Write a streamed response body to a binary file.

    Args:
        response: Object providing ``iter_content(chunk_size)``, which
            yields the body as byte chunks.
        destination: Path of the file to create or overwrite.
        chunk_size: Size passed to ``iter_content`` for each chunk.
            Defaults to 32768, the value previously hard-coded here.
    """
    with open(destination, "wb") as f:
        for chunk in response.iter_content(chunk_size):
            if chunk:  # filter out keep-alive new chunks
                f.write(chunk)
def extract_zip_file(zip_file_name, destination):
    """Extract every member of a zip archive into a directory.

    Args:
        zip_file_name: Path of the zip archive to read.
        destination: Directory that receives the extracted members.
    """
    # The context manager closes the archive even if extraction raises.
    with zipfile.ZipFile(zip_file_name, 'r') as zip_ref:
        zip_ref.extractall(destination)
if __name__ == "__main__":
    # Script entry point: download and unpack the image archive, then the
    # bounding-box annotation archive, both into data_dir.
    file_id = '0B6eKvaijfFUDQUUwd21EckhUbWs'
    # Build the download target from the same module-level names the
    # extraction step relies on, so the two paths cannot drift apart.
    destination = os.path.join(data_dir, image_zip_file)

    print('downloading the images from google drive...')
    download_file_from_google_drive(file_id, destination)
    extract_zip_file(destination, data_dir)

    print('downloading the bounding boxes annotations...')
    # The web-server helper returns the file name it saved under data_dir.
    annotation_zip_file = download_file_from_web_server(annotation_url,
                                                        data_dir)
    extract_zip_file(os.path.join(data_dir, annotation_zip_file), data_dir)
    print('done')