process_fashion_data.py
import os

import cv2
import h5py
import numpy as np
import torch
import cPickle
from scipy.io import loadmat

root = './datasets'
# Character vocabulary used to encode the text descriptions; indices are
# 1-based so that 0 can serve as padding.
alphabet = "abcdefghijklmnopqrstuvwxyz0123456789-,;.!?:'\"/\\|_@#$%^&*~`+-=<>()[]{} "

with h5py.File('./G2.h5', 'r') as f:
    # language_original.mat keys: '__header__', '__version__', '__globals__',
    # 'engJ', 'cate_new', 'color_', 'gender_', 'sleeve_', 'img_root',
    # 'nameList', 'codeJ'
    a = loadmat('./language_original.mat')
    # ind.mat keys: '__globals__', '__header__', 'test_ind',
    # 'test_set_pair_ind', 'train_ind', '__version__'
    index = loadmat('./ind.mat')

    for i in range(70000):
        idx = index['train_ind'][i, 0] - 1      # MATLAB indices are 1-based
        name = a['nameList'][idx][0][0]         # e.g. 'img/<class>/<file>.jpg'
        class_dir = name.strip().split('/')[1]
        stem = name.strip().split('/')[-1][:-4]

        # Encode the English description as a (104, 1) tensor of 1-based
        # character indices (assumes every description fits in 104 characters).
        init = np.zeros((104, 1))
        for count, char in enumerate(a['engJ'][idx][0][0]):
            init[count, 0] = alphabet.index(char) + 1

        sample = {}
        sample['char'] = torch.ByteTensor(init)
        sample['img'] = name

        # Write the encoded description to <root>/FashionGAN_txt/<class>/<file>.pkl
        txt_dir = os.path.join(root, 'FashionGAN_txt', class_dir)
        if not os.path.exists(txt_dir):
            os.makedirs(txt_dir)
        with open(os.path.join(txt_dir, stem + '.pkl'), 'wb') as pkl_file:
            cPickle.dump(sample, pkl_file)

        # print(type(f['ih'][0]))  # debug
        # print(f['ih_mean'])      # debug

        # Reconstruct the image: add the dataset mean back and move the channel
        # axis last, then write it as BGR in [0, 255] for OpenCV.
        img = f['ih'][idx].transpose(2, 1, 0) + f['ih_mean'][()].transpose(2, 1, 0)
        img_dir = os.path.join(root, 'img', class_dir)
        if not os.path.exists(img_dir):
            os.makedirs(img_dir)
        if not os.path.isfile(os.path.join(root, name)):
            cv2.imwrite(os.path.join(root, name),
                        np.stack([img[:, :, 2], img[:, :, 1], img[:, :, 0]], axis=-1) * 255.)

# Record every class directory once in trainclasses.txt.
with open(os.path.join(root, 'FashionGAN_txt', 'trainclasses.txt'), 'w') as class_file:
    for _, dirs, _ in os.walk('./datasets/img/'):
        for class_name in dirs:
            class_file.write(class_name + '\n')
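
# Optional sanity check: a minimal sketch (left commented out) of reading one
# generated .pkl back and decoding it; '<class>' and '<file>' are placeholder
# names, and it assumes the same cPickle environment used above.
#
# with open('./datasets/FashionGAN_txt/<class>/<file>.pkl', 'rb') as pkl_file:
#     sample = cPickle.load(pkl_file)
#     codes = sample['char'].numpy().ravel()   # (104,) 1-based indices, 0 = padding
#     text = ''.join(alphabet[c - 1] for c in codes if c > 0)
#     print(sample['img'], text)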