"""
This script generates a set of train config files (pose_config.yaml files) for a data augm study
- We consider 11 different data augmentation methods
- We train 12 models with different data augmentation settings.
- The first model defines the baseline data augmentation settings.
- The next 11 models use the same data augmentation settings as the baseline, except for one data augmentation method
- This script also copies the test config file from the 'parent' project to all the sub-projects for each model
Contributors: Sofia, Jonas, Sabrina
"""
import os
import re
import shutil
import sys

import deeplabcut
from deeplabcut.utils.auxiliaryfunctions import read_config, edit_config
def create_parameters_dict():
    """Define the parameters for each data augmentation method.

    NOTE: parameters must be defined for both the True and the False case.
    If a set of parameters is not defined here, the values from the
    pose_config.yaml template are applied instead.
    TODO: maybe read these from a separate file, or define a class instead of a dict?
    """
    ## Initialise baseline dict with the params per data augmentation type
    parameters_dict = dict()

    ### General
    parameters_dict['general'] = {'dataset_type': 'imgaug',  # NOTE: not all of the params below may be available for other dataset types
                                  'batch_size': 1,  # 128
                                  'apply_prob': 0.5,
                                  'pre_resize': []}  # specify [width, height] if pre-resizing is desired

    ### Crop
    # Is cropping applied if we select imgaug? Presumably yes.
    # NOTE: crop_by and cropratio are only used if height and width are passed to pose_imgaug.
    parameters_dict['crop'] = {False: {'crop_by': 0.0,
                                       'cropratio': 0.0},
                               True: {'crop_by': 0.15,
                                      'cropratio': 0.4}}
    # from the template (cropratio is used there too):
    # parameters_dict['crop'] = {'crop_size': [400, 400],  # width, height
    #                            'max_shift': 0.4,
    #                            'crop_sampling': 'hybrid',
    #                            'cropratio': 0.4}
    ### Rotation
    parameters_dict['rotation'] = {False: {'rotation': 0,
                                           'rotratio': 0},
                                   True: {'rotation': 25,
                                          'rotratio': 0.4}}

    ### Scale
    parameters_dict['scale'] = {False: {'scale_jitter_lo': 1.0,
                                        'scale_jitter_up': 1.0},
                                True: {'scale_jitter_lo': 0.5,
                                       'scale_jitter_up': 1.25}}
    ### Motion blur
    # NOTE: motion_blur is not expected as a dictionary
    parameters_dict['motion_blur'] = {False: {'motion_blur': False},  # motion_blur_params should not be defined if False, but check if that is ok
                                      True: {'motion_blur': True,
                                             'motion_blur_params': {"k": 7, "angle": (-90, 90)}}}

    ### Contrast
    # NOTE: for contrast, a dict must be defined in the yaml file!
    # There are also log, linear, sigmoid and gamma params; we do not set them here,
    # and if they are not defined in the template they will not be set at all.
    parameters_dict['contrast'] = {False: {'contrast': {'clahe': False,
                                                        'histeq': False}},  # ratios should not be defined if False, but check if that is ok
                                   True: {'contrast': {'clahe': True,
                                                       'claheratio': 0.1,
                                                       'histeq': True,
                                                       'histeqratio': 0.1}}}

    ### Convolution
    # NOTE: for convolution, a dict must be defined in the yaml file!
    # Does this need to be fixed in the pose_cfg.yaml template?
    parameters_dict['convolution'] = {False: {'convolution': {'sharpen': False,  # ratios should not be defined if False, but check if that is ok
                                                              'edge': False,
                                                              'emboss': False}},
                                      True: {'convolution': {'sharpen': True,
                                                             'sharpenratio': 0.3,  # 0.3 in the template; the pose_imgaug default is 0.1
                                                             'edge': True,
                                                             'edgeratio': 0.1,
                                                             'emboss': True,
                                                             'embossratio': 0.1}}}
    ### Mirror
    parameters_dict['mirror'] = {False: {'mirror': False},
                                 True: {'mirror': True}}

    ### Grayscale
    parameters_dict['grayscale'] = {False: {'grayscale': False},
                                    True: {'grayscale': True}}

    ### Covering
    parameters_dict['covering'] = {False: {'covering': False},
                                   True: {'covering': True}}

    ### Elastic transform
    parameters_dict['elastic_transform'] = {False: {'elastic_transform': False},
                                            True: {'elastic_transform': True}}

    ### Gaussian noise
    parameters_dict['gaussian_noise'] = {False: {'gaussian_noise': False},
                                         True: {'gaussian_noise': True}}

    return parameters_dict
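# Example lookup: parameters_dict['rotation'][True] returns {'rotation': 25, 'rotratio': 0.4},
# i.e. the settings to apply when the rotation augmentation is switched on.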
#############################################
if __name__ == "__main__":
    ##########################################################
    ### Set the config path of the project with labelled data
    # (we assume create_training_dataset has already been run)
    # e.g. '/media/data/stinkbugs-DLC-2022-07-15/config.yaml'
    config_path = sys.argv[1]
    # each model subfolder is named with the format: <modelprefix_pre>_<id>_<str_id>, e.g. 'data_augm'
    modelprefix_pre = sys.argv[2]
    # Other params
    TRAINING_SET_INDEX = 0  # default
    TRAIN_ITERATION = 1  # iteration in terms of frame extraction; default is 0. Can this be extracted from the project config?
    ##########################################################
    ### Get the config as a dict and the associated paths
    cfg = read_config(config_path)
    project_path = cfg["project_path"]  # or: os.path.dirname(config_path)
    training_datasets_path = os.path.join(project_path, "training-datasets")

    # Get the shuffle numbers from the .mat filenames in the training dataset folder (see the example below)
    iteration_folder = os.path.join(training_datasets_path, 'iteration-' + str(TRAIN_ITERATION))
    dataset_top_folder = os.path.join(iteration_folder, os.listdir(iteration_folder)[0])
    files_in_dataset_top_folder = os.listdir(dataset_top_folder)
    list_shuffle_numbers = []
    for file in files_in_dataset_top_folder:
        if file.endswith(".mat"):
            shuffle_number = int(re.findall('[0-9]+', file)[-1])
            list_shuffle_numbers.append(shuffle_number)
    list_shuffle_numbers.sort()
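    # Example (hypothetical filename): for a file named 'stinkbugs_sofia95shuffle2.mat',
    # re.findall('[0-9]+', file) returns ['95', '2'], so the last match gives shuffle number 2.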
    # Get the train and test pose config file paths from the base project, for each shuffle
    list_base_train_pose_config_file_paths = []
    list_base_test_pose_config_file_paths = []
    for shuffle_number in list_shuffle_numbers:
        base_train_pose_config_file_path_TEMP, \
        base_test_pose_config_file_path_TEMP, \
        _ = deeplabcut.return_train_network_path(config_path,
                                                 shuffle=shuffle_number,
                                                 trainingsetindex=TRAINING_SET_INDEX)
        list_base_train_pose_config_file_paths.append(base_train_pose_config_file_path_TEMP)
        list_base_test_pose_config_file_paths.append(base_test_pose_config_file_path_TEMP)
    ###############################################################
    ## Create the params dict
    parameters_dict = create_parameters_dict()

    ############################################################################
    ## Define the baseline state of every data augmentation method
    baseline = {'crop': True,  # TODO: check
                'rotation': True,
                'scale': True,
                'mirror': False,
                'contrast': True,
                'motion_blur': True,
                'convolution': False,
                'grayscale': False,
                'covering': True,
                'elastic_transform': True,
                'gaussian_noise': False}
    #################################################
    ## Create a list of strings identifying each model (see the resulting list below)
    list_of_data_augm_models_strs = ['baseline']
    for ky in baseline.keys():
        list_of_data_augm_models_strs.append(ky)  # alternatively: 'wo_' + ky
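    # Resulting list (one entry per model, 12 in total):
    # ['baseline', 'crop', 'rotation', 'scale', 'mirror', 'contrast', 'motion_blur',
    #  'convolution', 'grayscale', 'covering', 'elastic_transform', 'gaussian_noise']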
    #########################################
    ## Loop over models: prepare one sub-project per data augmentation setting
    for i, daug_str in enumerate(list_of_data_augm_models_strs):
        ###########################################################
        # Create subdirectories for this augmentation method
        model_prefix = '_'.join([modelprefix_pre, "{0:0=2d}".format(i), daug_str])
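        # e.g. with modelprefix_pre='data_augm', i=1 and daug_str='crop', this gives 'data_augm_01_crop'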
        aug_project_path = os.path.join(project_path, model_prefix)
        aug_training_datasets = os.path.join(aug_project_path, "training-datasets")

        # create the subdirectory for this model
        try:
            os.mkdir(aug_project_path)
        except OSError as error:
            print(error)
            print("Skipping this model as its directory already exists")
            continue

        # copy the 'training-datasets' tree of the base dlc project into this model's subdirectory
        shutil.copytree(training_datasets_path, aug_training_datasets)
        ###########################################################
        # Copy the base train and test pose config files to the directory of this augmentation method
        list_train_pose_config_path_per_shuffle = []
        list_test_pose_config_path_per_shuffle = []
        for j, sh in enumerate(list_shuffle_numbers):
            one_train_pose_config_file_path, \
            one_test_pose_config_file_path, \
            _ = deeplabcut.return_train_network_path(config_path,
                                                     shuffle=sh,
                                                     trainingsetindex=TRAINING_SET_INDEX,
                                                     modelprefix=model_prefix)
            # create the parent directories 'train' and 'test'
            os.makedirs(str(os.path.dirname(one_train_pose_config_file_path)))
            os.makedirs(str(os.path.dirname(one_test_pose_config_file_path)))
            # copy the base train config file
            shutil.copyfile(list_base_train_pose_config_file_paths[j],
                            one_train_pose_config_file_path)
            # copy the base test config file
            shutil.copyfile(list_base_test_pose_config_file_paths[j],
                            one_test_pose_config_file_path)
            # add to the lists
            list_train_pose_config_path_per_shuffle.append(one_train_pose_config_file_path)
            list_test_pose_config_path_per_shuffle.append(one_test_pose_config_file_path)
        #####################################################
        # Create the dict with the data augmentation params for this model
        # (see the example after this loop)
        # initialise the dict with the general params
        edits_dict = dict()
        edits_dict.update(parameters_dict['general'])
        for ky in baseline.keys():
            if daug_str == ky:
                # get the params for the opposite of the baseline state of the method daug_str
                d_temp = parameters_dict[ky][not baseline[ky]]
            else:
                # get the params for the same state as in the baseline
                d_temp = parameters_dict[ky][baseline[ky]]
            # add to the edits dict
            edits_dict.update(d_temp)
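        # Example: for daug_str='crop' (True in the baseline), edits_dict contains
        # parameters_dict['crop'][False] plus the baseline-state params of every other method.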
        # print a summary of the settings for this model
        print('-----------------------------------')
        if daug_str == 'baseline':
            print('Data augmentation model {}: {}'.format(i, daug_str))
        else:
            print('Data augmentation model {}: "{}" opposite to baseline'.format(i, daug_str))
        for k, v in edits_dict.items():
            print('{}: {}'.format(k, v))
        print('-----------------------------------')
        ##################################################
        # Apply the edits to the train pose config file of every shuffle
        for train_pose_config_path in list_train_pose_config_path_per_shuffle:
            edit_config(str(train_pose_config_path), edits_dict)