-
Notifications
You must be signed in to change notification settings - Fork 0
/
update_tables.py
177 lines (142 loc) · 5.27 KB
/
update_tables.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
import json
import os
from math import nan
import pandas as pd
import numpy as np
from mobie.metadata import add_to_image_dict
from mobie.tables import compute_default_table
from create_grid_datasets import get_resolution
from mock_segmentations import mock_segmentation
# Excel workbook that is read by `parse_table`; one sheet per dataset.
TABLE_NAME = './Revisions_Table S1_List of tomograms and annotations.xlsx'

# Dataset folder name -> name of the corresponding sheet in TABLE_NAME.
# NOTE: the insertion order matters, the script entry point selects by position.
DATASETS = {
    'Calu3_MOI0.5_24h_H2': 'T6 - 24h_MOI0.5',
    'Calu3_MOI5_12h_E3': 'T4 - 12h_MOI5',
    'Calu3_MOI5_24h_C2': 'T5 - 24h_MOI5',
    'Calu_MOI5_6h_K2': 'T3 - 6h_MOI5',
    'E2094_mock_O1': 'T7 - Mock',
}

# Number of leading rows to keep for sheets where trailing garbage rows are
# parsed; datasets that are not listed here are used in full.
TABLE_LENS = {
    'Calu3_MOI5_24h_C2': 57,
    'E2094_mock_O1': 13,
}
def make_default_table(dataset, resolution):
    """Compute the default table of a dataset's tomogram segmentation.

    Runs ``compute_default_table`` on
    ``data/<dataset>/images/local/em-tomogram-segmentation.n5`` (dataset key
    ``setup0/timepoint0/s0``) using ``./tmp_seg_<dataset>`` as scratch folder,
    then reads the resulting tab-separated file back in.

    Args:
        dataset: Name of the dataset folder below ``data``.
        resolution: Resolution forwarded to ``compute_default_table``.

    Returns:
        A tuple ``(default_table, label_table)`` of DataFrames holding the
        anchor / bounding box columns and the ``label_id`` column. The row
        with index label 0 is removed from both.
    """
    scratch_dir = f'./tmp_seg_{dataset}'
    csv_path = os.path.join(scratch_dir, 'default.csv')
    n5_path = os.path.join('data', dataset, 'images', 'local',
                           'em-tomogram-segmentation.n5')
    compute_default_table(n5_path, 'setup0/timepoint0/s0', csv_path, resolution,
                          tmp_folder=scratch_dir, target='local',
                          max_jobs=4)

    full_table = pd.read_csv(csv_path, sep='\t')
    # Remove the row with index label 0
    # (presumably the background label -- TODO confirm).
    full_table = full_table.drop(index=0)

    coordinate_columns = [
        "anchor_x", "anchor_y", "anchor_z",
        "bb_min_x", "bb_min_y", "bb_min_z",
        "bb_max_x", "bb_max_y", "bb_max_z",
    ]
    return full_table[coordinate_columns], full_table[["label_id"]]
def parse_table(table_path, dataset, valid_names):
    """Load the annotation sheet of a dataset and reduce it to ``valid_names``.

    Args:
        table_path: Path of the Excel workbook to read.
        dataset: Dataset name; selects the sheet via ``DATASETS`` and an
            optional row limit via ``TABLE_LENS``.
        valid_names: Column names to keep, in output order.

    Returns:
        DataFrame restricted to ``valid_names`` in which NaN cells are
        replaced by 0 and cells equal to 'x' are replaced by 1.

    Raises:
        KeyError: If ``dataset`` is not in ``DATASETS`` or one of
            ``valid_names`` is not a column of the sheet.
    """
    sheet_name = DATASETS[dataset]
    tomo_table = pd.read_excel(table_path, sheet_name=sheet_name, header=1)

    # Rename the second column to 'tomogram'. This is done on a list copy of
    # the column names: the array behind `columns.values` belongs to the
    # (immutable) Index and must not be modified in place.
    cols = tomo_table.columns.tolist()
    cols[1] = 'tomogram'
    tomo_table.columns = cols

    # for some of the sheets pandas parses garbage rows, so we hard-code the
    # number of rows to be parsed for these sheets
    exp_len = TABLE_LENS.get(dataset, None)
    if exp_len is not None:
        tomo_table = pd.DataFrame(tomo_table.values[:exp_len],
                                  columns=tomo_table.columns)
    print(tomo_table['tomogram'])

    tomo_table = tomo_table[valid_names]
    tomo_table = tomo_table.replace(nan, 0)
    tomo_table = tomo_table.replace('x', 1)
    return tomo_table
def make_new_table(dataset, table_save_path, resolution, with_viral_structures, skip_hma):
    """Build and save the combined annotation / default table of a dataset.

    The annotation sheet is parsed with ``parse_table``, the tomogram names
    are replaced by zero-padded running numbers ('001', '002', ...) and the
    result is joined column-wise with the output of ``make_default_table``
    (``label_id`` first, then the annotations, then the anchor / bounding box
    columns).

    Side effects: writes the mapping of old to new tomogram names to
    ``tomo_names_<dataset>.json`` in the working directory and the final
    table as tab-separated file to ``table_save_path``; prints the
    intermediate table shapes.

    Args:
        dataset: Name of the dataset.
        table_save_path: Output path of the tab-separated table.
        resolution: Resolution forwarded to ``make_default_table``.
        with_viral_structures: Whether to include the viral structure columns
            in addition to the cellular ones.
        skip_hma: Whether to drop the single row whose original tomogram name
            contains 'hma'. The running numbers are assigned before the row
            is dropped, so its number is skipped in the table but still
            listed in the JSON mapping.

    Raises:
        AssertionError: If ``skip_hma`` is set and there is not exactly one
            tomogram name containing 'hma'.
        ValueError: If the annotation table and the default table have a
            different number of rows.
    """
    viral_names = [
        'DMVs',
        'Virions',
        'DMVs Opening',
        'DMS',
        'connectors',
        'Fused DMVs',
    ]
    cellular_names = [
        'zER',
        'Peroxisomes',
        'Golgi',
        'VTC',
        'ER',
        'Mitochondria',
        'MVB/Lys',
        'Lamellar Bodies',
        'Lipid droplets',
        'autophagosomes',
        'Glycogen clusters',
        'Nucleus',
        'PM',
    ]
    valid_names = ['tomogram']  # this is for the filename
    if with_viral_structures:
        valid_names = valid_names + viral_names
    valid_names = valid_names + cellular_names

    tomo_table = parse_table(TABLE_NAME, dataset, valid_names)

    old_names = tomo_table['tomogram'].values.copy()
    new_names = [f'{i:03d}' for i in range(1, len(old_names) + 1)]
    tomo_table['tomogram'] = new_names

    if skip_hma:
        # parse_table replaces empty cells by 0, so the name column may hold
        # non-string entries; only strings can be searched for 'hma'.
        hma_row = [i for i, val in enumerate(old_names)
                   if isinstance(val, str) and 'hma' in val]
        assert len(hma_row) == 1, \
            f"expected exactly one 'hma' tomogram in {dataset}, found {len(hma_row)}"
        tomo_table = tomo_table.drop(labels=hma_row, axis=0)

    name_dict = dict(zip(old_names, new_names))
    with open(f'tomo_names_{dataset}.json', 'w') as f:
        json.dump(name_dict, f)

    default_table, label_table = make_default_table(dataset, resolution)

    print()
    print(tomo_table.shape)
    print(default_table.shape)
    print()

    # The tables are joined by position, so they must have the same number of
    # rows; fail with a readable message instead of inside np.concatenate.
    if len(tomo_table) != len(default_table):
        raise ValueError(
            f"Row count mismatch for {dataset}: the annotation table has "
            f"{len(tomo_table)} rows, the default table has {len(default_table)}"
        )

    table = np.concatenate([label_table.values,
                            tomo_table.values,
                            default_table.values],
                           axis=1)
    cols = np.concatenate([label_table.columns.values,
                           tomo_table.columns.values,
                           default_table.columns.values])
    table = pd.DataFrame(table, columns=cols)
    print(table.shape)
    table.to_csv(table_save_path, sep='\t', index=False)
def update_table(dataset):
    """Create or refresh the default table of a dataset's tomogram segmentation.

    The table is written to
    ``./data/<dataset>/tables/em-tomogram-segmentation/default.csv``. If the
    segmentation xml does not exist yet, a mock segmentation is created and
    added to the image dict of the dataset first.

    Args:
        dataset: Name of the dataset folder below ``./data``.
    """
    seg_name = 'em-tomogram-segmentation'
    ds_folder = os.path.join('./data', dataset)
    seg_path = os.path.join(ds_folder, 'images', 'local', seg_name + '.xml')

    # Scale the dataset resolution by the per-axis factors of the segmentation.
    scale_factor = (2, 8, 8)
    base_resolution = get_resolution(dataset)
    resolution = [res * sf for res, sf in zip(base_resolution, scale_factor)]

    table_folder = os.path.join(ds_folder, 'tables', seg_name)
    os.makedirs(table_folder, exist_ok=True)

    seg_exists = os.path.exists(seg_path)
    if not seg_exists:
        print("Creating mock segmentation")
        mock_segmentation(dataset, seg_name, scale_factor, resolution,
                          f'./tmp_seg_{dataset}')
        add_to_image_dict(ds_folder, 'segmentation', seg_path,
                          table_folder=table_folder)

    # The mock dataset has no viral structure columns; the 'hma' row is only
    # dropped for Calu3_MOI5_24h_C2.
    include_viral = dataset != 'E2094_mock_O1'
    drop_hma = dataset == 'Calu3_MOI5_24h_C2'
    make_new_table(dataset, os.path.join(table_folder, 'default.csv'), resolution,
                   with_viral_structures=include_viral,
                   skip_hma=drop_hma)
if __name__ == '__main__':
    # Process the fifth dataset of DATASETS (in insertion order).
    update_table(list(DATASETS)[4])