-
Notifications
You must be signed in to change notification settings - Fork 0
/
counts.py
353 lines (252 loc) · 12.3 KB
/
counts.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
import edt
import h5py
import numpy as np
from scipy.ndimage import distance_transform_edt, gaussian_filter, zoom
import os
import yaml
cache = {}
D0 = '/data/projects/weilab/dataset/hydra/results/'
D1 = '/data/rothmr/hydra/stitched/'
D2 = '/data/rothmr/hydra/lists/'
#all files should be in the shape of the neuron mask file for the neuron name eg. "KR5"
#use for non chunking pipeline
def load_data(name):
print("begin loading data")
with h5py.File(f"{D1}neuron_{name}_box_30-32-32.h5", 'r') as f: #stitched mask, contains everything w segids
cache["box"] = f["main"][:]
print("done loading neuron box")
with h5py.File(f"{D0}vesicle_big_{name}_30-8-8.h5", 'r') as f: #high res large vesicles data for [name]
cache["lv"] = f["main"][:]
print("done loading LV")
with h5py.File(f"{D0}vesicle_small_{name}_30-8-8.h5", 'r') as f: #high res small vesicles data for [name]
cache["sv"] = f["main"][:]
print("done loading SV")
#use for chunking pipeline
def load_neurons(name):
print("begin loading data")
with h5py.File(f"{D1}neuron_{name}_box_30-32-32.h5", 'r') as f: #stitched mask, contains everything w segids
cache["box"] = f["main"][:]
print("done loading neuron box")
#always splitting along the y axis; index starting at 'chunk 0'
#chunks are not padded -> coords are adjusted in the calculate_vesicles_within function
#returns the chunk_length variable for offset purposes
def load_lv_chunks(chunk_num, num_chunks):
print(f"begin loading LV chunk #{chunk_num+1}") #bc zero indexing
chunk_length = 0 #initialize for scope
with h5py.File(f"{D0}vesicle_big_{name}_30-8-8.h5", 'r') as f: #high res large vesicles data for [name]
shape = f["main"].shape
dtype = f["main"].dtype
#calculate chunk_length (last chunk might be this plus some remainder if this doesn't divide evenly)
chunk_length = (shape[1])//num_chunks #integer division, dividing up y axis length
if(chunk_num!=num_chunks-1):
cache["lv_chunk"] = f["main"][:, chunk_num*chunk_length:(chunk_num+1)*chunk_length, :]
else: #case of the last chunk
cache["lv_chunk"] = f["main"][:, chunk_num*chunk_length:, :] #go to end of the file - last chunk includes any leftover stuff
print(f"done loading LV chunk #{chunk_num+1}") #bc zero indexing
return chunk_length
#same logic as load_lv_chunks
def load_sv_chunks(chunk_num, num_chunks):
print(f"begin loading SV chunk #{chunk_num+1}") #bc zero indexing
chunk_length = 0 #initialize for scope
with h5py.File(f"{D0}vesicle_small_{name}_30-8-8.h5", 'r') as f: #high res large vesicles data for [name]
shape = f["main"].shape
dtype = f["main"].dtype
#calculate chunk_length (last chunk might be this plus some remainder if this doesn't divide evenly)
chunk_length = (shape[1])//num_chunks #integer division, dividing up y axis length
if(chunk_num!=num_chunks-1):
cache["sv_chunk"] = f["main"][:, chunk_num*chunk_length:(chunk_num+1)*chunk_length, :]
else: #case of the last chunk
cache["sv_chunk"] = f["main"][:, chunk_num*chunk_length:, :] #go to end of the file - last chunk includes any leftover stuff
print(f"done loading SV chunk #{chunk_num+1}") #bc zero indexing
return chunk_length
def read_yml(filename):
with open(filename, 'r') as file:
data = yaml.safe_load(file)
return data
#calculate num of vesicles within any given mask; takes in mask and corresponding vesicles file
#vesicles file is in higher res
#if chunking - accounts for potential overlap vesicles across chunks
def calculate_vesicles_within(mask, vesicles=None, save_list=False, name=None, chunking=False, num_chunks=0, sv=False, lv=False):
bool_mask = (mask>=1)
bool_mask = zoom(bool_mask, (1, 4, 4), order=0) #change into ves file res
print("done changing mask file resolution")
if(chunking): #find lists of seg ids within the mask, for each chunk. merge uniquely into one list, then return final list length
if(lv):
within_ids_set = set() #within the mask
total_ids_set = set() #within the full vesicles file
#loop thru each chunk and extend list every time
current_chunk = 0
while(current_chunk!=num_chunks): #this loop runs [num_chunks] times
chunk_length = load_lv_chunks(current_chunk, num_chunks)
y_start = current_chunk * chunk_length
#cases for last chunk or not
if(current_chunk!=num_chunks-1): #not last chunk
cache["mask_chunk"] = bool_mask[:, y_start:(current_chunk+1)*chunk_length, :]
else: #last chunk
cache["mask_chunk"] = bool_mask[:, y_start:, :]
labeled_vesicles = cache["lv_chunk"]
mask_chunk = cache["mask_chunk"]
print("checkpoint 1")
vesicle_coords = np.column_stack(np.nonzero(labeled_vesicles))
print("checkpoint 2")
unique_labels = set(labeled_vesicles.ravel())
total_ids_set.update(unique_labels) #updating our set
mask_values = mask_chunk[vesicle_coords[:, 0], vesicle_coords[:, 1], vesicle_coords[:, 2]]
print("checkpoint 3")
vesicles_within_mask = set(labeled_vesicles[vesicle_coords[:, 0], vesicle_coords[:, 1], vesicle_coords[:, 2]][mask_values])
within_ids_set.update(vesicles_within_mask)
current_chunk += 1
print("total num LV: ", len(total_ids_set)-1) #minus bg label 0
#only for LV
if(save_list):
#export list as a txt file
path = f'{D2}within_{name}.txt'
with open(path, "w") as f:
f.write(str(list(within_ids_set)))
print("list for types exported")
return len(within_ids_set) #num of unique ids within the mask
#same logic as for lv
if(sv):
within_ids_set = set() #within the mask
total_ids_set = set() #within the full vesicles file
#loop thru each chunk and extend list every time
current_chunk = 0
while(current_chunk!=num_chunks): #this loop runs [num_chunks] times
chunk_length = load_sv_chunks(current_chunk, num_chunks)
y_start = current_chunk * chunk_length
#cases for last chunk or not
if(current_chunk!=num_chunks-1): #not last chunk
cache["mask_chunk"] = bool_mask[:, y_start:(current_chunk+1)*chunk_length, :]
else: #last chunk
cache["mask_chunk"] = bool_mask[:, y_start:, :]
labeled_vesicles = cache["sv_chunk"]
mask_chunk = cache["mask_chunk"]
print("checkpoint 1")
vesicle_coords = np.column_stack(np.nonzero(labeled_vesicles))
print("checkpoint 2")
unique_labels = set(labeled_vesicles.ravel())
total_ids_set.update(unique_labels) #updating our set
mask_values = mask_chunk[vesicle_coords[:, 0], vesicle_coords[:, 1], vesicle_coords[:, 2]]
print("checkpoint 3")
vesicles_within_mask = set(labeled_vesicles[vesicle_coords[:, 0], vesicle_coords[:, 1], vesicle_coords[:, 2]][mask_values])
within_ids_set.update(vesicles_within_mask)
current_chunk += 1
print("total num SV: ", len(total_ids_set)-1) #minus bg label 0
return len(within_ids_set)
else: #no chunking
num_vesicles_within = 0
labeled_vesicles = vesicles
vesicle_coords = np.column_stack(np.nonzero(labeled_vesicles))
unique_labels = np.unique(labeled_vesicles)
num_labels = len(unique_labels) - 1 #minus bg label 0
print("total num of vesicles: ", num_labels)
vesicles_within_mask = np.zeros(num_labels, dtype=bool)
mask_values = bool_mask[vesicle_coords[:, 0], vesicle_coords[:, 1], vesicle_coords[:, 2]]
vesicles_within_mask = np.unique(labeled_vesicles[vesicle_coords[:, 0], vesicle_coords[:, 1], vesicle_coords[:, 2]][mask_values])
num_vesicles_within = len(vesicles_within_mask)
if(save_list):
#export vesicles_within_mask as a txt file
path = f'{D2}within_{name}.txt'
with open(path, "w") as f:
f.write(str(vesicles_within_mask))
print("list for types exported")
return num_vesicles_within
#use if neurons and vesicles files have different res (testing phase)
def convert_coords(original_coords, original_shape, original_res, target_shape, target_res):
relative_coord = np.array(original_coords) / np.array(original_shape)
target_coord = np.round(relative_coord * np.array(target_shape)).astype(int)
return target_coord
def expand_mask(original_mask, threshold_nm, res): #threshold in physical units, res in xyz
expanded_mask = np.copy(original_mask)
print("begin distance transform for expansion")
dt = edt.edt(1 - original_mask, anisotropy=(res[2],res[1],res[0])) #needs to be in xyz by default for edt
print("end distance transform for expansion")
doubled_perimeter = dt <= threshold_nm #all in nm
expanded_mask[doubled_perimeter] = 1
expanded_mask_binary = (expanded_mask>=1).astype(int)
return expanded_mask_binary
#for usage for a singular mask, docked vesicle counts
def erode_mask(original_mask, threshold_nm, res):
eroded_mask = np.copy(original_mask)
dt = edt.edt(1 - original_mask, anisotropy=(res[2],res[1],res[0])) #needs to be in xyz by default for edt
perimeter = dt <= threshold_nm #all in nm; double perim
eroded_mask[perimeter] = 0
eroded_mask_binary = (eroded_mask>=1).astype(int)
return eroded_mask_binary
def docked_vesicles(original_mask, vesicles, threshold_nm, res): #use 250nm
eroded_mask = erode_mask(original_mask, threshold_nm, res)
return calculate_vesicles_within(eroded_mask, vesicles), calculate_volume_nm(eroded_mask, res)
#for visualization uses only
def extract_perimeter(expanded_mask, original_mask):
#expanded mask should already be binary
#change original mask to binary
original_mask_binary = (original_mask>=1).astype(int)
original_mask_inverted = ~original_mask_binary
#return everything in expanded but NOT in original
perimeter_only_mask = np.copy(expanded_mask)
perimeter_only_mask[perimeter_only_mask & original_mask_inverted] = 0
return perimeter_only_mask
#takes in a list of masks, outputs a binary mask
def mask_intersection(mask_list): #assume same size files, assume list has size at least 1
intersection = np.bitwise_and.reduce(mask_list)
return intersection
#calculate the volume given a boolean mask & convert to nm^3 units
def calculate_volume_nm(mask, res):
binary_mask = (mask>=1).astype(int)
volume_voxels = np.sum(binary_mask)
volume_nm = volume_voxels * (res[0]*res[1]*res[2])
return volume_nm
def neuron_name_to_id(name):
if isinstance(name, str):
name = [name]
return [neuron_dict[x] for x in name]
if __name__ == "__main__":
neuron_dict = read_yml('/data/projects/weilab/dataset/hydra/mask_mip1/neuron_id.txt')
res1 = (30,8,8)
res2 = (30,32,32)
chunking = True #use chunking only if memory issues
to_calculate = ['SHL17']
for name in to_calculate:
print(f"-----------{name}-----------")
nid = neuron_name_to_id(name)
threshold = 1000 #nm
#docked_threshold = 250
if(chunking):
#only load neurons, load vesicles later in chunks
load_neurons(name)
else:
#load everything
load_data(name)
print("done loading data")
neurons = cache["box"]
#unique_ids = np.unique(neurons)
neuron_only = np.zeros(neurons.shape, dtype=neurons.dtype)
neuron_only[neurons==nid] = 1 #binary mask
print(f"Volume (in nm^3) of {name} only: ", calculate_volume_nm(neuron_only, res2))
other_neurons = np.zeros(neurons.shape, dtype = neurons.dtype)
other_neurons[(neurons!=nid) & (neurons!=0)] = 1 #binary mask
expanded_others = expand_mask(other_neurons, threshold, res2)
print("mask expanding done")
intersections = mask_intersection((neuron_only, expanded_others))
print("mask intersection done")
print(f"Volume (in nm^3) of intersections for {name} and {threshold}nm: ", calculate_volume_nm(intersections, res2))
print("\ndone initializing everything\n")
if(chunking):
#implement chunking
num_chunks = 4
print(f"calculate using chunking with {num_chunks} chunks")
print("---LV---")
print(f"LV within neuron {name} & within {threshold}nm of another neuron: ",
calculate_vesicles_within(intersections, save_list=False, name=name, chunking=True, num_chunks=num_chunks, lv=True))
print("---SV---")
print(f"SV within neuron {name} & within {threshold}nm of another neuron: ",
calculate_vesicles_within(intersections, save_list=False, name=name, chunking=True, num_chunks=num_chunks, sv=True))
else:
#no chunking, pass in vesicle files directly as parameter
print("no chunking")
print("---LV---")
print(f"LV within neuron {name} & within {threshold}nm of another neuron: ",
calculate_vesicles_within(intersections, vesicles=cache["lv"], save_list=True, name=name), "\n")
print("---SV---")
print(f"SV within neuron {name} & within {threshold}nm of another neuron: ",
calculate_vesicles_within(intersections, vesicles=cache["sv"], name=name))