-
Notifications
You must be signed in to change notification settings - Fork 10
/
convert_tuh_to_bids.py
204 lines (172 loc) · 7.51 KB
/
convert_tuh_to_bids.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
"""Convert TUH Abnormal Dataset to the BIDS format.
See "The Temple University Hospital EEG Corpus: Electrode Location and Channel
Labels" document on TUH EEG's website for more info on the dataset conventions:
https://www.isip.piconepress.com/publications/reports/2020/tuh_eeg/electrodes/
E.g., to run on drago:
>>> python convert_tuh_to_bids.py \
--tuab_data_dir /storage/store/data/tuh_eeg/www.isip.piconepress.com/projects/tuh_eeg/downloads/tuh_eeg_abnormal/v2.0.0/edf \
--bids_data_dir /storage/store2/data/TUAB-healthy-bids \
--healthy_only True \
--reset_session_indices True
"""
import re
import argparse
import datetime
import mne
import numpy as np
from braindecode.datasets import TUHAbnormal
from mne_bids import write_raw_bids, print_dir_tree, make_report, BIDSPath
# Integer codes stored under `raw.info['subject_info']['sex']`, keyed by the
# lower-cased gender string taken from the recording description.
SEX_TO_MNE = {
    'n/a': 0,
    'm': 1,
    'f': 2,
}
def rename_tuh_channels(ch_name):
    """Rename a TUH channel label, leaving non-EEG and custom labels alone.

    Labels of the form ``<NAME>-REF`` (``<NAME>`` starting with an uppercase
    letter followed by at least one word character) lose their ``-REF``
    suffix and are re-cased:

    - 'FP' becomes 'Fp'.
    - every 'Z' becomes 'z'.

    Any label that does not have this form is returned unchanged. Labels
    whose renamed form is one of the hand-picked names (LOC, ROC, EKG1) are
    also returned with their original spelling.

    Parameters
    ----------
    ch_name : str
        Channel label as found in the recording.

    Returns
    -------
    str
        The renamed label, or `ch_name` itself when no renaming applies.
    """
    # Defined by hand - do we really want to remove them?
    keep_original = ('LOC', 'ROC', 'EKG1')

    # Earlier variant, kept for reference:
    # re.findall(r'^EEG\s([A-Z]\w+)-REF$', ch_name)
    found = re.match(r'^([A-Z]\w+)-REF$', ch_name)
    if found is None:
        return ch_name

    renamed = found.group(1).replace('FP', 'Fp').replace('Z', 'z')
    return ch_name if renamed in keep_original else renamed
def _convert_tuh_recording_to_bids(ds, bids_save_dir, desc=None):
    """Convert single TUH recording to BIDS.

    The recording's `raw` object is modified in place (channel selection,
    channel renaming, montage, subject info) before being written with
    `mne_bids.write_raw_bids` in BrainVision format, overwriting any existing
    file at the same BIDS path.

    Parameters
    ----------
    ds : braindecode.datasets.BaseDataset
        TUH recording to convert to BIDS.
    bids_save_dir : str
        Directory where to save the BIDS version of the dataset.
    desc : None | pd.Series
        Description of the recording, containing subject and recording
        information. If None, use `ds.description`.

    Raises
    ------
    ValueError
        If the recording path does not contain exactly one `_tcp_XX` part, or
        if `XX` is neither 'ar' nor 'le'.
    """
    raw = ds.raw
    raw.pick_types(eeg=True)  # Only keep EEG channels

    if desc is None:
        desc = ds.description

    # Check the reference encoded in the file path. The value is only
    # validated, not written out.
    # XXX Not supported yet in mne-bids: see mne-bids/mne_bids/write.py::766
    ref = re.findall(r'\_tcp\_(\w\w)', desc['path'])
    if len(ref) != 1:
        raise ValueError('Expecting one directory level with tcp in it.')
    if ref[0] not in ('ar', 'le'):  # average reference / linked ears
        raise ValueError(f'Unknown reference found in file name: {ref[0]}.')

    # Rename channels to a format readable by MNE
    raw.rename_channels(rename_tuh_channels)

    # Ignore channels that are not in the 10-5 system
    montage = mne.channels.make_standard_montage('standard_1005')
    ch_names = np.intersect1d(raw.ch_names, montage.ch_names)
    raw.pick_channels(ch_names)
    raw.set_montage(montage)

    # Add pathology and train/eval labels
    # XXX The following will break if it's not TUAB
    # XXX Also, should be written to the `..._scans.tsv` file instead of being
    #     annotations
    # onset = raw.times[0]
    # duration = raw.times[-1] - raw.times[0]
    # raw.annotations.append(
    #     onset, duration, 'abnormal' if desc['pathological'] else 'normal')
    # raw.annotations.append(
    #     onset, duration, 'train' if desc['train'] else 'eval')

    # Make up birthday based on recording date and age to allow mne-bids to
    # compute age
    birthday = datetime.datetime(desc['year'] - desc['age'], desc['month'], 1)
    birthday -= datetime.timedelta(weeks=4)

    # This assumes gender=sex. Any code missing from SEX_TO_MNE falls back to
    # the 'n/a' code instead of aborting the conversion with a KeyError.
    # NOTE(review): braindecode appears to report 'X' when the EDF header has
    # no gender - confirm against the installed version.
    sex = str(desc['gender']).lower()
    sex_code = SEX_TO_MNE.get(sex, SEX_TO_MNE['n/a'])

    # Add additional data required by BIDS
    mrn = str(desc['subject']).zfill(8)  # MRN: Medical Record Number
    session_nb = str(desc['session']).zfill(3)

    # NOTE(review): MNE documents `his_id` and `hand` as subject_info keys;
    # confirm that 'participant_id' and 'handedness' are actually consumed.
    subject_info = {
        'participant_id': mrn,
        'birthday': (birthday.year, birthday.month, birthday.day),
        'sex': sex_code,
        'handedness': None  # Not available
    }
    raw.info['line_freq'] = 60  # Data was collected in North America
    raw.info['subject_info'] = subject_info
    task = 'rest'

    bids_path = BIDSPath(
        subject=mrn, session=session_nb, task=task, run=desc['segment'],
        root=bids_save_dir, datatype='eeg', check=True)

    write_raw_bids(raw, bids_path, overwrite=True, allow_preload=True,
                   format='BrainVision')
def convert_tuab_to_bids(tuh_data_dir, bids_save_dir, healthy_only=True,
                         reset_session_indices=True, concat_split_files=True,
                         n_jobs=1):
    """Convert TUAB dataset to BIDS format.

    The dataset is loaded with braindecode's `TUHAbnormal`, optionally
    restricted to non-pathological recordings, and each recording is then
    written with `_convert_tuh_recording_to_bids`.

    Parameters
    ----------
    tuh_data_dir : str
        Directory where the original TUAB dataset is saved, e.g.
        `/tuh_eeg/www.isip.piconepress.com/projects/tuh_eeg/downloads/tuh_eeg_abnormal/v2.0.0/edf`.
    bids_save_dir : str
        Directory where to save the BIDS version of the dataset.
    healthy_only : bool
        If True, only convert recordings with "normal" EEG.
    reset_session_indices : bool
        If True, reset session indices so that each subject has a session 001,
        and that there is no gap between session numbers for a subject.
    concat_split_files : bool
        If True, concatenate recordings that were split into a single file.
        This is based on the "token" field of the original TUH file paths.
        Only the trivial case (one segment per session) is handled.
    n_jobs : None | int
        Number of jobs for parallelization.

    Raises
    ------
    NotImplementedError
        If `concat_split_files` is True and at least one (subject, session)
        pair has more than one segment.
    """
    concat_ds = TUHAbnormal(tuh_data_dir, recording_ids=None, n_jobs=n_jobs)

    if healthy_only:
        concat_ds = concat_ds.split(by='pathological')['False']

    description = concat_ds.description  # Make a copy because `description` is
    # made on-the-fly

    if concat_split_files:
        n_segments_per_session = description.groupby(
            ['subject', 'session'])['segment'].size()
        # Reduce to a single boolean explicitly: comparing the array of unique
        # counts to `[1]` inside an `if` raises "truth value of an array is
        # ambiguous" as soon as several distinct counts exist.
        if (n_segments_per_session != 1).any():
            raise NotImplementedError(
                'Concatenation of split files is not implemented yet.')
        # One segment per session: nothing to concatenate, use a fixed label.
        description['segment'] = '001'

    if reset_session_indices:
        # Renumber sessions 1..n within each subject, in row order.
        description['session'] = description.groupby(
            'subject')['session'].transform(lambda x: np.arange(len(x)) + 1)

    for ds, (_, desc) in zip(concat_ds.datasets, description.iterrows()):
        # Guard against recordings and description rows getting out of step.
        assert ds.description['path'] == desc['path'], (
            'Recording and description row do not refer to the same file.')
        _convert_tuh_recording_to_bids(
            ds, bids_save_dir, desc=desc)
def str_to_bool(value):
    """Parse a command-line boolean such as 'True', 'false', '1' or 'no'.

    `argparse` with `type=bool` turns every non-empty string (including
    'False') into True; this parser interprets the text instead.

    Parameters
    ----------
    value : str | bool
        Text given on the command line, or an actual bool.

    Returns
    -------
    bool
        The parsed value.

    Raises
    ------
    argparse.ArgumentTypeError
        If `value` is not a recognised spelling of true or false.
    """
    if isinstance(value, bool):
        return value
    lowered = value.strip().lower()
    if lowered in ('true', 't', 'yes', 'y', '1'):
        return True
    if lowered in ('false', 'f', 'no', 'n', '0'):
        return False
    raise argparse.ArgumentTypeError(
        f'Expected a boolean value, got {value!r}.')


if __name__ == '__main__':
    parser = argparse.ArgumentParser(description='Convert TUH to BIDS.')
    parser.add_argument(
        '--tuab_data_dir', type=str,
        default='/storage/store/data/tuh_eeg/www.isip.piconepress.com/projects/tuh_eeg/downloads/tuh_eeg_abnormal/v2.0.0/edf',
        help='Path to the original data.')
    # Required: there is no sensible default output location.
    parser.add_argument(
        '--bids_data_dir', type=str, required=True,
        help='Path to where the converted data should be saved.')
    parser.add_argument(
        '--healthy_only', type=str_to_bool, default=False,
        help='Only convert recordings of healthy subjects (default: False)')
    parser.add_argument(
        '--reset_session_indices', type=str_to_bool, default=True,
        help='Reset session indices (default: True)')
    parser.add_argument(
        '--n_jobs', type=int, default=1,
        help='number of parallel processes to use (default: 1)')
    args = parser.parse_args()

    convert_tuab_to_bids(
        args.tuab_data_dir, args.bids_data_dir, healthy_only=args.healthy_only,
        reset_session_indices=args.reset_session_indices, n_jobs=args.n_jobs)

    # Summarise what was written.
    print_dir_tree(args.bids_data_dir)
    print(make_report(args.bids_data_dir))