neuron features in morph_stats (#874)

The features in the 'neuron' entry of the config dict will not be run on each neurite_type, but only once on the whole neuron. This avoids duplicating features if they are the same on all neurite types, such as the ones in neuronfunc.py.

The returned dataframe is now of the form:
```
  | property | axon                  | all                   | neuron
  | name     | feature_1 | feature_2 | feature_1 | feature_2 | feature_3
-------------------------------------------------------------------------
0 | simple   | 0.1       | 0.2       | 0.15      | 0.25      | 1000
```
BlueBrain · Mar 5, 2021 · 227636c · 227636c
1 parent a29cfaa
commit 227636c
Show file tree

Hide file tree

Showing 3 changed files with 25 additions and 36 deletions.
diff --git a/neurom/apps/morph_stats.py b/neurom/apps/morph_stats.py
@@ -100,9 +100,11 @@ def extract_dataframe(neurons, config, n_workers=1):
             - neurite_type: a list of neurite types for which features are extracted
               If not provided, all neurite_type will be used
             - neurite: a dictionary {{neurite_feature: mode}} where:
-                - neurite_feature is a string from NEURITEFEATURES
+                - neurite_feature is a string from NEURITEFEATURES or NEURONFEATURES
                 - mode is an aggregation operation provided as a string such as:
                   ['min', 'max', 'median', 'mean', 'std', 'raw', 'total']
+            - neuron: same as neurite entry, but it will not be run on each neurite_type,
+              but only once on the whole neuron.
         n_workers (int): number of workers for multiprocessing (on collection of neurons)
 
     Returns:
@@ -117,11 +119,6 @@ def extract_dataframe(neurons, config, n_workers=1):
         neurons = [neurons]
     config = config.copy()
 
-    # Only NEURITEFEATURES are considered since the dataframe is built by neurite_type
-    # NEURONFEATURES are discarded
-    if 'neuron' in config:
-        del config['neuron']
-
     func = partial(_run_extract_stats, config=config)
     if n_workers == 1:
         stats = list(map(func, neurons))
@@ -131,18 +128,12 @@ def extract_dataframe(neurons, config, n_workers=1):
         with multiprocessing.Pool(n_workers) as pool:
             stats = list(pool.imap(func, neurons))
 
-    columns = list(next(iter(stats[0][1].values())).keys())
-
+    columns = [('property', 'name')] + [
+        (key1, key2) for key1, data in stats[0][1].items() for key2 in data
+    ]
     rows = [[name] + list(chain.from_iterable(features.values() for features in data.values()))
             for name, data in stats]
-
-    columns = list(chain.from_iterable(
-        [[('neuron', 'name')],
-         product(map(lambda x: x.lower(), config.get('neurite_type', _NEURITE_MAP.keys())),
-                 columns)]))
-    columns = pd.MultiIndex.from_tuples(columns)
-
-    return pd.DataFrame(columns=columns, data=rows)
+    return pd.DataFrame(columns=pd.MultiIndex.from_tuples(columns), data=rows)
 
 
 def extract_stats(neurons, config):
@@ -152,11 +143,13 @@ def extract_stats(neurons, config):
         neurons: a neuron, population, neurite tree or list of neuron paths/str
         config (dict): configuration dict. The keys are:
             - neurite_type: a list of neurite types for which features are extracted
-              If not provided, all neurite_type will be used
+              If not provided, all neurite_type will be used.
             - neurite: a dictionary {{neurite_feature: mode}} where:
-                - neurite_feature is a string from NEURITEFEATURES
+                - neurite_feature is a string from NEURITEFEATURES or NEURONFEATURES
                 - mode is an aggregation operation provided as a string such as:
                   ['min', 'max', 'median', 'mean', 'std', 'raw', 'total']
+            - neuron: same as neurite entry, but it will not be run on each neurite_type,
+              but only once on the whole neuron.
 
     Returns:
         The extracted statistics
@@ -198,8 +191,7 @@ def _fill_stats_dict(data, stat_name, stat, shape):
         for mode in modes:
             stat_name = _stat_name(feature_name, mode)
             stat = eval_stats(feature, mode)
-            _fill_stats_dict(stats, stat_name, stat, func.shape)
-
+            _fill_stats_dict(stats['neuron'], stat_name, stat, func.shape)
     return dict(stats)
 
 
@@ -208,11 +200,8 @@ def get_header(results):
     ret = ['name', ]
     values = next(iter(results.values()))
     for k, v in values.items():
-        if isinstance(v, dict):
-            for metric in v.keys():
-                ret.append('%s:%s' % (k, metric))
-        else:
-            ret.append(k)
+        for metric in v.keys():
+            ret.append('%s:%s' % (k, metric))
     return ret
 
 
@@ -223,11 +212,9 @@ def generate_flattened_dict(headers, results):
         for header in headers:
             if header == 'name':
                 row.append(name)
-            elif ':' in header:
+            else:
                 neurite_type, metric = header.split(':')
                 row.append(values[neurite_type][metric])
-            else:
-                row.append(values[header])
         yield row
 
 

diff --git a/neurom/apps/tests/test_morph_stats.py b/neurom/apps/tests/test_morph_stats.py
@@ -58,7 +58,9 @@
 }
 
 REF_OUT = {
-    'mean_soma_radius': 0.13065629648763766,
+    'neuron': {
+        'mean_soma_radius': 0.13065629648763766,
+    },
     'axon': {
         'total_section_length': 207.87975220908129,
         'max_section_length': 11.018460736176685,
@@ -145,7 +147,7 @@ def test_extract_stats_single_neuron():
     assert_equal(set(res.keys()), set(REF_OUT.keys()))
     # Note: soma radius is calculated from the sphere that gives the area
     # of the cylinders described in Neuron.swc
-    assert_almost_equal(res['mean_soma_radius'], REF_OUT['mean_soma_radius'])
+    assert_almost_equal(res['neuron']['mean_soma_radius'], REF_OUT['neuron']['mean_soma_radius'])
 
     for k in ('all', 'axon', 'basal_dendrite', 'apical_dendrite'):
         assert_equal(set(res[k].keys()), set(REF_OUT[k].keys()))
@@ -196,7 +198,7 @@ def test_extract_dataframe():
     config = {'neurite': {'total_length_per_neurite': ['total']}}
     actual = ms.extract_dataframe(nrns, config)
     expected_columns = pd.MultiIndex.from_tuples(
-        [('neuron', 'name'),
+        [('property', 'name'),
          ('axon', 'total_total_length_per_neurite'),
          ('basal_dendrite', 'total_total_length_per_neurite'),
          ('apical_dendrite', 'total_total_length_per_neurite'),
@@ -231,7 +233,7 @@ def test_get_header():
     header = ms.get_header(fake_results)
     assert_equal(1 + 1 + 4 * (4 + 3), len(header))  # name + everything in REF_OUT
     ok_('name' in header)
-    ok_('mean_soma_radius' in header)
+    ok_('neuron:mean_soma_radius' in header)
 
 
 def test_generate_flattened_dict():

diff --git a/test_data/extracted-stats.csv b/test_data/extracted-stats.csv
@@ -1,4 +1,4 @@
-,neuron,axon,axon,axon,axon,axon,axon,axon,apical_dendrite,apical_dendrite,apical_dendrite,apical_dendrite,apical_dendrite,apical_dendrite,apical_dendrite,basal_dendrite,basal_dendrite,basal_dendrite,basal_dendrite,basal_dendrite,basal_dendrite,basal_dendrite,all,all,all,all,all,all,all
-,name,max_section_length,total_section_length,total_section_volume,max_section_branch_order,max_segment_midpoint_0,max_segment_midpoint_1,max_segment_midpoint_2,max_section_length,total_section_length,total_section_volume,max_section_branch_order,max_segment_midpoint_0,max_segment_midpoint_1,max_segment_midpoint_2,max_section_length,total_section_length,total_section_volume,max_section_branch_order,max_segment_midpoint_0,max_segment_midpoint_1,max_segment_midpoint_2,max_section_length,total_section_length,total_section_volume,max_section_branch_order,max_segment_midpoint_0,max_segment_midpoint_1,max_segment_midpoint_2
-0,Neuron,11.018460736176685,207.8797522090813,276.7385765728952,10,0.0,0.0,49.52030596415,11.758281556059444,214.37304577550353,271.9412385728449,10.0,64.40167498405,0.0,53.750947521650005,11.652508126101711,418.43241643793476,556.2279268208382,10,64.00787233325,48.48197694465,51.575580778049996,11.758281556059444,840.6852144225195,1104.9077419665782,10,64.40167498405,48.48197694465,53.750947521650005
-1,simple,6.0,15.0,24.08554367752175,1,3.0,-2.0,0.0,,0.0,0.0,,,,,6.0,16.0,27.227136331111538,1,3.0,5.0,0.0,6.0,31.0,51.312680008633286,1,3.0,5.0,0.0
+,property,axon,axon,axon,axon,axon,axon,axon,apical_dendrite,apical_dendrite,apical_dendrite,apical_dendrite,apical_dendrite,apical_dendrite,apical_dendrite,basal_dendrite,basal_dendrite,basal_dendrite,basal_dendrite,basal_dendrite,basal_dendrite,basal_dendrite,all,all,all,all,all,all,all,neuron
+,name,max_section_length,total_section_length,total_section_volume,max_section_branch_order,max_segment_midpoint_0,max_segment_midpoint_1,max_segment_midpoint_2,max_section_length,total_section_length,total_section_volume,max_section_branch_order,max_segment_midpoint_0,max_segment_midpoint_1,max_segment_midpoint_2,max_section_length,total_section_length,total_section_volume,max_section_branch_order,max_segment_midpoint_0,max_segment_midpoint_1,max_segment_midpoint_2,max_section_length,total_section_length,total_section_volume,max_section_branch_order,max_segment_midpoint_0,max_segment_midpoint_1,max_segment_midpoint_2,mean_soma_radius
+0,Neuron,11.018460736176685,207.8797522090813,276.7385765728952,10,0.0,0.0,49.52030596415,11.758281556059444,214.37304577550353,271.9412385728449,10.0,64.40167498405,0.0,53.750947521650005,11.652508126101711,418.43241643793476,556.2279268208382,10,64.00787233325,48.48197694465,51.575580778049996,11.758281556059444,840.6852144225195,1104.9077419665782,10,64.40167498405,48.48197694465,53.750947521650005,0.13065629648763766
+1,simple,6.0,15.0,24.08554367752175,1,3.0,-2.0,0.0,,0.0,0.0,,,,,6.0,16.0,27.227136331111538,1,3.0,5.0,0.0,6.0,31.0,51.312680008633286,1,3.0,5.0,0.0,1.0