From df5bc6215abc00b150bfb39d5de2defd9c03e190 Mon Sep 17 00:00:00 2001 From: Benoit Coste Date: Fri, 17 Jul 2020 11:24:53 +0200 Subject: [PATCH] New function extract_dataframe that extract morphometrics as a dataframe (#841) if the optional argument as_frame=True, otherwise the legacy return type (dict) is used The dataframe looks like: name neurite_type ... max_segment_midpoint_Y max_segment_midpoint_Z 0 Neuron axon ... 0.000000 49.520306 1 Neuron apical_dendrite ... 0.000000 53.750948 4 simple axon ... -2.000000 0.000000 5 simple apical_dendrite ... NaN NaN And the 'neurite_type' key becomes optional for `extract_dataframe` and `extract_stats` --- apps/morph_stats | 13 ++-- neurom/apps/morph_stats.py | 100 +++++++++++++++++++++++--- neurom/apps/tests/test_morph_stats.py | 58 ++++++++++++++- neurom/tests/test_stats.py | 4 +- setup.py | 1 + test_data/extracted-stats.csv | 9 +++ 6 files changed, 160 insertions(+), 25 deletions(-) create mode 100644 test_data/extracted-stats.csv diff --git a/apps/morph_stats b/apps/morph_stats index 5333f56da..7563991cd 100755 --- a/apps/morph_stats +++ b/apps/morph_stats @@ -31,24 +31,19 @@ import argparse import json import logging -import os import sys -import pkg_resources - import neurom as nm from neurom import exceptions from neurom.apps import get_config from neurom.apps.morph_stats import (extract_stats, generate_flattened_dict, - get_header, sanitize_config, full_config) + get_header, sanitize_config, full_config, + EXAMPLE_CONFIG) from neurom.io.utils import get_files_by_path from neurom.utils import NeuromJSON L = logging.getLogger(__name__) -CONFIG_PATH = pkg_resources.resource_filename( - 'neurom', 'config') - IGNORABLE_EXCEPTIONS = { 'SomaError': exceptions.SomaError, } @@ -58,7 +53,7 @@ def get_parser(): """Parse command line arguments.""" parser = argparse.ArgumentParser( description='Morphology statistics extractor', - epilog='Example config: {}'.format(os.path.join(CONFIG_PATH, 'morph_stats.yaml'))) + epilog='Example config: {}'.format(EXAMPLE_CONFIG)) parser.add_argument('datapath', nargs='?', help='Path to a morphology data file or a directory') @@ -98,7 +93,7 @@ def main(args): config = full_config() else: try: - config = get_config(args.config, os.path.join(CONFIG_PATH, 'morph_stats.yaml')) + config = get_config(args.config, EXAMPLE_CONFIG) config = sanitize_config(config) except exceptions.ConfigError as e: L.error(str(e)) diff --git a/neurom/apps/morph_stats.py b/neurom/apps/morph_stats.py index f197f870c..385205ae9 100644 --- a/neurom/apps/morph_stats.py +++ b/neurom/apps/morph_stats.py @@ -30,21 +30,31 @@ import logging from collections import defaultdict from itertools import product +import os +import pkg_resources + + import numpy as np +import pandas as pd + import neurom as nm +from neurom.fst._core import FstNeuron from neurom.features import NEURONFEATURES, NEURITEFEATURES - from neurom.exceptions import ConfigError L = logging.getLogger(__name__) +EXAMPLE_CONFIG = os.path.join(pkg_resources.resource_filename( + 'neurom', 'config'), 'morph_stats.yaml') + def eval_stats(values, mode): """Extract a summary statistic from an array of list of values. Arguments: values: A numpy array of values - mode: A summary stat to extract. One of ['min', 'max', 'median', 'mean', 'std', 'raw'] + mode: A summary stat to extract. One of: + ['min', 'max', 'median', 'mean', 'std', 'raw', 'total'] Note: fails silently if values is empty, and None is returned """ @@ -73,12 +83,73 @@ def _stat_name(feat_name, stat_mode): return '%s_%s' % (stat_mode, feat_name) +def extract_dataframe(neurons, config): + """Extract stats grouped by neurite type from neurons. + + Arguments: + neurons: a neuron, population or neurite tree + config (dict): configuration dict. The keys are: + - neurite_type: a list of neurite types for which features are extracted + If not provided, all neurite_type will be used + - neurite: a dictionary {{neurite_feature: mode}} where: + - neurite_feature is a string from NEURITEFEATURES + - mode is an aggregation operation provided as a string such as: + ['min', 'max', 'median', 'mean', 'std', 'raw', 'total'] + + Returns: + The extracted statistics + + Note: + An example config can be found at: + + {config_path} + """ + if isinstance(neurons, FstNeuron): + neurons = [neurons] + config = config.copy() + + # Only NEURITEFEATURES are considered since the dataframe is built by neurite_type + # NEURONFEATURES are discarded + if 'neuron' in config: + del config['neuron'] + + stats = {nrn.name: extract_stats(nrn, config) for nrn in neurons} + columns = list(next(iter(next(iter(stats.values())).values())).keys()) + + rows = [[name, neurite_type] + list(features.values()) + for name, data in stats.items() + for neurite_type, features in data.items()] + return pd.DataFrame(columns=['name', 'neurite_type'] + columns, + data=rows) + + def extract_stats(neurons, config): - """Extract stats from neurons.""" - stats = defaultdict(dict) + """Extract stats from neurons. + + Arguments: + neurons: a neuron, population or neurite tree + config (dict): configuration dict. The keys are: + - neurite_type: a list of neurite types for which features are extracted + If not provided, all neurite_type will be used + - neurite: a dictionary {{neurite_feature: mode}} where: + - neurite_feature is a string from NEURITEFEATURES + - mode is an aggregation operation provided as a string such as: + ['min', 'max', 'median', 'mean', 'std', 'raw', 'total'] + + Returns: + The extracted statistics + + Note: + An example config can be found at: + + {config_path} + """ - def _fill_compoundified(data, stat_name, stat): - """Insert the stat in the dict and eventually split it into XYZ components.""" + def _fill_stats_dict(data, stat_name, stat): + """Insert the stat data in the dict. + + And if the stats is a 3D array, splits it into XYZ components. + """ if stat is None or not isinstance(stat, np.ndarray) or stat.shape not in ((3, ), ): data[stat_name] = stat else: @@ -86,21 +157,24 @@ def _fill_compoundified(data, stat_name, stat): compound_stat_name = stat_name + '_' + suffix data[compound_stat_name] = stat[i] + stats = defaultdict(dict) + for (feature_name, modes), neurite_type in product(config['neurite'].items(), - config['neurite_type']): + config.get('neurite_type', + _NEURITE_MAP.keys())): neurite_type = _NEURITE_MAP[neurite_type] for mode in modes: stat_name = _stat_name(feature_name, mode) stat = eval_stats(nm.get(feature_name, neurons, neurite_type=neurite_type), mode) - _fill_compoundified(stats[neurite_type.name], stat_name, stat) + _fill_stats_dict(stats[neurite_type.name], stat_name, stat) - for feature_name, modes in config['neuron'].items(): + for feature_name, modes in config.get('neuron', {}).items(): for mode in modes: stat_name = _stat_name(feature_name, mode) stat = eval_stats(nm.get(feature_name, neurons), mode) - _fill_compoundified(stats, stat_name, stat) + _fill_stats_dict(stats, stat_name, stat) - return stats + return dict(stats) def get_header(results): @@ -161,3 +235,7 @@ def sanitize_config(config): config['neuron'] = {} return config + + +extract_stats.__doc__ = extract_stats.__doc__.format(config_path=EXAMPLE_CONFIG) +extract_dataframe.__doc__ = extract_dataframe.__doc__.format(config_path=EXAMPLE_CONFIG) diff --git a/neurom/apps/tests/test_morph_stats.py b/neurom/apps/tests/test_morph_stats.py index a2072d7a0..99e31d710 100644 --- a/neurom/apps/tests/test_morph_stats.py +++ b/neurom/apps/tests/test_morph_stats.py @@ -31,14 +31,18 @@ import numpy as np from nose.tools import (assert_almost_equal, assert_equal, assert_greater_equal, assert_raises, ok_) +import pandas as pd +from pandas.testing import assert_frame_equal import neurom as nm from neurom.apps import morph_stats as ms from neurom.exceptions import ConfigError from neurom.features import NEURITEFEATURES, NEURONFEATURES + _path = os.path.dirname(os.path.abspath(__file__)) -DATA_PATH = os.path.join(_path, '../../../test_data/swc') +DATA_PATH = os.path.join(_path, '../../../test_data') +SWC_PATH = os.path.join(DATA_PATH, 'swc') REF_CONFIG = { @@ -127,7 +131,7 @@ def test_eval_stats_applies_numpy_function(): def test_extract_stats_single_neuron(): - nrn = nm.load_neuron(os.path.join(DATA_PATH, 'Neuron.swc')) + nrn = nm.load_neuron(os.path.join(SWC_PATH, 'Neuron.swc')) res = ms.extract_stats(nrn, REF_CONFIG) assert_equal(set(res.keys()), set(REF_OUT.keys())) # Note: soma radius is calculated from the sphere that gives the area @@ -140,6 +144,56 @@ def test_extract_stats_single_neuron(): assert_almost_equal(res[k][kk], REF_OUT[k][kk], places=3) +def test_extract_dataframe(): + # Vanilla test + nrns = nm.load_neurons([os.path.join(SWC_PATH, name) + for name in ['Neuron.swc', 'simple.swc']]) + actual = ms.extract_dataframe(nrns, REF_CONFIG) + expected = pd.read_csv(os.path.join(DATA_PATH, 'extracted-stats.csv'), index_col=0) + assert_frame_equal(actual, expected) + + # Test with a single neuron in the population + nrns = nm.load_neurons(os.path.join(SWC_PATH, 'Neuron.swc')) + actual = ms.extract_dataframe(nrns, REF_CONFIG) + assert_frame_equal(actual, expected[expected.name == 'Neuron'], check_dtype=False) + + # Test with a config without the 'neuron' key + nrns = nm.load_neurons([os.path.join(SWC_PATH, name) + for name in ['Neuron.swc', 'simple.swc']]) + config = {'neurite': {'section_lengths': ['total']}, + 'neurite_type': ['AXON', 'APICAL_DENDRITE', 'BASAL_DENDRITE', 'ALL']} + actual = ms.extract_dataframe(nrns, config) + expected = expected[['name', 'neurite_type', 'total_section_length']] + assert_frame_equal(actual, expected) + + # Test with a FstNeuron argument + nrn = nm.load_neuron(os.path.join(SWC_PATH, 'Neuron.swc')) + actual = ms.extract_dataframe(nrn, config) + assert_frame_equal(actual, expected[expected.name == 'Neuron'], check_dtype=False) + + # Test with a List[FstNeuron] argument + nrns = [nm.load_neuron(os.path.join(SWC_PATH, name)) + for name in ['Neuron.swc', 'simple.swc']] + actual = ms.extract_dataframe(nrns, config) + assert_frame_equal(actual, expected) + + # Test without any neurite_type keys, it should pick the defaults + config = {'neurite': {'total_length_per_neurite': ['total']}} + actual = ms.extract_dataframe(nrns, config) + expected = pd.DataFrame( + columns=['name', 'neurite_type', 'total_total_length_per_neurite'], + data=[['Neuron', 'axon', 207.879752], + ['Neuron', 'basal_dendrite', 418.432416], + ['Neuron', 'apical_dendrite', 214.373046], + ['Neuron', 'all', 840.685214], + ['simple', 'axon', 15.000000], + ['simple', 'basal_dendrite', 16.000000], + ['simple', 'apical_dendrite', 0.000000], + ['simple', 'all', 31.000000]]) + assert_frame_equal(actual, expected) + + + def test_get_header(): fake_results = {'fake_name0': REF_OUT, 'fake_name1': REF_OUT, diff --git a/neurom/tests/test_stats.py b/neurom/tests/test_stats.py index a60890553..29affee49 100644 --- a/neurom/tests/test_stats.py +++ b/neurom/tests/test_stats.py @@ -32,10 +32,9 @@ these tests are only sanity checks. """ +import numpy as np from neurom import stats as st from nose import tools as nt -import numpy as np -import random np.random.seed(42) @@ -70,7 +69,6 @@ def test_fit_normal_regression(): nt.assert_almost_equal(fit_.errs[0], 0.021479979161, 12) nt.assert_almost_equal(fit_.errs[1], 0.7369569123250506, 12) - def test_fit_default_is_normal(): fit0_ = st.fit(NORMAL) fit1_ = st.fit(NORMAL, 'norm') diff --git a/setup.py b/setup.py index b110c9c34..3d8b6ef58 100644 --- a/setup.py +++ b/setup.py @@ -42,6 +42,7 @@ 'h5py>=2.7.1', 'matplotlib>=3.2.1', 'numpy>=1.8.0', + 'pandas>=1.0.5', 'pyyaml>=3.10', 'scipy>=1.2.0', 'tqdm>=4.8.4', diff --git a/test_data/extracted-stats.csv b/test_data/extracted-stats.csv new file mode 100644 index 000000000..642931522 --- /dev/null +++ b/test_data/extracted-stats.csv @@ -0,0 +1,9 @@ +,name,neurite_type,max_section_length,total_section_length,total_section_volume,max_section_branch_order,max_segment_midpoint_X,max_segment_midpoint_Y,max_segment_midpoint_Z +0,Neuron,axon,11.018460736176685,207.8797522090813,276.7385765728952,10.0,0.0,0.0,49.52030596415 +1,Neuron,apical_dendrite,11.758281556059444,214.37304577550353,271.9412385728449,10.0,64.40167498405,0.0,53.750947521650005 +2,Neuron,basal_dendrite,11.652508126101711,418.43241643793476,556.2279268208382,10.0,64.00787233325,48.48197694465,51.575580778049996 +3,Neuron,all,11.758281556059444,840.6852144225195,1104.9077419665782,10.0,64.40167498405,48.48197694465,53.750947521650005 +4,simple,axon,6.0,15.0,24.08554367752175,1.0,3.0,-2.0,0.0 +5,simple,apical_dendrite,,0.0,0.0,,,, +6,simple,basal_dendrite,6.0,16.0,27.227136331111538,1.0,3.0,5.0,0.0 +7,simple,all,6.0,31.0,51.312680008633286,1.0,3.0,5.0,0.0