Skip to content

Commit

Permalink
New function extract_dataframe that extract morphometrics as a datafr…
Browse files Browse the repository at this point in the history
…ame (#841)


if the optional argument as_frame=True, otherwise the legacy return type (dict) is used

The dataframe looks like:

     name     neurite_type  ...  max_segment_midpoint_Y  max_segment_midpoint_Z
0  Neuron             axon  ...                0.000000               49.520306
1  Neuron  apical_dendrite  ...                0.000000               53.750948
4  simple             axon  ...               -2.000000                0.000000
5  simple  apical_dendrite  ...                     NaN                     NaN


And the 'neurite_type' key becomes optional for `extract_dataframe` and
`extract_stats`
  • Loading branch information
Benoit Coste authored Jul 17, 2020
1 parent e41a817 commit df5bc62
Show file tree
Hide file tree
Showing 6 changed files with 160 additions and 25 deletions.
13 changes: 4 additions & 9 deletions apps/morph_stats
Original file line number Diff line number Diff line change
Expand Up @@ -31,24 +31,19 @@
import argparse
import json
import logging
import os
import sys

import pkg_resources

import neurom as nm
from neurom import exceptions
from neurom.apps import get_config
from neurom.apps.morph_stats import (extract_stats, generate_flattened_dict,
get_header, sanitize_config, full_config)
get_header, sanitize_config, full_config,
EXAMPLE_CONFIG)
from neurom.io.utils import get_files_by_path
from neurom.utils import NeuromJSON

L = logging.getLogger(__name__)

CONFIG_PATH = pkg_resources.resource_filename(
'neurom', 'config')

IGNORABLE_EXCEPTIONS = {
'SomaError': exceptions.SomaError,
}
Expand All @@ -58,7 +53,7 @@ def get_parser():
"""Parse command line arguments."""
parser = argparse.ArgumentParser(
description='Morphology statistics extractor',
epilog='Example config: {}'.format(os.path.join(CONFIG_PATH, 'morph_stats.yaml')))
epilog='Example config: {}'.format(EXAMPLE_CONFIG))

parser.add_argument('datapath', nargs='?',
help='Path to a morphology data file or a directory')
Expand Down Expand Up @@ -98,7 +93,7 @@ def main(args):
config = full_config()
else:
try:
config = get_config(args.config, os.path.join(CONFIG_PATH, 'morph_stats.yaml'))
config = get_config(args.config, EXAMPLE_CONFIG)
config = sanitize_config(config)
except exceptions.ConfigError as e:
L.error(str(e))
Expand Down
100 changes: 89 additions & 11 deletions neurom/apps/morph_stats.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,21 +30,31 @@
import logging
from collections import defaultdict
from itertools import product
import os
import pkg_resources


import numpy as np
import pandas as pd

import neurom as nm
from neurom.fst._core import FstNeuron
from neurom.features import NEURONFEATURES, NEURITEFEATURES

from neurom.exceptions import ConfigError

L = logging.getLogger(__name__)

EXAMPLE_CONFIG = os.path.join(pkg_resources.resource_filename(
'neurom', 'config'), 'morph_stats.yaml')


def eval_stats(values, mode):
"""Extract a summary statistic from an array of list of values.
Arguments:
values: A numpy array of values
mode: A summary stat to extract. One of ['min', 'max', 'median', 'mean', 'std', 'raw']
mode: A summary stat to extract. One of:
['min', 'max', 'median', 'mean', 'std', 'raw', 'total']
Note: fails silently if values is empty, and None is returned
"""
Expand Down Expand Up @@ -73,34 +83,98 @@ def _stat_name(feat_name, stat_mode):
return '%s_%s' % (stat_mode, feat_name)


def extract_dataframe(neurons, config):
"""Extract stats grouped by neurite type from neurons.
Arguments:
neurons: a neuron, population or neurite tree
config (dict): configuration dict. The keys are:
- neurite_type: a list of neurite types for which features are extracted
If not provided, all neurite_type will be used
- neurite: a dictionary {{neurite_feature: mode}} where:
- neurite_feature is a string from NEURITEFEATURES
- mode is an aggregation operation provided as a string such as:
['min', 'max', 'median', 'mean', 'std', 'raw', 'total']
Returns:
The extracted statistics
Note:
An example config can be found at:
{config_path}
"""
if isinstance(neurons, FstNeuron):
neurons = [neurons]
config = config.copy()

# Only NEURITEFEATURES are considered since the dataframe is built by neurite_type
# NEURONFEATURES are discarded
if 'neuron' in config:
del config['neuron']

stats = {nrn.name: extract_stats(nrn, config) for nrn in neurons}
columns = list(next(iter(next(iter(stats.values())).values())).keys())

rows = [[name, neurite_type] + list(features.values())
for name, data in stats.items()
for neurite_type, features in data.items()]
return pd.DataFrame(columns=['name', 'neurite_type'] + columns,
data=rows)


def extract_stats(neurons, config):
"""Extract stats from neurons."""
stats = defaultdict(dict)
"""Extract stats from neurons.
Arguments:
neurons: a neuron, population or neurite tree
config (dict): configuration dict. The keys are:
- neurite_type: a list of neurite types for which features are extracted
If not provided, all neurite_type will be used
- neurite: a dictionary {{neurite_feature: mode}} where:
- neurite_feature is a string from NEURITEFEATURES
- mode is an aggregation operation provided as a string such as:
['min', 'max', 'median', 'mean', 'std', 'raw', 'total']
Returns:
The extracted statistics
Note:
An example config can be found at:
{config_path}
"""

def _fill_compoundified(data, stat_name, stat):
"""Insert the stat in the dict and eventually split it into XYZ components."""
def _fill_stats_dict(data, stat_name, stat):
"""Insert the stat data in the dict.
And if the stats is a 3D array, splits it into XYZ components.
"""
if stat is None or not isinstance(stat, np.ndarray) or stat.shape not in ((3, ), ):
data[stat_name] = stat
else:
for i, suffix in enumerate('XYZ'):
compound_stat_name = stat_name + '_' + suffix
data[compound_stat_name] = stat[i]

stats = defaultdict(dict)

for (feature_name, modes), neurite_type in product(config['neurite'].items(),
config['neurite_type']):
config.get('neurite_type',
_NEURITE_MAP.keys())):
neurite_type = _NEURITE_MAP[neurite_type]
for mode in modes:
stat_name = _stat_name(feature_name, mode)
stat = eval_stats(nm.get(feature_name, neurons, neurite_type=neurite_type), mode)
_fill_compoundified(stats[neurite_type.name], stat_name, stat)
_fill_stats_dict(stats[neurite_type.name], stat_name, stat)

for feature_name, modes in config['neuron'].items():
for feature_name, modes in config.get('neuron', {}).items():
for mode in modes:
stat_name = _stat_name(feature_name, mode)
stat = eval_stats(nm.get(feature_name, neurons), mode)
_fill_compoundified(stats, stat_name, stat)
_fill_stats_dict(stats, stat_name, stat)

return stats
return dict(stats)


def get_header(results):
Expand Down Expand Up @@ -161,3 +235,7 @@ def sanitize_config(config):
config['neuron'] = {}

return config


extract_stats.__doc__ = extract_stats.__doc__.format(config_path=EXAMPLE_CONFIG)
extract_dataframe.__doc__ = extract_dataframe.__doc__.format(config_path=EXAMPLE_CONFIG)
58 changes: 56 additions & 2 deletions neurom/apps/tests/test_morph_stats.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,14 +31,18 @@
import numpy as np
from nose.tools import (assert_almost_equal, assert_equal,
assert_greater_equal, assert_raises, ok_)
import pandas as pd
from pandas.testing import assert_frame_equal

import neurom as nm
from neurom.apps import morph_stats as ms
from neurom.exceptions import ConfigError
from neurom.features import NEURITEFEATURES, NEURONFEATURES


_path = os.path.dirname(os.path.abspath(__file__))
DATA_PATH = os.path.join(_path, '../../../test_data/swc')
DATA_PATH = os.path.join(_path, '../../../test_data')
SWC_PATH = os.path.join(DATA_PATH, 'swc')


REF_CONFIG = {
Expand Down Expand Up @@ -127,7 +131,7 @@ def test_eval_stats_applies_numpy_function():


def test_extract_stats_single_neuron():
nrn = nm.load_neuron(os.path.join(DATA_PATH, 'Neuron.swc'))
nrn = nm.load_neuron(os.path.join(SWC_PATH, 'Neuron.swc'))
res = ms.extract_stats(nrn, REF_CONFIG)
assert_equal(set(res.keys()), set(REF_OUT.keys()))
# Note: soma radius is calculated from the sphere that gives the area
Expand All @@ -140,6 +144,56 @@ def test_extract_stats_single_neuron():
assert_almost_equal(res[k][kk], REF_OUT[k][kk], places=3)


def test_extract_dataframe():
# Vanilla test
nrns = nm.load_neurons([os.path.join(SWC_PATH, name)
for name in ['Neuron.swc', 'simple.swc']])
actual = ms.extract_dataframe(nrns, REF_CONFIG)
expected = pd.read_csv(os.path.join(DATA_PATH, 'extracted-stats.csv'), index_col=0)
assert_frame_equal(actual, expected)

# Test with a single neuron in the population
nrns = nm.load_neurons(os.path.join(SWC_PATH, 'Neuron.swc'))
actual = ms.extract_dataframe(nrns, REF_CONFIG)
assert_frame_equal(actual, expected[expected.name == 'Neuron'], check_dtype=False)

# Test with a config without the 'neuron' key
nrns = nm.load_neurons([os.path.join(SWC_PATH, name)
for name in ['Neuron.swc', 'simple.swc']])
config = {'neurite': {'section_lengths': ['total']},
'neurite_type': ['AXON', 'APICAL_DENDRITE', 'BASAL_DENDRITE', 'ALL']}
actual = ms.extract_dataframe(nrns, config)
expected = expected[['name', 'neurite_type', 'total_section_length']]
assert_frame_equal(actual, expected)

# Test with a FstNeuron argument
nrn = nm.load_neuron(os.path.join(SWC_PATH, 'Neuron.swc'))
actual = ms.extract_dataframe(nrn, config)
assert_frame_equal(actual, expected[expected.name == 'Neuron'], check_dtype=False)

# Test with a List[FstNeuron] argument
nrns = [nm.load_neuron(os.path.join(SWC_PATH, name))
for name in ['Neuron.swc', 'simple.swc']]
actual = ms.extract_dataframe(nrns, config)
assert_frame_equal(actual, expected)

# Test without any neurite_type keys, it should pick the defaults
config = {'neurite': {'total_length_per_neurite': ['total']}}
actual = ms.extract_dataframe(nrns, config)
expected = pd.DataFrame(
columns=['name', 'neurite_type', 'total_total_length_per_neurite'],
data=[['Neuron', 'axon', 207.879752],
['Neuron', 'basal_dendrite', 418.432416],
['Neuron', 'apical_dendrite', 214.373046],
['Neuron', 'all', 840.685214],
['simple', 'axon', 15.000000],
['simple', 'basal_dendrite', 16.000000],
['simple', 'apical_dendrite', 0.000000],
['simple', 'all', 31.000000]])
assert_frame_equal(actual, expected)



def test_get_header():
fake_results = {'fake_name0': REF_OUT,
'fake_name1': REF_OUT,
Expand Down
4 changes: 1 addition & 3 deletions neurom/tests/test_stats.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,10 +32,9 @@
these tests are only sanity checks.
"""

import numpy as np
from neurom import stats as st
from nose import tools as nt
import numpy as np
import random

np.random.seed(42)

Expand Down Expand Up @@ -70,7 +69,6 @@ def test_fit_normal_regression():
nt.assert_almost_equal(fit_.errs[0], 0.021479979161, 12)
nt.assert_almost_equal(fit_.errs[1], 0.7369569123250506, 12)


def test_fit_default_is_normal():
fit0_ = st.fit(NORMAL)
fit1_ = st.fit(NORMAL, 'norm')
Expand Down
1 change: 1 addition & 0 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,7 @@
'h5py>=2.7.1',
'matplotlib>=3.2.1',
'numpy>=1.8.0',
'pandas>=1.0.5',
'pyyaml>=3.10',
'scipy>=1.2.0',
'tqdm>=4.8.4',
Expand Down
9 changes: 9 additions & 0 deletions test_data/extracted-stats.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
,name,neurite_type,max_section_length,total_section_length,total_section_volume,max_section_branch_order,max_segment_midpoint_X,max_segment_midpoint_Y,max_segment_midpoint_Z
0,Neuron,axon,11.018460736176685,207.8797522090813,276.7385765728952,10.0,0.0,0.0,49.52030596415
1,Neuron,apical_dendrite,11.758281556059444,214.37304577550353,271.9412385728449,10.0,64.40167498405,0.0,53.750947521650005
2,Neuron,basal_dendrite,11.652508126101711,418.43241643793476,556.2279268208382,10.0,64.00787233325,48.48197694465,51.575580778049996
3,Neuron,all,11.758281556059444,840.6852144225195,1104.9077419665782,10.0,64.40167498405,48.48197694465,53.750947521650005
4,simple,axon,6.0,15.0,24.08554367752175,1.0,3.0,-2.0,0.0
5,simple,apical_dendrite,,0.0,0.0,,,,
6,simple,basal_dendrite,6.0,16.0,27.227136331111538,1.0,3.0,5.0,0.0
7,simple,all,6.0,31.0,51.312680008633286,1.0,3.0,5.0,0.0

0 comments on commit df5bc62

Please sign in to comment.