Skip to content

Commit

Permalink
Release v0.1.11 (#17)
Browse files Browse the repository at this point in the history
* bump to v0.1.11
  • Loading branch information
kmax12 authored Oct 31, 2017
1 parent 2b75617 commit 921c731
Show file tree
Hide file tree
Showing 9 changed files with 88 additions and 188 deletions.
9 changes: 9 additions & 0 deletions docs/source/api_reference.rst
Original file line number Diff line number Diff line change
Expand Up @@ -303,3 +303,12 @@ Variable types
Ordinal
Boolean
Text


Feature Selection
------------------
.. currentmodule:: featuretools.selection
.. autosummary::
:toctree: generated/

remove_low_information_features
14 changes: 14 additions & 0 deletions docs/source/changelog.rst
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,17 @@
Changelog
---------

**v0.1.11** October 31, 2017
* Package linting (:pr:`7`)
* Custom primitive creation functions (:pr:`13`)
* Split requirements to separate files and pin to latest versions (:pr:`15`)
* Select low information features (:pr:`18`)
* Fix docs typos (:pr:`19`)
* Fixed Diff primitive for rare nan case (:pr:`21`)
* Added some missing docstrings (:pr:`23`)
* Trend fix (:pr:`22`)
* Remove as_dir=False option from EntitySet.to_pickle() (:pr:`20`)
* Entity Normalization Preserves Types of Copy & Additional Variables (:pr:`25`)

**v0.1.10** October 12, 2017
* NumTrue primitive added and docstring of other primitives updated (:pr:`11`)
Expand Down Expand Up @@ -47,3 +58,6 @@ Changelog
**v0.1.2** July 3rd, 2017

* Initial release

.. command
.. git log --pretty=oneline --abbrev-commit
2 changes: 1 addition & 1 deletion featuretools/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,4 +11,4 @@
from .utils.pickle_utils import *
import featuretools.demo

__version__ = '0.1.10'
__version__ = '0.1.11'
2 changes: 1 addition & 1 deletion featuretools/selection/api.py
Original file line number Diff line number Diff line change
@@ -1,2 +1,2 @@
# flake8: noqa
from .variance_selection import *
from .selection import *
18 changes: 18 additions & 0 deletions featuretools/selection/selection.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
def remove_low_information_features(feature_matrix, features=None):
    '''
    Select features that have at least 2 unique values and that are not all null

    Args:
        feature_matrix (:class:`pd.DataFrame`): DataFrame whose columns are
            feature names and rows are instances
        features (list[:class:`featuretools.PrimitiveBase`] or list[str], optional):
            List of features to select. Entries may be feature objects
            exposing ``get_name()`` or plain column names.

    Returns:
        pd.DataFrame: the feature matrix restricted to the informative
        columns, when ``features`` is None.

        (pd.DataFrame, list): the restricted feature matrix and the entries
        of ``features`` whose names survive, when ``features`` is given.
    '''
    # NaN is counted as a value here (dropna=False), so a column holding a
    # single constant plus some nulls is kept, while an all-null or
    # constant column is dropped.
    keep = [c for c in feature_matrix
            if (feature_matrix[c].nunique(dropna=False) > 1 and
                feature_matrix[c].dropna().shape[0] > 0)]
    feature_matrix = feature_matrix[keep]
    if features is None:
        return feature_matrix

    # The documented contract allows plain names as well as feature objects;
    # only the latter have get_name(), so fall back to the entry itself.
    features = [f for f in features
                if (f.get_name() if hasattr(f, 'get_name') else f)
                in feature_matrix.columns]
    return feature_matrix, features
143 changes: 0 additions & 143 deletions featuretools/selection/variance_selection.py

This file was deleted.

44 changes: 44 additions & 0 deletions featuretools/tests/selection/test_selection.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
from featuretools.selection import remove_low_information_features
from featuretools.tests.testing_utils import make_ecommerce_entityset
from featuretools import Feature
import numpy as np
import pandas as pd
import pytest


@pytest.fixture(scope='module')
def feature_matrix():
    """Return a 3-row frame with one column per selection scenario."""
    data = {
        'test': [0, 1, 2],
        # NOTE(review): 'no_null' contains a NaN and is identical to
        # 'some_null' -- confirm whether a null-free column was intended.
        'no_null': [np.nan, 0, 0],
        'some_null': [np.nan, 0, 0],
        'all_null': [np.nan, np.nan, np.nan],
        'many_value': [1, 2, 3],
        'dup_value': [1, 1, 2],
        'one_value': [1, 1, 1],
    }
    return pd.DataFrame(data)


@pytest.fixture(scope='module')
def es(feature_matrix):
    """Return the ecommerce entityset with the fixture frame added as entity 'test'."""
    entityset = make_ecommerce_entityset()
    # The 'test' column serves as the index of the new entity.
    entityset.entity_from_dataframe('test', feature_matrix, index='test')
    return entityset


def test_remove_low_information_feature_names(feature_matrix):
    """Called without features, only the reduced matrix is returned."""
    reduced = remove_low_information_features(feature_matrix)
    remaining = reduced.columns
    # 7 input columns minus the constant and the all-null one.
    assert reduced.shape == (3, 5)
    assert 'one_value' not in remaining
    assert 'all_null' not in remaining


def test_remove_low_information_features(es, feature_matrix):
    """Called with features, the matrix and the feature list are filtered together."""
    candidates = [Feature(variable) for variable in es['test'].variables]
    reduced, kept = remove_low_information_features(feature_matrix,
                                                    candidates)
    assert reduced.shape == (3, 5)
    assert len(kept) == 5
    # Every surviving feature must still have a column in the matrix.
    for feature in kept:
        assert feature.get_name() in reduced.columns
    assert 'one_value' not in reduced.columns
    assert 'all_null' not in reduced.columns
42 changes: 0 additions & 42 deletions featuretools/tests/selection/test_variance_selection.py

This file was deleted.

2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ def finalize_options(self):

setup(
name='featuretools',
version='0.1.10',
version='0.1.11',
packages=find_packages(),
description='a framework for automated feature engineering',
url='http://featuretools.com',
Expand Down

0 comments on commit 921c731

Please sign in to comment.