From 2b756175bb880464fbc2d8fe48a4e757e27ee231 Mon Sep 17 00:00:00 2001 From: Ben Schreck Date: Tue, 31 Oct 2017 12:24:43 -0700 Subject: [PATCH] Remove as_dir=False option from EntitySet.to_pickle() (#20) * entityset pickle fix * deep equality check * removed normalization bugfix into separate pr * syntax * rm chmod --- featuretools/entityset/entityset.py | 4 ++-- featuretools/entityset/serialization.py | 22 ++++--------------- featuretools/tests/entityset_tests/test_es.py | 17 ++++++++++++-- 3 files changed, 21 insertions(+), 22 deletions(-) diff --git a/featuretools/entityset/entityset.py b/featuretools/entityset/entityset.py index f1028f42b9..dba813a44b 100644 --- a/featuretools/entityset/entityset.py +++ b/featuretools/entityset/entityset.py @@ -91,8 +91,8 @@ def entity_names(self): """ return [e.id for e in self.entities] - def to_pickle(self, path, as_dir=False): - to_pickle(self, path, as_dir=as_dir) + def to_pickle(self, path): + to_pickle(self, path) return self @classmethod diff --git a/featuretools/entityset/serialization.py b/featuretools/entityset/serialization.py index 15cb76a850..92c07c0354 100644 --- a/featuretools/entityset/serialization.py +++ b/featuretools/entityset/serialization.py @@ -14,18 +14,14 @@ _datetime_types = vtypes.PandasTypes._pandas_datetimes -def to_pickle(entityset, path, as_dir=False): +def to_pickle(entityset, path): """Save the entityset at the given path. Args: entityset (:class:`featuretools.BaseEntitySet`) : EntitySet to save - path : pathname of a pickle file or directory to save the entityset files. - if as_dir is False, this treats path as a pickle file to save the entire entityset to - as_dir (bool) : If True, each entity will be saved to a subfolder, with data saved as a - gzip-compressed CSV, index information saved as a pickle file and - metadata saved as a pickle file - Additional metadata about the entityset itself will be saved to a - pickle file. + path : pathname of a directory to save the entityset + (includes a CSV file for each entity, as well as a metadata + pickle file) """ entityset_path = os.path.abspath(os.path.expanduser(path)) @@ -33,14 +29,7 @@ def to_pickle(entityset, path, as_dir=False): os.makedirs(entityset_path) except OSError: pass - if as_dir: - to_pickle_dir(entityset, entityset_path) - else: - pd_to_pickle(entityset, entityset_path) - os.chmod(entityset_path, 0o755) - -def to_pickle_dir(entityset, entityset_path): entity_store_dframes = {} entity_store_index_bys = {} @@ -99,9 +88,6 @@ def read_pickle(path): path (str): Path of directory where entityset is stored """ entityset_path = os.path.abspath(os.path.expanduser(path)) - if os.path.isfile(path): - return pd_read_pickle(path) - entityset = pd_read_pickle(os.path.join(entityset_path, 'entityset.p')) for e_id, entity_store in entityset.entity_stores.items(): entity_path = os.path.join(entityset_path, e_id) diff --git a/featuretools/tests/entityset_tests/test_es.py b/featuretools/tests/entityset_tests/test_es.py index cd7b8bb4b7..5e7067b458 100644 --- a/featuretools/tests/entityset_tests/test_es.py +++ b/featuretools/tests/entityset_tests/test_es.py @@ -1,8 +1,10 @@ import pytest from ..testing_utils import make_ecommerce_entityset - -from featuretools import Relationship, variable_types +from featuretools.tests import integration_data +from featuretools import Relationship, variable_types, EntitySet +import os +import shutil @pytest.fixture @@ -149,3 +151,14 @@ def test_gzip_glob_entityset(es, gzip_glob_es): df_1 = es.entity_stores['log'].df df_2 = gzip_glob_es.entity_stores['log'].df assert df_1.equals(df_2) + + +def test_serialization(es): + dirname = os.path.dirname(integration_data.__file__) + path = os.path.join(dirname, 'test_entityset.p') + if os.path.exists(path): + shutil.rmtree(path) + es.to_pickle(path) + new_es = EntitySet.read_pickle(path) + assert es.__eq__(new_es, deep=True) + shutil.rmtree(path)