From 18c4402facecd0cc9cd7956f343f09c70f6354fc Mon Sep 17 00:00:00 2001 From: Mason Poggemann Date: Thu, 29 Aug 2019 10:16:13 -0600 Subject: [PATCH 1/9] added and make a for X --- .../general_resource_computers.py | 6 +++ metalearn/metafeatures/metafeatures.py | 42 ++++++++++++------- tests/single_run.py | 0 3 files changed, 33 insertions(+), 15 deletions(-) create mode 100644 tests/single_run.py diff --git a/metalearn/metafeatures/general_resource_computers.py b/metalearn/metafeatures/general_resource_computers.py index 0dd4371..c947fda 100644 --- a/metalearn/metafeatures/general_resource_computers.py +++ b/metalearn/metafeatures/general_resource_computers.py @@ -6,6 +6,11 @@ import metalearn.metafeatures.constants as consts +def get_X(X_raw): + return X_raw.dropna(axis=1, how="all"), + +get_X = ResourceComputer(get_X, ["X"]) + def get_cv_seed(seed_base, seed_offset): return (seed_base + seed_offset,) @@ -235,6 +240,7 @@ def get_text_features_with_no_missing_values( instances in this module. 
""" resources_info = build_resources_info( + get_X, get_cv_seed, sample_columns, sample_rows, diff --git a/metalearn/metafeatures/metafeatures.py b/metalearn/metafeatures/metafeatures.py index 2466fd6..0d3574f 100644 --- a/metalearn/metafeatures/metafeatures.py +++ b/metalearn/metafeatures/metafeatures.py @@ -25,7 +25,6 @@ from metalearn.metafeatures.statistical_metafeatures import metafeatures_info as statistical_metafeatures from metalearn.metafeatures.text_metafeatures import metafeatures_info as text_metafeatures - class Metafeatures(object): """ Computes metafeatures on a given tabular dataset (pandas.DataFrame) with @@ -41,7 +40,7 @@ class Metafeatures(object): # noop resource computers for the user-provided resources # `_get_arguments` and `_resource_is_target_dependent` assumes ResourceComputer's - for resource_name in ["X_raw", "X", "Y", "column_types", "sample_shape", "seed_base", "n_folds"]: + for resource_name in ["X_raw", "Y", "column_types", "sample_shape", "seed_base", "n_folds"]: _resources_info[resource_name] = ResourceComputer(lambda: None, [resource_name]) _mfs_info = [ @@ -85,7 +84,7 @@ def compute( self, X: DataFrame, Y: Series=None, column_types: Dict[str, str]=None, metafeature_ids: List=None, exclude: List=None, sample_shape=None, seed=None, n_folds=2, - verbose=False, timeout=None + verbose=False, timeout=None, return_times=False ) -> dict: """ Parameters @@ -123,7 +122,7 @@ def compute( start_time = time.time() self._validate_compute_arguments( X, Y, column_types, metafeature_ids, exclude, sample_shape, seed, - n_folds, verbose + n_folds, verbose, return_times ) if timeout is None: def check_time(): @@ -145,9 +144,11 @@ def check_time(): seed = np.random.randint(np.iinfo(np.int32).max) self._validate_compute_arguments( X, Y, column_types, metafeature_ids, exclude, sample_shape, seed, - n_folds, verbose + n_folds, verbose, return_times ) + self._return_times = return_times + self._init_resources( X, Y, column_types, sample_shape, seed, 
n_folds ) @@ -176,6 +177,10 @@ def check_time(): except TimeoutError: pass + if not return_times: + for mf, result_dict in computed_metafeatures.items(): + del result_dict[consts.COMPUTE_TIME_KEY] + return computed_metafeatures def _format_resource(self, value, compute_time): @@ -191,7 +196,7 @@ def _init_resources( # Add the base resources to our resources hash self._resources = { "X_raw": self._format_resource(X, 0.), # TODO: rename to X - "X": self._format_resource(X.dropna(axis=1, how="all"), 0.), # TODO: make resource computer; rename + # "X": self._format_resource(X.dropna(axis=1, how="all"), 0.), # TODO: make resource computer; rename "Y": self._format_resource(Y, 0.), "column_types": self._format_resource(column_types, 0.), "sample_shape": self._format_resource(sample_shape, 0.), @@ -216,7 +221,7 @@ def _resource_is_target_dependent(cls, resource_id): def _validate_compute_arguments( self, X, Y, column_types, metafeature_ids, exclude, sample_shape, seed, - n_folds, verbose + n_folds, verbose, return_times ): for f in [ self._validate_X, self._validate_Y, self._validate_column_types, @@ -225,12 +230,12 @@ def _validate_compute_arguments( ]: f( X, Y, column_types, metafeature_ids, exclude, sample_shape, seed, - n_folds, verbose + n_folds, verbose, return_times ) def _validate_X( self, X, Y, column_types, metafeature_ids, exclude, sample_shape, seed, - n_folds, verbose + n_folds, verbose, return_times ): if not isinstance(X, pd.DataFrame): raise TypeError('X must be of type pandas.DataFrame') @@ -239,7 +244,7 @@ def _validate_X( def _validate_Y( self, X, Y, column_types, metafeature_ids, exclude, sample_shape, seed, - n_folds, verbose + n_folds, verbose, return_times ): if not isinstance(Y, pd.Series) and not Y is None: raise TypeError('Y must be of type pandas.Series') @@ -248,7 +253,7 @@ def _validate_Y( def _validate_column_types( self, X, Y, column_types, metafeature_ids, exclude, sample_shape, seed, - n_folds, verbose + n_folds, verbose, return_times ): 
if not column_types is None: invalid_column_types = {} @@ -272,7 +277,7 @@ def _validate_column_types( def _validate_metafeature_ids( self, X, Y, column_types, metafeature_ids, exclude, sample_shape, seed, - n_folds, verbose + n_folds, verbose, return_times ): ids = None if metafeature_ids is not None and exclude is not None: @@ -295,7 +300,7 @@ def _validate_metafeature_ids( def _validate_sample_shape( self, X, Y, column_types, metafeature_ids, exclude, sample_shape, seed, - n_folds, verbose + n_folds, verbose, return_times ): if not sample_shape is None: if not type(sample_shape) in [tuple, list]: @@ -317,7 +322,7 @@ def _validate_sample_shape( def _validate_n_folds( self, X, Y, column_types, metafeature_ids, exclude, sample_shape, seed, - n_folds, verbose + n_folds, verbose, return_times ): if not dtype_is_numeric(type(n_folds)) or (n_folds != int(n_folds)): raise ValueError(f"`n_folds` must be an integer, not {n_folds}") @@ -344,11 +349,18 @@ def _validate_n_folds( def _validate_verbose( self, X, Y, column_types, metafeature_ids, exclude, sample_shape, seed, - n_folds, verbose + n_folds, verbose, return_times ): if not type(verbose) is bool: raise ValueError("`verbose` must be of type bool.") + def _validate_return_times( + self, X, Y, column_types, metafeature_ids, exclude, sample_shape, seed, + n_folds, verbose, return_times + ): + if not type(return_times) is bool: + raise ValueError("`return_times` must be of type bool.") + # todo: intelligently infer TEXT data type def _infer_column_types(self, X, Y): column_types = {} diff --git a/tests/single_run.py b/tests/single_run.py new file mode 100644 index 0000000..e69de29 From e2ce45e73a324127c9da842da4cf9dffaef672eb Mon Sep 17 00:00:00 2001 From: Mason Poggemann Date: Thu, 29 Aug 2019 11:39:09 -0600 Subject: [PATCH 2/9] updated metafeature schema to make compute_times optional --- metalearn/metafeatures/metafeatures_schema.json | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git 
a/metalearn/metafeatures/metafeatures_schema.json b/metalearn/metafeatures/metafeatures_schema.json index 6258c9e..8f80f29 100644 --- a/metalearn/metafeatures/metafeatures_schema.json +++ b/metalearn/metafeatures/metafeatures_schema.json @@ -5,8 +5,7 @@ "dataset_metafeature": { "type": "object", "required": [ - "value", - "compute_time" + "value" ], "properties": { "value": { From dcb842ab62468d8168bfa62e91693528854f34d8 Mon Sep 17 00:00:00 2001 From: Mason Poggemann Date: Tue, 3 Sep 2019 09:04:07 -0600 Subject: [PATCH 3/9] finish merge --- metalearn/metafeatures/metafeatures.py | 3 --- tests/test_metafeatures.py | 15 +++++++++++++++ 2 files changed, 15 insertions(+), 3 deletions(-) diff --git a/metalearn/metafeatures/metafeatures.py b/metalearn/metafeatures/metafeatures.py index 0d3574f..2bd8aea 100644 --- a/metalearn/metafeatures/metafeatures.py +++ b/metalearn/metafeatures/metafeatures.py @@ -147,8 +147,6 @@ def check_time(): n_folds, verbose, return_times ) - self._return_times = return_times - self._init_resources( X, Y, column_types, sample_shape, seed, n_folds ) @@ -196,7 +194,6 @@ def _init_resources( # Add the base resources to our resources hash self._resources = { "X_raw": self._format_resource(X, 0.), # TODO: rename to X - # "X": self._format_resource(X.dropna(axis=1, how="all"), 0.), # TODO: make resource computer; rename "Y": self._format_resource(Y, 0.), "column_types": self._format_resource(column_types, 0.), "sample_shape": self._format_resource(sample_shape, 0.), diff --git a/tests/test_metafeatures.py b/tests/test_metafeatures.py index 3ae1aa0..1f0992e 100644 --- a/tests/test_metafeatures.py +++ b/tests/test_metafeatures.py @@ -763,3 +763,18 @@ def test_y_no_name(self): def test_no_duplicate_mf_ids(self): self.assertEqual(len(Metafeatures.IDS), len(set(Metafeatures.IDS)), 'Metafeatures has duplicate IDS') + + def test_compute_times(self): + X = pd.DataFrame(np.random.rand(8, 2)) + y = pd.Series(['a', 'a', 'a', 'a', 'b', 'b', 'b', 'b']) + mf_list 
= Metafeatures.list_metafeatures() + metafeatures = Metafeatures() + for mf in mf_list: + start = time.perf_counter() + result = metafeatures.compute(X, y, metafeature_ids=[mf], return_times=True) + measured_time = time.perf_counter() - start + returned_time = result[mf][consts.COMPUTE_TIME_KEY] + + if measured_time < returned_time or not np.isclose(measured_time, returned_time, rtol=0.05, atol=0.005): + self.fail(f'Compute time for {mf} is incorrect. ' + f'Returned time is {returned_time}, measured time is {measured_time}') From ec7928504a0866314f835bb047609f0a6b3953c6 Mon Sep 17 00:00:00 2001 From: Mason Poggemann Date: Mon, 16 Sep 2019 09:10:17 -0600 Subject: [PATCH 4/9] removed compute-time tests --- tests/test_metafeatures.py | 15 --------------- 1 file changed, 15 deletions(-) diff --git a/tests/test_metafeatures.py b/tests/test_metafeatures.py index 1f0992e..3ae1aa0 100644 --- a/tests/test_metafeatures.py +++ b/tests/test_metafeatures.py @@ -763,18 +763,3 @@ def test_y_no_name(self): def test_no_duplicate_mf_ids(self): self.assertEqual(len(Metafeatures.IDS), len(set(Metafeatures.IDS)), 'Metafeatures has duplicate IDS') - - def test_compute_times(self): - X = pd.DataFrame(np.random.rand(8, 2)) - y = pd.Series(['a', 'a', 'a', 'a', 'b', 'b', 'b', 'b']) - mf_list = Metafeatures.list_metafeatures() - metafeatures = Metafeatures() - for mf in mf_list: - start = time.perf_counter() - result = metafeatures.compute(X, y, metafeature_ids=[mf], return_times=True) - measured_time = time.perf_counter() - start - returned_time = result[mf][consts.COMPUTE_TIME_KEY] - - if measured_time < returned_time or not np.isclose(measured_time, returned_time, rtol=0.05, atol=0.005): - self.fail(f'Compute time for {mf} is incorrect. 
' - f'Returned time is {returned_time}, measured time is {measured_time}') From 803b49e5b2b0f2e2d3ee2283bc33b4e9dd29a0cf Mon Sep 17 00:00:00 2001 From: Mason Poggemann Date: Mon, 16 Sep 2019 09:29:26 -0600 Subject: [PATCH 5/9] added tests for returning compute times --- tests/test_metafeatures.py | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/tests/test_metafeatures.py b/tests/test_metafeatures.py index 3ae1aa0..93c24e2 100644 --- a/tests/test_metafeatures.py +++ b/tests/test_metafeatures.py @@ -763,3 +763,20 @@ def test_y_no_name(self): def test_no_duplicate_mf_ids(self): self.assertEqual(len(Metafeatures.IDS), len(set(Metafeatures.IDS)), 'Metafeatures has duplicate IDS') + + def test_compute_time(self): + no_time_mfs = Metafeatures().compute(self.dummy_features, self.dummy_target, return_times=False) + self.assertTrue( + all(len(result.keys()) == 1 + and consts.COMPUTE_TIME_KEY not in result + and consts.VALUE_KEY in result + for result in no_time_mfs.values()), + f'return_times is set to False but some compute_times were still returned') + + timed_mfs = Metafeatures().compute(self.dummy_features, self.dummy_target, return_times=True,) + self.assertTrue( + all(len(result) == 2 + and consts.COMPUTE_TIME_KEY in result + and consts.VALUE_KEY in result + for result in timed_mfs.values()), + f'return_times is set to True but some compute_times were not returned') From 5b4f7c1539183d31e1f78824baf46b1a950a0e9e Mon Sep 17 00:00:00 2001 From: Mason Poggemann Date: Mon, 16 Sep 2019 09:30:05 -0600 Subject: [PATCH 6/9] removed compute_times from known metafeatures --- .../38_sick_train_data_mf.json | 231 ------------------ .../small_test_dataset_mf.json | 231 ------------------ .../small_test_dataset_with_text_mf.json | 231 ------------------ 3 files changed, 693 deletions(-) diff --git a/tests/data/dataset_metafeatures/38_sick_train_data_mf.json b/tests/data/dataset_metafeatures/38_sick_train_data_mf.json index a97c1e1..6ac1523 100644 --- 
a/tests/data/dataset_metafeatures/38_sick_train_data_mf.json +++ b/tests/data/dataset_metafeatures/38_sick_train_data_mf.json @@ -1,926 +1,695 @@ { "CategoricalNoiseToSignalRatio": { - "compute_time": 0.10639627503405791, "value": 65.12375165878576 }, "ClassEntropy": { - "compute_time": 0.0016181730170501396, "value": 0.2303678505246139 }, "DecisionStumpErrRate": { - "compute_time": 0.05637974603450857, "value": 0.0530205974425042 }, "DecisionStumpKappa": { - "compute_time": 0.05637974603450857, "value": 0.6460807839667586 }, "DecisionTreeHeight": { - "compute_time": 0.039231870032381266, "value": 11 }, "DecisionTreeLeafCount": { - "compute_time": 0.039231870032381266, "value": 49 }, "DecisionTreeNodeCount": { - "compute_time": 0.039231870032381266, "value": 97 }, "DecisionTreeWidth": { - "compute_time": 0.039326284037088044, "value": 9 }, "Dimensionality": { - "compute_time": 4.1207007598131895e-05, "value": 0.0076882290562036056 }, "EquivalentNumberOfCategoricalFeatures": { - "compute_time": 0.0810160680412082, "value": 64.97682682412889 }, "EquivalentNumberOfNumericFeatures": { - "compute_time": 0.11181286803912371, "value": 8.265561221791359 }, "KurtosisCardinalityOfCategoricalFeatures": { - "compute_time": 0.0033224879880435765, "value": 10.626941000939418 }, "KurtosisCardinalityOfNumericFeatures": { - "compute_time": 0.0013872389972675592, "value": -1.5824634006168445 }, "KurtosisCategoricalAttributeEntropy": { - "compute_time": 0.026999106019502506, "value": 3.6459237447298536 }, "KurtosisCategoricalJointEntropy": { - "compute_time": 0.0739495230227476, "value": 3.2709710235309224 }, "KurtosisCategoricalMutualInformation": { - "compute_time": 0.07939626401639543, "value": 14.080033213321688 }, "KurtosisClassProbability": { - "compute_time": 0.0012301289971219376, "value": -2.0 }, "KurtosisDecisionTreeAttribute": { - "compute_time": 0.039824964027502574, "value": 10.617333960472148 }, "KurtosisDecisionTreeBranchLength": { - "compute_time": 
0.039884808036731556, "value": -0.20597828128078532 }, "KurtosisDecisionTreeLevelSize": { - "compute_time": 0.039929140024469234, "value": -1.1570111417931213 }, "KurtosisKurtosisOfNumericFeatures": { - "compute_time": 0.0018031450163107365, "value": 1.0285691141266575 }, "KurtosisKurtosisOfStringLengthOfTextFeatures": { - "compute_time": 4.747702041640878e-05, "value": NaN }, "KurtosisMeansOfNumericFeatures": { - "compute_time": 0.001676646017585881, "value": -1.6406633903613828 }, "KurtosisMeansOfStringLengthOfTextFeatures": { - "compute_time": 4.871402052231133e-05, "value": NaN }, "KurtosisNumericAttributeEntropy": { - "compute_time": 0.021672753005987033, "value": 0.25957571575898 }, "KurtosisNumericJointEntropy": { - "compute_time": 0.02701053602504544, "value": 0.18662301318798447 }, "KurtosisNumericMutualInformation": { - "compute_time": 0.1101939080253942, "value": 0.7111184356200178 }, "KurtosisSkewnessOfNumericFeatures": { - "compute_time": 0.0017805700190365314, "value": 1.1767947825290799 }, "KurtosisSkewnessOfStringLengthOfTextFeatures": { - "compute_time": 4.742601595353335e-05, "value": NaN }, "KurtosisStdDevOfNumericFeatures": { - "compute_time": 0.0018315060005988926, "value": -1.5096852202508921 }, "KurtosisStdDevOfStringLengthOfTextFeatures": { - "compute_time": 4.774001718033105e-05, "value": NaN }, "LinearDiscriminantAnalysisErrRate": { - "compute_time": 0.09895071103528608, "value": 0.050901392889222574 }, "LinearDiscriminantAnalysisKappa": { - "compute_time": 0.09895071103528608, "value": 0.41518335844457105 }, "MajorityClassSize": { - "compute_time": 0.0012301289971219376, "value": 3541 }, "MaxCardinalityOfCategoricalFeatures": { - "compute_time": 0.0033224879880435765, "value": 5.0 }, "MaxCardinalityOfNumericFeatures": { - "compute_time": 0.0013872389972675592, "value": 288.0 }, "MaxCategoricalAttributeEntropy": { - "compute_time": 0.026999106019502506, "value": 1.0540619253604604 }, "MaxCategoricalJointEntropy": { - "compute_time": 
0.0739495230227476, "value": 1.2427930071894404 }, "MaxCategoricalMutualInformation": { - "compute_time": 0.07939626401639543, "value": 0.04163676869563335 }, "MaxClassProbability": { - "compute_time": 0.0012301289971219376, "value": 0.9387592788971368 }, "MaxDecisionTreeAttribute": { - "compute_time": 0.039824964027502574, "value": 49.0 }, "MaxDecisionTreeBranchLength": { - "compute_time": 0.039884808036731556, "value": 10.0 }, "MaxDecisionTreeLevelSize": { - "compute_time": 0.039929140024469234, "value": 22.0 }, "MaxKurtosisOfNumericFeatures": { - "compute_time": 0.0018031450163107365, "value": 238.18146237806315 }, "MaxKurtosisOfStringLengthOfTextFeatures": { - "compute_time": 4.747702041640878e-05, "value": NaN }, "MaxMeansOfNumericFeatures": { - "compute_time": 0.001676646017585881, "value": 110.4696486566283 }, "MaxMeansOfStringLengthOfTextFeatures": { - "compute_time": 4.871402052231133e-05, "value": NaN }, "MaxNumericAttributeEntropy": { - "compute_time": 0.021672753005987033, "value": 1.7001812503765323 }, "MaxNumericJointEntropy": { - "compute_time": 0.02701053602504544, "value": 1.9127090565977578 }, "MaxNumericMutualInformation": { - "compute_time": 0.1101939080253942, "value": 0.10645678195030889 }, "MaxSkewnessOfNumericFeatures": { - "compute_time": 0.0017805700190365314, "value": 13.882652755041768 }, "MaxSkewnessOfStringLengthOfTextFeatures": { - "compute_time": 4.742601595353335e-05, "value": NaN }, "MaxStdDevOfNumericFeatures": { - "compute_time": 0.0018315060005988926, "value": 35.60424760764098 }, "MaxStdDevOfStringLengthOfTextFeatures": { - "compute_time": 4.774001718033105e-05, "value": NaN }, "MeanCardinalityOfCategoricalFeatures": { - "compute_time": 0.0033224879880435765, "value": 2.1363636363636362 }, "MeanCardinalityOfNumericFeatures": { - "compute_time": 0.0013872389972675592, "value": 179.33333333333334 }, "MeanCategoricalAttributeEntropy": { - "compute_time": 0.026999106019502506, "value": 0.2344341403357237 }, 
"MeanCategoricalJointEntropy": { - "compute_time": 0.0739495230227476, "value": 0.4613654485629755 }, "MeanCategoricalMutualInformation": { - "compute_time": 0.07939626401639543, "value": 0.003545384743827898 }, "MeanClassProbability": { - "compute_time": 0.0012301289971219376, "value": 0.5 }, "MeanDecisionTreeAttribute": { - "compute_time": 0.039824964027502574, "value": 5.705882352941177 }, "MeanDecisionTreeBranchLength": { - "compute_time": 0.039884808036731556, "value": 6.26530612244898 }, "MeanDecisionTreeLevelSize": { - "compute_time": 0.039929140024469234, "value": 8.818181818181818 }, "MeanKurtosisOfNumericFeatures": { - "compute_time": 0.0018031450163107365, "value": 51.413135065510026 }, "MeanKurtosisOfStringLengthOfTextFeatures": { - "compute_time": 4.747702041640878e-05, "value": NaN }, "MeanMeansOfNumericFeatures": { - "compute_time": 0.001676646017585881, "value": 46.436689696411385 }, "MeanMeansOfStringLengthOfTextFeatures": { - "compute_time": 4.871402052231133e-05, "value": NaN }, "MeanNumericAttributeEntropy": { - "compute_time": 0.021672753005987033, "value": 1.240228736551128 }, "MeanNumericJointEntropy": { - "compute_time": 0.02701053602504544, "value": 1.4577611891821942 }, "MeanNumericMutualInformation": { - "compute_time": 0.1101939080253942, "value": 0.027870805664988743 }, "MeanSkewnessOfNumericFeatures": { - "compute_time": 0.0017805700190365314, "value": 3.5691918824691036 }, "MeanSkewnessOfStringLengthOfTextFeatures": { - "compute_time": 4.742601595353335e-05, "value": NaN }, "MeanStdDevOfNumericFeatures": { - "compute_time": 0.0018315060005988926, "value": 19.053877588965566 }, "MeanStdDevOfStringLengthOfTextFeatures": { - "compute_time": 4.774001718033105e-05, "value": NaN }, "MinCardinalityOfCategoricalFeatures": { - "compute_time": 0.0033224879880435765, "value": 1.0 }, "MinCardinalityOfNumericFeatures": { - "compute_time": 0.0013872389972675592, "value": 70.0 }, "MinCategoricalAttributeEntropy": { - "compute_time": 
0.026999106019502506, "value": 0.0 }, "MinCategoricalJointEntropy": { - "compute_time": 0.0739495230227476, "value": 0.2303678505246139 }, "MinCategoricalMutualInformation": { - "compute_time": 0.07939626401639543, "value": -1.6653345369377348e-15 }, "MinClassProbability": { - "compute_time": 0.0012301289971219376, "value": 0.0612407211028632 }, "MinDecisionTreeAttribute": { - "compute_time": 0.039824964027502574, "value": 1.0 }, "MinDecisionTreeBranchLength": { - "compute_time": 0.039884808036731556, "value": 4.0 }, "MinDecisionTreeLevelSize": { - "compute_time": 0.039929140024469234, "value": 1.0 }, "MinKurtosisOfNumericFeatures": { - "compute_time": 0.0018031450163107365, "value": 4.073471498748891 }, "MinKurtosisOfStringLengthOfTextFeatures": { - "compute_time": 4.747702041640878e-05, "value": NaN }, "MinMeansOfNumericFeatures": { - "compute_time": 0.001676646017585881, "value": 0.9949997045790251 }, "MinMeansOfStringLengthOfTextFeatures": { - "compute_time": 4.871402052231133e-05, "value": NaN }, "MinNumericAttributeEntropy": { - "compute_time": 0.021672753005987033, "value": 0.14521170237536474 }, "MinNumericJointEntropy": { - "compute_time": 0.02701053602504544, "value": 0.39093470087095644 }, "MinNumericMutualInformation": { - "compute_time": 0.1101939080253942, "value": 0.0008582987317293472 }, "MinSkewnessOfNumericFeatures": { - "compute_time": 0.0017805700190365314, "value": 1.2326742385072778 }, "MinSkewnessOfStringLengthOfTextFeatures": { - "compute_time": 4.742601595353335e-05, "value": NaN }, "MinStdDevOfNumericFeatures": { - "compute_time": 0.0018315060005988926, "value": 0.1954572751132885 }, "MinStdDevOfStringLengthOfTextFeatures": { - "compute_time": 4.774001718033105e-05, "value": NaN }, "MinorityClassSize": { - "compute_time": 0.0012301289971219376, "value": 231 }, "NaiveBayesErrRate": { - "compute_time": 0.05943656704039313, "value": 0.6781856595244019 }, "NaiveBayesKappa": { - "compute_time": 0.05943656704039313, "value": 0.039699494487290043 
}, "NumberOfCategoricalFeatures": { - "compute_time": 1.9931001588702202e-05, "value": 22 }, "NumberOfClasses": { - "compute_time": 0.0012301289971219376, "value": 2 }, "NumberOfDistinctTokens": { - "compute_time": 0.00022466000518761575, "value": 0 }, "NumberOfFeatures": { - "compute_time": 1.9931001588702202e-05, "value": 29 }, "NumberOfFeaturesWithMissingValues": { - "compute_time": 0.008072049007751048, "value": 8 }, "NumberOfInstances": { - "compute_time": 1.9931001588702202e-05, "value": 3772 }, "NumberOfInstancesWithMissingValues": { - "compute_time": 0.008072049007751048, "value": 3772 }, "NumberOfMissingValues": { - "compute_time": 0.008072049007751048, "value": 6064 }, "NumberOfNumericFeatures": { - "compute_time": 1.9931001588702202e-05, "value": 7 }, "NumberOfTokens": { - "compute_time": 0.00022466000518761575, "value": 0 }, "NumberOfTokensContainingNumericChar": { - "compute_time": 0.00022466000518761575, "value": 0 }, "NumericNoiseToSignalRatio": { - "compute_time": 0.1318673100322485, "value": 43.499206497970064 }, "PredDet": { - "compute_time": 0.07142707602179144, "value": 8.398206098130474e+47 }, "PredEigen1": { - "compute_time": 0.07142707602179144, "value": 2076.940458333572 }, "PredEigen2": { - "compute_time": 0.07142707602179144, "value": 584.6940837111429 }, "PredEigen3": { - "compute_time": 0.07142707602179144, "value": 416.0684363789902 }, "PredPCA1": { - "compute_time": 0.07142707602179144, "value": 0.6131782342417007 }, "PredPCA2": { - "compute_time": 0.07142707602179144, "value": 0.17262010780474016 }, "PredPCA3": { - "compute_time": 0.07142707602179144, "value": 0.12283650603410787 }, "Quartile1CardinalityOfCategoricalFeatures": { - "compute_time": 0.0033224879880435765, "value": 2.0 }, "Quartile1CardinalityOfNumericFeatures": { - "compute_time": 0.0013872389972675592, "value": 107.25 }, "Quartile1CategoricalAttributeEntropy": { - "compute_time": 0.026999106019502506, "value": 0.07132665652246652 }, "Quartile1CategoricalJointEntropy": { 
- "compute_time": 0.0739495230227476, "value": 0.3014425793394888 }, "Quartile1CategoricalMutualInformation": { - "compute_time": 0.07939626401639543, "value": 0.000401382222092676 }, "Quartile1ClassProbability": { - "compute_time": 0.0012301289971219376, "value": 0.2806203605514316 }, "Quartile1DecisionTreeAttribute": { - "compute_time": 0.039824964027502574, "value": 1.0 }, "Quartile1DecisionTreeBranchLength": { - "compute_time": 0.039884808036731556, "value": 5.0 }, "Quartile1DecisionTreeLevelSize": { - "compute_time": 0.039929140024469234, "value": 3.0 }, "Quartile1KurtosisOfNumericFeatures": { - "compute_time": 0.0018031450163107365, "value": 6.9324686702015095 }, "Quartile1KurtosisOfStringLengthOfTextFeatures": { - "compute_time": 4.747702041640878e-05, "value": NaN }, "Quartile1MeansOfNumericFeatures": { - "compute_time": 0.001676646017585881, "value": 2.7818163973111814 }, "Quartile1MeansOfStringLengthOfTextFeatures": { - "compute_time": 4.871402052231133e-05, "value": NaN }, "Quartile1NumericAttributeEntropy": { - "compute_time": 0.021672753005987033, "value": 1.1599826109278069 }, "Quartile1NumericJointEntropy": { - "compute_time": 0.02701053602504544, "value": 1.3652560566996312 }, "Quartile1NumericMutualInformation": { - "compute_time": 0.1101939080253942, "value": 0.005350531172490503 }, "Quartile1SkewnessOfNumericFeatures": { - "compute_time": 0.0017805700190365314, "value": 1.2871359905719386 }, "Quartile1SkewnessOfStringLengthOfTextFeatures": { - "compute_time": 4.742601595353335e-05, "value": NaN }, "Quartile1StdDevOfNumericFeatures": { - "compute_time": 0.0018315060005988926, "value": 5.641815238818264 }, "Quartile1StdDevOfStringLengthOfTextFeatures": { - "compute_time": 4.774001718033105e-05, "value": NaN }, "Quartile2CardinalityOfCategoricalFeatures": { - "compute_time": 0.0033224879880435765, "value": 2.0 }, "Quartile2CardinalityOfNumericFeatures": { - "compute_time": 0.0013872389972675592, "value": 191.0 }, 
"Quartile2CategoricalAttributeEntropy": { - "compute_time": 0.026999106019502506, "value": 0.17978494759170108 }, "Quartile2CategoricalJointEntropy": { - "compute_time": 0.0739495230227476, "value": 0.40854057385576326 }, "Quartile2CategoricalMutualInformation": { - "compute_time": 0.07939626401639543, "value": 0.0008944661141944213 }, "Quartile2ClassProbability": { - "compute_time": 0.0012301289971219376, "value": 0.5 }, "Quartile2DecisionTreeAttribute": { - "compute_time": 0.039824964027502574, "value": 2.0 }, "Quartile2DecisionTreeBranchLength": { - "compute_time": 0.039884808036731556, "value": 6.0 }, "Quartile2DecisionTreeLevelSize": { - "compute_time": 0.039929140024469234, "value": 6.0 }, "Quartile2KurtosisOfNumericFeatures": { - "compute_time": 0.0018031450163107365, "value": 8.871303806575874 }, "Quartile2KurtosisOfStringLengthOfTextFeatures": { - "compute_time": 4.747702041640878e-05, "value": NaN }, "Quartile2MeansOfNumericFeatures": { - "compute_time": 0.001676646017585881, "value": 28.41132258295654 }, "Quartile2MeansOfStringLengthOfTextFeatures": { - "compute_time": 4.871402052231133e-05, "value": NaN }, "Quartile2NumericAttributeEntropy": { - "compute_time": 0.021672753005987033, "value": 1.47782513171791 }, "Quartile2NumericJointEntropy": { - "compute_time": 0.02701053602504544, "value": 1.6768594976548352 }, "Quartile2NumericMutualInformation": { - "compute_time": 0.1101939080253942, "value": 0.013027973551840908 }, "Quartile2SkewnessOfNumericFeatures": { - "compute_time": 0.0017805700190365314, "value": 1.5381528946216996 }, "Quartile2SkewnessOfStringLengthOfTextFeatures": { - "compute_time": 4.742601595353335e-05, "value": NaN }, "Quartile2StdDevOfNumericFeatures": { - "compute_time": 0.0018315060005988926, "value": 22.30321439330161 }, "Quartile2StdDevOfStringLengthOfTextFeatures": { - "compute_time": 4.774001718033105e-05, "value": NaN }, "Quartile3CardinalityOfCategoricalFeatures": { - "compute_time": 0.0033224879880435765, "value": 2.0 }, 
"Quartile3CardinalityOfNumericFeatures": { - "compute_time": 0.0013872389972675592, "value": 240.25 }, "Quartile3CategoricalAttributeEntropy": { - "compute_time": 0.026999106019502506, "value": 0.32727219276174513 }, "Quartile3CategoricalJointEntropy": { - "compute_time": 0.0739495230227476, "value": 0.5549217204785228 }, "Quartile3CategoricalMutualInformation": { - "compute_time": 0.07939626401639543, "value": 0.0021054152441979074 }, "Quartile3ClassProbability": { - "compute_time": 0.0012301289971219376, "value": 0.7193796394485685 }, "Quartile3DecisionTreeAttribute": { - "compute_time": 0.039824964027502574, "value": 4.0 }, "Quartile3DecisionTreeBranchLength": { - "compute_time": 0.039884808036731556, "value": 7.0 }, "Quartile3DecisionTreeLevelSize": { - "compute_time": 0.039929140024469234, "value": 15.0 }, "Quartile3KurtosisOfNumericFeatures": { - "compute_time": 0.0018031450163107365, "value": 33.86413491040868 }, "Quartile3KurtosisOfStringLengthOfTextFeatures": { - "compute_time": 4.747702041640878e-05, "value": NaN }, "Quartile3MeansOfNumericFeatures": { - "compute_time": 0.001676646017585881, "value": 94.17347838267801 }, "Quartile3MeansOfStringLengthOfTextFeatures": { - "compute_time": 4.871402052231133e-05, "value": NaN }, "Quartile3NumericAttributeEntropy": { - "compute_time": 0.021672753005987033, "value": 1.5591768572704363 }, "Quartile3NumericJointEntropy": { - "compute_time": 0.02701053602504544, "value": 1.7900769788638717 }, "Quartile3NumericMutualInformation": { - "compute_time": 0.1101939080253942, "value": 0.026553810256589254 }, "Quartile3SkewnessOfNumericFeatures": { - "compute_time": 0.0017805700190365314, "value": 1.8995793407555395 }, "Quartile3SkewnessOfStringLengthOfTextFeatures": { - "compute_time": 4.742601595353335e-05, "value": NaN }, "Quartile3StdDevOfNumericFeatures": { - "compute_time": 0.0018315060005988926, "value": 30.947640856159467 }, "Quartile3StdDevOfStringLengthOfTextFeatures": { - "compute_time": 4.774001718033105e-05, 
"value": NaN }, "RandomTreeDepth1ErrRate": { - "compute_time": 0.055062069033738226, "value": 0.061240597751753945 }, "RandomTreeDepth1Kappa": { - "compute_time": 0.055062069033738226, "value": 0.0 }, "RandomTreeDepth2ErrRate": { - "compute_time": 0.05544865602860227, "value": 0.061240597751753945 }, "RandomTreeDepth2Kappa": { - "compute_time": 0.05544865602860227, "value": 0.0 }, "RandomTreeDepth3ErrRate": { - "compute_time": 0.05578341003274545, "value": 0.046121234356528484 }, "RandomTreeDepth3Kappa": { - "compute_time": 0.05578341003274545, "value": 0.3551191328085659 }, "RatioOfCategoricalFeatures": { - "compute_time": 1.9931001588702202e-05, "value": 0.7586206896551724 }, "RatioOfDistinctTokens": { - "compute_time": 0.00022466000518761575, "value": 0 }, "RatioOfFeaturesWithMissingValues": { - "compute_time": 0.008072049007751048, "value": 0.27586206896551724 }, "RatioOfInstancesWithMissingValues": { - "compute_time": 0.008072049007751048, "value": 1.0 }, "RatioOfMissingValues": { - "compute_time": 0.008072049007751048, "value": 0.05543569678575346 }, "RatioOfNumericFeatures": { - "compute_time": 1.9931001588702202e-05, "value": 0.2413793103448276 }, "RatioOfTokensContainingNumericChar": { - "compute_time": 0.00022466000518761575, "value": 0 }, "SkewCardinalityOfCategoricalFeatures": { - "compute_time": 0.0033224879880435765, "value": 3.077104198750258 }, "SkewCardinalityOfNumericFeatures": { - "compute_time": 0.0013872389972675592, "value": -0.08243378475608776 }, "SkewCategoricalAttributeEntropy": { - "compute_time": 0.026999106019502506, "value": 1.8559533279845721 }, "SkewCategoricalJointEntropy": { - "compute_time": 0.0739495230227476, "value": 1.7769649555998623 }, "SkewCategoricalMutualInformation": { - "compute_time": 0.07939626401639543, "value": 3.875490250344927 }, "SkewClassProbability": { - "compute_time": 0.0012301289971219376, "value": 2.4645212079004602e-16 }, "SkewDecisionTreeAttribute": { - "compute_time": 0.039824964027502574, "value": 
3.451450787668906 }, "SkewDecisionTreeBranchLength": { - "compute_time": 0.039884808036731556, "value": 0.5980222656789738 }, "SkewDecisionTreeLevelSize": { - "compute_time": 0.039929140024469234, "value": 0.5765667421352726 }, "SkewKurtosisOfNumericFeatures": { - "compute_time": 0.0018031450163107365, "value": 1.7025904731327763 }, "SkewKurtosisOfStringLengthOfTextFeatures": { - "compute_time": 4.747702041640878e-05, "value": NaN }, "SkewMeansOfNumericFeatures": { - "compute_time": 0.001676646017585881, "value": 0.3777827539698624 }, "SkewMeansOfStringLengthOfTextFeatures": { - "compute_time": 4.871402052231133e-05, "value": NaN }, "SkewNumericAttributeEntropy": { - "compute_time": 0.021672753005987033, "value": -1.3148700862947547 }, "SkewNumericJointEntropy": { - "compute_time": 0.02701053602504544, "value": -1.268339377641566 }, "SkewNumericMutualInformation": { - "compute_time": 0.1101939080253942, "value": 1.5338906741146343 }, "SkewSkewnessOfNumericFeatures": { - "compute_time": 0.0017805700190365314, "value": 1.776083146506227 }, "SkewSkewnessOfStringLengthOfTextFeatures": { - "compute_time": 4.742601595353335e-05, "value": NaN }, "SkewStdDevOfNumericFeatures": { - "compute_time": 0.0018315060005988926, "value": -0.31628077149128647 }, "SkewStdDevOfStringLengthOfTextFeatures": { - "compute_time": 4.774001718033105e-05, "value": NaN }, "StdevCardinalityOfCategoricalFeatures": { - "compute_time": 0.0033224879880435765, "value": 0.7101612523427369 }, "StdevCardinalityOfNumericFeatures": { - "compute_time": 0.0013872389972675592, "value": 88.44810154359824 }, "StdevCategoricalAttributeEntropy": { - "compute_time": 0.026999106019502506, "value": 0.24659802585010002 }, "StdevCategoricalJointEntropy": { - "compute_time": 0.0739495230227476, "value": 0.23956528678973543 }, "StdevCategoricalMutualInformation": { - "compute_time": 0.07939626401639543, "value": 0.00886031490652107 }, "StdevClassProbability": { - "compute_time": 0.0012301289971219376, "value": 
0.6204993228333702 }, "StdevDecisionTreeAttribute": { - "compute_time": 0.039824964027502574, "value": 11.444238211226388 }, "StdevDecisionTreeBranchLength": { - "compute_time": 0.039884808036731556, "value": 1.551551779725726 }, "StdevDecisionTreeLevelSize": { - "compute_time": 0.039929140024469234, "value": 7.386720271110607 }, "StdevKurtosisOfNumericFeatures": { - "compute_time": 0.0018031450163107365, "value": 92.56654555918352 }, "StdevKurtosisOfStringLengthOfTextFeatures": { - "compute_time": 4.747702041640878e-05, "value": NaN }, "StdevMeansOfNumericFeatures": { - "compute_time": 0.001676646017585881, "value": 52.35637003353562 }, "StdevMeansOfStringLengthOfTextFeatures": { - "compute_time": 4.871402052231133e-05, "value": NaN }, "StdevNumericAttributeEntropy": { - "compute_time": 0.021672753005987033, "value": 0.5768649195847191 }, "StdevNumericJointEntropy": { - "compute_time": 0.02701053602504544, "value": 0.56604507540674 }, "StdevNumericMutualInformation": { - "compute_time": 0.1101939080253942, "value": 0.039885364866017826 }, "StdevSkewnessOfNumericFeatures": { - "compute_time": 0.0017805700190365314, "value": 5.060654984907634 }, "StdevSkewnessOfStringLengthOfTextFeatures": { - "compute_time": 4.742601595353335e-05, "value": NaN }, "StdevStdDevOfNumericFeatures": { - "compute_time": 0.0018315060005988926, "value": 15.425433204049687 }, "StdevStdDevOfStringLengthOfTextFeatures": { - "compute_time": 4.774001718033105e-05, "value": NaN }, "kNN1NErrRate": { - "compute_time": 0.3015885370259639, "value": 0.09093279580094993 }, "kNN1NKappa": { - "compute_time": 0.3015885370259639, "value": 0.16702041294319114 } } \ No newline at end of file diff --git a/tests/data/dataset_metafeatures/small_test_dataset_mf.json b/tests/data/dataset_metafeatures/small_test_dataset_mf.json index afe40c5..4352d0b 100644 --- a/tests/data/dataset_metafeatures/small_test_dataset_mf.json +++ b/tests/data/dataset_metafeatures/small_test_dataset_mf.json @@ -1,926 +1,695 @@ { 
"CategoricalNoiseToSignalRatio": { - "compute_time": 0.005346329984604381, "value": 1.0834895643494507 }, "ClassEntropy": { - "compute_time": 0.000441155003500171, "value": 1.3321790402101223 }, "DecisionStumpErrRate": { - "compute_time": 0.006736217983416282, "value": 0.7 }, "DecisionStumpKappa": { - "compute_time": 0.006736217983416282, "value": 0.05882352941176472 }, "DecisionTreeHeight": { - "compute_time": 0.0038008579722372815, "value": 4 }, "DecisionTreeLeafCount": { - "compute_time": 0.0038008579722372815, "value": 5 }, "DecisionTreeNodeCount": { - "compute_time": 0.0038008579722372815, "value": 9 }, "DecisionTreeWidth": { - "compute_time": 0.0038148829771671444, "value": 4 }, "Dimensionality": { - "compute_time": 5.145699833519757e-05, "value": 0.5 }, "EquivalentNumberOfCategoricalFeatures": { - "compute_time": 0.00424534898775164, "value": 2.661227372130404 }, "EquivalentNumberOfNumericFeatures": { - "compute_time": 0.008189906991901807, "value": 3.521069926069284 }, "KurtosisCardinalityOfCategoricalFeatures": { - "compute_time": 0.0006913660035934299, "value": -2.0 }, "KurtosisCardinalityOfNumericFeatures": { - "compute_time": 0.0006070169911254197, "value": -2.0 }, "KurtosisCategoricalAttributeEntropy": { - "compute_time": 0.0015430200000992045, "value": -2.0 }, "KurtosisCategoricalJointEntropy": { - "compute_time": 0.0037680409877793863, "value": -2.0000000000000004 }, "KurtosisCategoricalMutualInformation": { - "compute_time": 0.0038024309906177223, "value": -2.0 }, "KurtosisClassProbability": { - "compute_time": 0.001121197987231426, "value": -0.6666666666666661 }, "KurtosisDecisionTreeAttribute": { - "compute_time": 0.004210929968394339, "value": -0.8512709572742021 }, "KurtosisDecisionTreeBranchLength": { - "compute_time": 0.004202749973046593, "value": 0.24999999999999867 }, "KurtosisDecisionTreeLevelSize": { - "compute_time": 0.0042495789675740525, "value": -0.9030470914127422 }, "KurtosisKurtosisOfNumericFeatures": { - "compute_time": 
0.0008750189881538972, "value": -2.0 }, "KurtosisKurtosisOfStringLengthOfTextFeatures": { - "compute_time": 2.5147979613393545e-05, "value": NaN }, "KurtosisMeansOfNumericFeatures": { - "compute_time": 0.0008603659953223541, "value": -2.0 }, "KurtosisMeansOfStringLengthOfTextFeatures": { - "compute_time": 2.5986984837800264e-05, "value": NaN }, "KurtosisNumericAttributeEntropy": { - "compute_time": 0.0053484209784073755, "value": -2.0 }, "KurtosisNumericJointEntropy": { - "compute_time": 0.007720729990978725, "value": -2.0 }, "KurtosisNumericMutualInformation": { - "compute_time": 0.007748025993350893, "value": -2.0 }, "KurtosisSkewnessOfNumericFeatures": { - "compute_time": 0.000864130983245559, "value": -1.9999999999999998 }, "KurtosisSkewnessOfStringLengthOfTextFeatures": { - "compute_time": 2.5009983801282942e-05, "value": NaN }, "KurtosisStdDevOfNumericFeatures": { - "compute_time": 0.000890960989636369, "value": -1.9999999999999998 }, "KurtosisStdDevOfStringLengthOfTextFeatures": { - "compute_time": 2.5344983441755176e-05, "value": NaN }, "LinearDiscriminantAnalysisErrRate": { - "compute_time": 0.007150971970986575, "value": 0.7 }, "LinearDiscriminantAnalysisKappa": { - "compute_time": 0.007150971970986575, "value": 0.042397660818713545 }, "MajorityClassSize": { - "compute_time": 0.001121197987231426, "value": 4 }, "MaxCardinalityOfCategoricalFeatures": { - "compute_time": 0.0006913660035934299, "value": 6.0 }, "MaxCardinalityOfNumericFeatures": { - "compute_time": 0.0006070169911254197, "value": 9.0 }, "MaxCategoricalAttributeEntropy": { - "compute_time": 0.0015430200000992045, "value": 1.4750763110546947 }, "MaxCategoricalJointEntropy": { - "compute_time": 0.0037680409877793863, "value": 1.945910149055313 }, "MaxCategoricalMutualInformation": { - "compute_time": 0.0038024309906177223, "value": 0.8062004214655205 }, "MaxClassProbability": { - "compute_time": 0.001121197987231426, "value": 0.4 }, "MaxDecisionTreeAttribute": { - "compute_time": 
0.004210929968394339, "value": 5.0 }, "MaxDecisionTreeBranchLength": { - "compute_time": 0.004202749973046593, "value": 3.0 }, "MaxDecisionTreeLevelSize": { - "compute_time": 0.0042495789675740525, "value": 4.0 }, "MaxKurtosisOfNumericFeatures": { - "compute_time": 0.0008750189881538972, "value": -0.833323850726984 }, "MaxKurtosisOfStringLengthOfTextFeatures": { - "compute_time": 2.5147979613393545e-05, "value": NaN }, "MaxMeansOfNumericFeatures": { - "compute_time": 0.0008603659953223541, "value": 0.5714285714285714 }, "MaxMeansOfStringLengthOfTextFeatures": { - "compute_time": 2.5986984837800264e-05, "value": NaN }, "MaxNumericAttributeEntropy": { - "compute_time": 0.0053484209784073755, "value": 0.6931471805599453 }, "MaxNumericJointEntropy": { - "compute_time": 0.007720729990978725, "value": 1.5498260458782016 }, "MaxNumericMutualInformation": { - "compute_time": 0.007748025993350893, "value": 0.4101163182884089 }, "MaxSkewnessOfNumericFeatures": { - "compute_time": 0.000864130983245559, "value": 0.5062323443248941 }, "MaxSkewnessOfStringLengthOfTextFeatures": { - "compute_time": 2.5009983801282942e-05, "value": NaN }, "MaxStdDevOfNumericFeatures": { - "compute_time": 0.000890960989636369, "value": 2.3704530408864084 }, "MaxStdDevOfStringLengthOfTextFeatures": { - "compute_time": 2.5344983441755176e-05, "value": NaN }, "MeanCardinalityOfCategoricalFeatures": { - "compute_time": 0.0006913660035934299, "value": 4.0 }, "MeanCardinalityOfNumericFeatures": { - "compute_time": 0.0006070169911254197, "value": 7.5 }, "MeanCategoricalAttributeEntropy": { - "compute_time": 0.0015430200000992045, "value": 1.0429703065547942 }, "MeanCategoricalJointEntropy": { - "compute_time": 0.0037680409877793863, "value": 1.846988748800701 }, "MeanCategoricalMutualInformation": { - "compute_time": 0.0038024309906177223, "value": 0.5005882075922236 }, "MeanClassProbability": { - "compute_time": 0.001121197987231426, "value": 0.25 }, "MeanDecisionTreeAttribute": { - "compute_time": 
0.004210929968394339, "value": 2.25 }, "MeanDecisionTreeBranchLength": { - "compute_time": 0.004202749973046593, "value": 2.6 }, "MeanDecisionTreeLevelSize": { - "compute_time": 0.0042495789675740525, "value": 2.25 }, "MeanKurtosisOfNumericFeatures": { - "compute_time": 0.0008750189881538972, "value": -1.3493249532290488 }, "MeanKurtosisOfStringLengthOfTextFeatures": { - "compute_time": 2.5147979613393545e-05, "value": NaN }, "MeanMeansOfNumericFeatures": { - "compute_time": 0.0008603659953223541, "value": 0.5505892857142857 }, "MeanMeansOfStringLengthOfTextFeatures": { - "compute_time": 2.5986984837800264e-05, "value": NaN }, "MeanNumericAttributeEntropy": { - "compute_time": 0.0053484209784073755, "value": 0.6880276426302085 }, "MeanNumericJointEntropy": { - "compute_time": 0.007720729990978725, "value": 1.468060203499046 }, "MeanNumericMutualInformation": { - "compute_time": 0.007748025993350893, "value": 0.3783449542841908 }, "MeanSkewnessOfNumericFeatures": { - "compute_time": 0.000864130983245559, "value": 0.42456949227053975 }, "MeanSkewnessOfStringLengthOfTextFeatures": { - "compute_time": 2.5009983801282942e-05, "value": NaN }, "MeanStdDevOfNumericFeatures": { - "compute_time": 0.000890960989636369, "value": 1.306626611641099 }, "MeanStdDevOfStringLengthOfTextFeatures": { - "compute_time": 2.5344983441755176e-05, "value": NaN }, "MinCardinalityOfCategoricalFeatures": { - "compute_time": 0.0006913660035934299, "value": 2.0 }, "MinCardinalityOfNumericFeatures": { - "compute_time": 0.0006070169911254197, "value": 6.0 }, "MinCategoricalAttributeEntropy": { - "compute_time": 0.0015430200000992045, "value": 0.6108643020548935 }, "MinCategoricalJointEntropy": { - "compute_time": 0.0037680409877793863, "value": 1.7480673485460894 }, "MinCategoricalMutualInformation": { - "compute_time": 0.0038024309906177223, "value": 0.1949759937189266 }, "MinClassProbability": { - "compute_time": 0.001121197987231426, "value": 0.2 }, "MinDecisionTreeAttribute": { - 
"compute_time": 0.004210929968394339, "value": 1.0 }, "MinDecisionTreeBranchLength": { - "compute_time": 0.004202749973046593, "value": 1.0 }, "MinDecisionTreeLevelSize": { - "compute_time": 0.0042495789675740525, "value": 1.0 }, "MinKurtosisOfNumericFeatures": { - "compute_time": 0.0008750189881538972, "value": -1.8653260557311135 }, "MinKurtosisOfStringLengthOfTextFeatures": { - "compute_time": 2.5147979613393545e-05, "value": NaN }, "MinMeansOfNumericFeatures": { - "compute_time": 0.0008603659953223541, "value": 0.5297499999999999 }, "MinMeansOfStringLengthOfTextFeatures": { - "compute_time": 2.5986984837800264e-05, "value": NaN }, "MinNumericAttributeEntropy": { - "compute_time": 0.0053484209784073755, "value": 0.6829081047004717 }, "MinNumericJointEntropy": { - "compute_time": 0.007720729990978725, "value": 1.3862943611198906 }, "MinNumericMutualInformation": { - "compute_time": 0.007748025993350893, "value": 0.3465735902799727 }, "MinSkewnessOfNumericFeatures": { - "compute_time": 0.000864130983245559, "value": 0.3429066402161854 }, "MinSkewnessOfStringLengthOfTextFeatures": { - "compute_time": 2.5009983801282942e-05, "value": NaN }, "MinStdDevOfNumericFeatures": { - "compute_time": 0.000890960989636369, "value": 0.24280018239578932 }, "MinStdDevOfStringLengthOfTextFeatures": { - "compute_time": 2.5344983441755176e-05, "value": NaN }, "MinorityClassSize": { - "compute_time": 0.001121197987231426, "value": 2 }, "NaiveBayesErrRate": { - "compute_time": 0.00838631197984796, "value": 0.7 }, "NaiveBayesKappa": { - "compute_time": 0.00838631197984796, "value": -0.05555555555555547 }, "NumberOfCategoricalFeatures": { - "compute_time": 2.5093002477660775e-05, "value": 2 }, "NumberOfClasses": { - "compute_time": 0.001121197987231426, "value": 4 }, "NumberOfDistinctTokens": { - "compute_time": 5.389300349634141e-05, "value": 0 }, "NumberOfFeatures": { - "compute_time": 2.5093002477660775e-05, "value": 5 }, "NumberOfFeaturesWithMissingValues": { - "compute_time": 
0.0015022940060589463, "value": 4 }, "NumberOfInstances": { - "compute_time": 2.5093002477660775e-05, "value": 10 }, "NumberOfInstancesWithMissingValues": { - "compute_time": 0.0015022940060589463, "value": 10 }, "NumberOfMissingValues": { - "compute_time": 0.0015022940060589463, "value": 18 }, "NumberOfNumericFeatures": { - "compute_time": 2.5093002477660775e-05, "value": 3 }, "NumberOfTokens": { - "compute_time": 5.389300349634141e-05, "value": 0 }, "NumberOfTokensContainingNumericChar": { - "compute_time": 5.389300349634141e-05, "value": 0 }, "NumericNoiseToSignalRatio": { - "compute_time": 0.013097035975079052, "value": 0.8185194089132799 }, "PredDet": { - "compute_time": 0.0036651159753091633, "value": 1.0271201550018805e-08 }, "PredEigen1": { - "compute_time": 0.0036651159753091633, "value": 6.114621511001614 }, "PredEigen2": { - "compute_time": 0.0036651159753091633, "value": 0.39471862161052784 }, "PredEigen3": { - "compute_time": 0.0036651159753091633, "value": 0.2437796639097066 }, "PredPCA1": { - "compute_time": 0.0036651159753091633, "value": 0.8662514396391524 }, "PredPCA2": { - "compute_time": 0.0036651159753091633, "value": 0.05591933590775793 }, "PredPCA3": { - "compute_time": 0.0036651159753091633, "value": 0.03453598631355685 }, "Quartile1CardinalityOfCategoricalFeatures": { - "compute_time": 0.0006913660035934299, "value": 3.0 }, "Quartile1CardinalityOfNumericFeatures": { - "compute_time": 0.0006070169911254197, "value": 6.75 }, "Quartile1CategoricalAttributeEntropy": { - "compute_time": 0.0015430200000992045, "value": 0.8269173043048439 }, "Quartile1CategoricalJointEntropy": { - "compute_time": 0.0037680409877793863, "value": 1.7975280486733953 }, "Quartile1CategoricalMutualInformation": { - "compute_time": 0.0038024309906177223, "value": 0.3477821006555751 }, "Quartile1ClassProbability": { - "compute_time": 0.001121197987231426, "value": 0.2 }, "Quartile1DecisionTreeAttribute": { - "compute_time": 0.004210929968394339, "value": 1.0 }, 
"Quartile1DecisionTreeBranchLength": { - "compute_time": 0.004202749973046593, "value": 3.0 }, "Quartile1DecisionTreeLevelSize": { - "compute_time": 0.0042495789675740525, "value": 1.75 }, "Quartile1KurtosisOfNumericFeatures": { - "compute_time": 0.0008750189881538972, "value": -1.6073255044800812 }, "Quartile1KurtosisOfStringLengthOfTextFeatures": { - "compute_time": 2.5147979613393545e-05, "value": NaN }, "Quartile1MeansOfNumericFeatures": { - "compute_time": 0.0008603659953223541, "value": 0.5401696428571428 }, "Quartile1MeansOfStringLengthOfTextFeatures": { - "compute_time": 2.5986984837800264e-05, "value": NaN }, "Quartile1NumericAttributeEntropy": { - "compute_time": 0.0053484209784073755, "value": 0.6854678736653401 }, "Quartile1NumericJointEntropy": { - "compute_time": 0.007720729990978725, "value": 1.4271772823094682 }, "Quartile1NumericMutualInformation": { - "compute_time": 0.007748025993350893, "value": 0.36245927228208175 }, "Quartile1SkewnessOfNumericFeatures": { - "compute_time": 0.000864130983245559, "value": 0.3837380662433626 }, "Quartile1SkewnessOfStringLengthOfTextFeatures": { - "compute_time": 2.5009983801282942e-05, "value": NaN }, "Quartile1StdDevOfNumericFeatures": { - "compute_time": 0.000890960989636369, "value": 0.7747133970184441 }, "Quartile1StdDevOfStringLengthOfTextFeatures": { - "compute_time": 2.5344983441755176e-05, "value": NaN }, "Quartile2CardinalityOfCategoricalFeatures": { - "compute_time": 0.0006913660035934299, "value": 4.0 }, "Quartile2CardinalityOfNumericFeatures": { - "compute_time": 0.0006070169911254197, "value": 7.5 }, "Quartile2CategoricalAttributeEntropy": { - "compute_time": 0.0015430200000992045, "value": 1.0429703065547942 }, "Quartile2CategoricalJointEntropy": { - "compute_time": 0.0037680409877793863, "value": 1.846988748800701 }, "Quartile2CategoricalMutualInformation": { - "compute_time": 0.0038024309906177223, "value": 0.5005882075922236 }, "Quartile2ClassProbability": { - "compute_time": 
0.001121197987231426, "value": 0.2 }, "Quartile2DecisionTreeAttribute": { - "compute_time": 0.004210929968394339, "value": 1.5 }, "Quartile2DecisionTreeBranchLength": { - "compute_time": 0.004202749973046593, "value": 3.0 }, "Quartile2DecisionTreeLevelSize": { - "compute_time": 0.0042495789675740525, "value": 2.0 }, "Quartile2KurtosisOfNumericFeatures": { - "compute_time": 0.0008750189881538972, "value": -1.3493249532290488 }, "Quartile2KurtosisOfStringLengthOfTextFeatures": { - "compute_time": 2.5147979613393545e-05, "value": NaN }, "Quartile2MeansOfNumericFeatures": { - "compute_time": 0.0008603659953223541, "value": 0.5505892857142857 }, "Quartile2MeansOfStringLengthOfTextFeatures": { - "compute_time": 2.5986984837800264e-05, "value": NaN }, "Quartile2NumericAttributeEntropy": { - "compute_time": 0.0053484209784073755, "value": 0.6880276426302085 }, "Quartile2NumericJointEntropy": { - "compute_time": 0.007720729990978725, "value": 1.468060203499046 }, "Quartile2NumericMutualInformation": { - "compute_time": 0.007748025993350893, "value": 0.3783449542841908 }, "Quartile2SkewnessOfNumericFeatures": { - "compute_time": 0.000864130983245559, "value": 0.42456949227053975 }, "Quartile2SkewnessOfStringLengthOfTextFeatures": { - "compute_time": 2.5009983801282942e-05, "value": NaN }, "Quartile2StdDevOfNumericFeatures": { - "compute_time": 0.000890960989636369, "value": 1.306626611641099 }, "Quartile2StdDevOfStringLengthOfTextFeatures": { - "compute_time": 2.5344983441755176e-05, "value": NaN }, "Quartile3CardinalityOfCategoricalFeatures": { - "compute_time": 0.0006913660035934299, "value": 5.0 }, "Quartile3CardinalityOfNumericFeatures": { - "compute_time": 0.0006070169911254197, "value": 8.25 }, "Quartile3CategoricalAttributeEntropy": { - "compute_time": 0.0015430200000992045, "value": 1.2590233088047444 }, "Quartile3CategoricalJointEntropy": { - "compute_time": 0.0037680409877793863, "value": 1.896449448928007 }, "Quartile3CategoricalMutualInformation": { - 
"compute_time": 0.0038024309906177223, "value": 0.653394314528872 }, "Quartile3ClassProbability": { - "compute_time": 0.001121197987231426, "value": 0.25 }, "Quartile3DecisionTreeAttribute": { - "compute_time": 0.004210929968394339, "value": 2.75 }, "Quartile3DecisionTreeBranchLength": { - "compute_time": 0.004202749973046593, "value": 3.0 }, "Quartile3DecisionTreeLevelSize": { - "compute_time": 0.0042495789675740525, "value": 2.5 }, "Quartile3KurtosisOfNumericFeatures": { - "compute_time": 0.0008750189881538972, "value": -1.0913244019780164 }, "Quartile3KurtosisOfStringLengthOfTextFeatures": { - "compute_time": 2.5147979613393545e-05, "value": NaN }, "Quartile3MeansOfNumericFeatures": { - "compute_time": 0.0008603659953223541, "value": 0.5610089285714286 }, "Quartile3MeansOfStringLengthOfTextFeatures": { - "compute_time": 2.5986984837800264e-05, "value": NaN }, "Quartile3NumericAttributeEntropy": { - "compute_time": 0.0053484209784073755, "value": 0.6905874115950769 }, "Quartile3NumericJointEntropy": { - "compute_time": 0.007720729990978725, "value": 1.5089431246886238 }, "Quartile3NumericMutualInformation": { - "compute_time": 0.007748025993350893, "value": 0.39423063628629984 }, "Quartile3SkewnessOfNumericFeatures": { - "compute_time": 0.000864130983245559, "value": 0.46540091829771685 }, "Quartile3SkewnessOfStringLengthOfTextFeatures": { - "compute_time": 2.5009983801282942e-05, "value": NaN }, "Quartile3StdDevOfNumericFeatures": { - "compute_time": 0.000890960989636369, "value": 1.8385398262637536 }, "Quartile3StdDevOfStringLengthOfTextFeatures": { - "compute_time": 2.5344983441755176e-05, "value": NaN }, "RandomTreeDepth1ErrRate": { - "compute_time": 0.006632053991779685, "value": 0.8 }, "RandomTreeDepth1Kappa": { - "compute_time": 0.006632053991779685, "value": -0.1356209150326797 }, "RandomTreeDepth2ErrRate": { - "compute_time": 0.006654870987404138, "value": 0.8 }, "RandomTreeDepth2Kappa": { - "compute_time": 0.006654870987404138, "value": 
-0.08187134502923976 }, "RandomTreeDepth3ErrRate": { - "compute_time": 0.006593338985112496, "value": 0.8 }, "RandomTreeDepth3Kappa": { - "compute_time": 0.006593338985112496, "value": -0.02631578947368418 }, "RatioOfCategoricalFeatures": { - "compute_time": 2.5093002477660775e-05, "value": 0.4 }, "RatioOfDistinctTokens": { - "compute_time": 5.389300349634141e-05, "value": 0 }, "RatioOfFeaturesWithMissingValues": { - "compute_time": 0.0015022940060589463, "value": 0.8 }, "RatioOfInstancesWithMissingValues": { - "compute_time": 0.0015022940060589463, "value": 1.0 }, "RatioOfMissingValues": { - "compute_time": 0.0015022940060589463, "value": 0.36 }, "RatioOfNumericFeatures": { - "compute_time": 2.5093002477660775e-05, "value": 0.6 }, "RatioOfTokensContainingNumericChar": { - "compute_time": 5.389300349634141e-05, "value": 0 }, "SkewCardinalityOfCategoricalFeatures": { - "compute_time": 0.0006913660035934299, "value": 0.0 }, "SkewCardinalityOfNumericFeatures": { - "compute_time": 0.0006070169911254197, "value": 0.0 }, "SkewCategoricalAttributeEntropy": { - "compute_time": 0.0015430200000992045, "value": -4.3002068602548916e-16 }, "SkewCategoricalJointEntropy": { - "compute_time": 0.0037680409877793863, "value": 3.360166224435086e-15 }, "SkewCategoricalMutualInformation": { - "compute_time": 0.0038024309906177223, "value": 0.0 }, "SkewClassProbability": { - "compute_time": 0.001121197987231426, "value": 1.154700538379252 }, "SkewDecisionTreeAttribute": { - "compute_time": 0.004210929968394339, "value": 0.9575491625535641 }, "SkewDecisionTreeBranchLength": { - "compute_time": 0.004202749973046593, "value": -1.4999999999999996 }, "SkewDecisionTreeLevelSize": { - "compute_time": 0.0042495789675740525, "value": 0.6520236646847545 }, "SkewKurtosisOfNumericFeatures": { - "compute_time": 0.0008750189881538972, "value": 0.0 }, "SkewKurtosisOfStringLengthOfTextFeatures": { - "compute_time": 2.5147979613393545e-05, "value": NaN }, "SkewMeansOfNumericFeatures": { - 
"compute_time": 0.0008603659953223541, "value": 0.0 }, "SkewMeansOfStringLengthOfTextFeatures": { - "compute_time": 2.5986984837800264e-05, "value": NaN }, "SkewNumericAttributeEntropy": { - "compute_time": 0.0053484209784073755, "value": -3.2549324088873523e-14 }, "SkewNumericJointEntropy": { - "compute_time": 0.007720729990978725, "value": 4.065822249747896e-15 }, "SkewNumericMutualInformation": { - "compute_time": 0.007748025993350893, "value": 0.0 }, "SkewSkewnessOfNumericFeatures": { - "compute_time": 0.000864130983245559, "value": -9.954206526050281e-16 }, "SkewSkewnessOfStringLengthOfTextFeatures": { - "compute_time": 2.5009983801282942e-05, "value": NaN }, "SkewStdDevOfNumericFeatures": { - "compute_time": 0.000890960989636369, "value": -2.7664266387282363e-16 }, "SkewStdDevOfStringLengthOfTextFeatures": { - "compute_time": 2.5344983441755176e-05, "value": NaN }, "StdevCardinalityOfCategoricalFeatures": { - "compute_time": 0.0006913660035934299, "value": 2.8284271247461903 }, "StdevCardinalityOfNumericFeatures": { - "compute_time": 0.0006070169911254197, "value": 2.1213203435596424 }, "StdevCategoricalAttributeEntropy": { - "compute_time": 0.0015430200000992045, "value": 0.6110901719466091 }, "StdevCategoricalJointEntropy": { - "compute_time": 0.0037680409877793863, "value": 0.1398959858490094 }, "StdevCategoricalMutualInformation": { - "compute_time": 0.0038024309906177223, "value": 0.4322009376864835 }, "StdevClassProbability": { - "compute_time": 0.001121197987231426, "value": 0.1 }, "StdevDecisionTreeAttribute": { - "compute_time": 0.004210929968394339, "value": 1.8929694486000912 }, "StdevDecisionTreeBranchLength": { - "compute_time": 0.004202749973046593, "value": 0.8944271909999161 }, "StdevDecisionTreeLevelSize": { - "compute_time": 0.0042495789675740525, "value": 1.2583057392117916 }, "StdevKurtosisOfNumericFeatures": { - "compute_time": 0.0008750189881538972, "value": 0.7297357573578896 }, "StdevKurtosisOfStringLengthOfTextFeatures": { - 
"compute_time": 2.5147979613393545e-05, "value": NaN }, "StdevMeansOfNumericFeatures": { - "compute_time": 0.0008603659953223541, "value": 0.029471200487310768 }, "StdevMeansOfStringLengthOfTextFeatures": { - "compute_time": 2.5986984837800264e-05, "value": NaN }, "StdevNumericAttributeEntropy": { - "compute_time": 0.0053484209784073755, "value": 0.007240119973317253 }, "StdevNumericJointEntropy": { - "compute_time": 0.007720729990978725, "value": 0.1156343632314625 }, "StdevNumericMutualInformation": { - "compute_time": 0.007748025993350893, "value": 0.04493149386985759 }, "StdevSkewnessOfNumericFeatures": { - "compute_time": 0.000864130983245559, "value": 0.11548871291733546 }, "StdevSkewnessOfStringLengthOfTextFeatures": { - "compute_time": 2.5009983801282942e-05, "value": NaN }, "StdevStdDevOfNumericFeatures": { - "compute_time": 0.000890960989636369, "value": 1.5044777642496585 }, "StdevStdDevOfStringLengthOfTextFeatures": { - "compute_time": 2.5344983441755176e-05, "value": NaN }, "kNN1NErrRate": { - "compute_time": 0.009328829983132891, "value": 0.8 }, "kNN1NKappa": { - "compute_time": 0.009328829983132891, "value": -0.02631578947368407 } } \ No newline at end of file diff --git a/tests/data/dataset_metafeatures/small_test_dataset_with_text_mf.json b/tests/data/dataset_metafeatures/small_test_dataset_with_text_mf.json index 4a22cc4..1eb93b5 100644 --- a/tests/data/dataset_metafeatures/small_test_dataset_with_text_mf.json +++ b/tests/data/dataset_metafeatures/small_test_dataset_with_text_mf.json @@ -1,926 +1,695 @@ { "CategoricalNoiseToSignalRatio": { - "compute_time": 0.004996385017875582, "value": 1.0834895643494507 }, "ClassEntropy": { - "compute_time": 0.00038413300353568047, "value": 1.3321790402101223 }, "DecisionStumpErrRate": { - "compute_time": 0.007519432998378761, "value": 0.7 }, "DecisionStumpKappa": { - "compute_time": 0.007519432998378761, "value": 0.05882352941176472 }, "DecisionTreeHeight": { - "compute_time": 0.004755564994411543, "value": 4 
}, "DecisionTreeLeafCount": { - "compute_time": 0.004755564994411543, "value": 5 }, "DecisionTreeNodeCount": { - "compute_time": 0.004755564994411543, "value": 9 }, "DecisionTreeWidth": { - "compute_time": 0.004783199998200871, "value": 4 }, "Dimensionality": { - "compute_time": 3.920400922652334e-05, "value": 0.7 }, "EquivalentNumberOfCategoricalFeatures": { - "compute_time": 0.003919733004295267, "value": 2.661227372130404 }, "EquivalentNumberOfNumericFeatures": { - "compute_time": 0.007145581999793649, "value": 3.521069926069284 }, "KurtosisCardinalityOfCategoricalFeatures": { - "compute_time": 0.0006040810112608597, "value": -2.0 }, "KurtosisCardinalityOfNumericFeatures": { - "compute_time": 0.0005782410007668659, "value": -2.0 }, "KurtosisCategoricalAttributeEntropy": { - "compute_time": 0.0014615540130762383, "value": -2.0 }, "KurtosisCategoricalJointEntropy": { - "compute_time": 0.003323216995340772, "value": -2.0000000000000004 }, "KurtosisCategoricalMutualInformation": { - "compute_time": 0.003533922994392924, "value": -2.0 }, "KurtosisClassProbability": { - "compute_time": 0.0008413559990003705, "value": -0.6666666666666661 }, "KurtosisDecisionTreeAttribute": { - "compute_time": 0.005235812001046725, "value": -0.8512709572742021 }, "KurtosisDecisionTreeBranchLength": { - "compute_time": 0.005243924009846523, "value": 0.24999999999999867 }, "KurtosisDecisionTreeLevelSize": { - "compute_time": 0.0053174979984760284, "value": -0.9030470914127422 }, "KurtosisKurtosisOfNumericFeatures": { - "compute_time": 0.0008311910060001537, "value": -2.0 }, "KurtosisKurtosisOfStringLengthOfTextFeatures": { - "compute_time": 0.0013846859947079793, "value": -2.0 }, "KurtosisMeansOfNumericFeatures": { - "compute_time": 0.0008160220022546127, "value": -2.0 }, "KurtosisMeansOfStringLengthOfTextFeatures": { - "compute_time": 0.0014370889985002577, "value": -2.0 }, "KurtosisNumericAttributeEntropy": { - "compute_time": 0.004861487017478794, "value": -2.0 }, 
"KurtosisNumericJointEntropy": { - "compute_time": 0.00662510801339522, "value": -2.0 }, "KurtosisNumericMutualInformation": { - "compute_time": 0.006760709002264775, "value": -2.0 }, "KurtosisSkewnessOfNumericFeatures": { - "compute_time": 0.0008531590137863532, "value": -1.9999999999999998 }, "KurtosisSkewnessOfStringLengthOfTextFeatures": { - "compute_time": 0.0014534349902532995, "value": -2.0 }, "KurtosisStdDevOfNumericFeatures": { - "compute_time": 0.0008458100055577233, "value": -1.9999999999999998 }, "KurtosisStdDevOfStringLengthOfTextFeatures": { - "compute_time": 0.0014100179832894355, "value": -2.0 }, "LinearDiscriminantAnalysisErrRate": { - "compute_time": 0.008506360987666994, "value": 0.7 }, "LinearDiscriminantAnalysisKappa": { - "compute_time": 0.008506360987666994, "value": 0.042397660818713545 }, "MajorityClassSize": { - "compute_time": 0.0008413559990003705, "value": 4 }, "MaxCardinalityOfCategoricalFeatures": { - "compute_time": 0.0006040810112608597, "value": 6.0 }, "MaxCardinalityOfNumericFeatures": { - "compute_time": 0.0005782410007668659, "value": 9.0 }, "MaxCategoricalAttributeEntropy": { - "compute_time": 0.0014615540130762383, "value": 1.4750763110546947 }, "MaxCategoricalJointEntropy": { - "compute_time": 0.003323216995340772, "value": 1.945910149055313 }, "MaxCategoricalMutualInformation": { - "compute_time": 0.003533922994392924, "value": 0.8062004214655205 }, "MaxClassProbability": { - "compute_time": 0.0008413559990003705, "value": 0.4 }, "MaxDecisionTreeAttribute": { - "compute_time": 0.005235812001046725, "value": 5.0 }, "MaxDecisionTreeBranchLength": { - "compute_time": 0.005243924009846523, "value": 3.0 }, "MaxDecisionTreeLevelSize": { - "compute_time": 0.0053174979984760284, "value": 4.0 }, "MaxKurtosisOfNumericFeatures": { - "compute_time": 0.0008311910060001537, "value": -0.833323850726984 }, "MaxKurtosisOfStringLengthOfTextFeatures": { - "compute_time": 0.0013846859947079793, "value": -0.7327709457111773 }, 
"MaxMeansOfNumericFeatures": { - "compute_time": 0.0008160220022546127, "value": 0.5714285714285714 }, "MaxMeansOfStringLengthOfTextFeatures": { - "compute_time": 0.0014370889985002577, "value": 11.444444444444445 }, "MaxNumericAttributeEntropy": { - "compute_time": 0.004861487017478794, "value": 0.6931471805599453 }, "MaxNumericJointEntropy": { - "compute_time": 0.00662510801339522, "value": 1.5498260458782016 }, "MaxNumericMutualInformation": { - "compute_time": 0.006760709002264775, "value": 0.4101163182884089 }, "MaxSkewnessOfNumericFeatures": { - "compute_time": 0.0008531590137863532, "value": 0.5062323443248941 }, "MaxSkewnessOfStringLengthOfTextFeatures": { - "compute_time": 0.0014534349902532995, "value": -0.07596711654031493 }, "MaxStdDevOfNumericFeatures": { - "compute_time": 0.0008458100055577233, "value": 2.3704530408864084 }, "MaxStdDevOfStringLengthOfTextFeatures": { - "compute_time": 0.0014100179832894355, "value": 7.180993431738164 }, "MeanCardinalityOfCategoricalFeatures": { - "compute_time": 0.0006040810112608597, "value": 4.0 }, "MeanCardinalityOfNumericFeatures": { - "compute_time": 0.0005782410007668659, "value": 7.5 }, "MeanCategoricalAttributeEntropy": { - "compute_time": 0.0014615540130762383, "value": 1.0429703065547942 }, "MeanCategoricalJointEntropy": { - "compute_time": 0.003323216995340772, "value": 1.846988748800701 }, "MeanCategoricalMutualInformation": { - "compute_time": 0.003533922994392924, "value": 0.5005882075922236 }, "MeanClassProbability": { - "compute_time": 0.0008413559990003705, "value": 0.25 }, "MeanDecisionTreeAttribute": { - "compute_time": 0.005235812001046725, "value": 2.25 }, "MeanDecisionTreeBranchLength": { - "compute_time": 0.005243924009846523, "value": 2.6 }, "MeanDecisionTreeLevelSize": { - "compute_time": 0.0053174979984760284, "value": 2.25 }, "MeanKurtosisOfNumericFeatures": { - "compute_time": 0.0008311910060001537, "value": -1.3493249532290488 }, "MeanKurtosisOfStringLengthOfTextFeatures": { - 
"compute_time": 0.0013846859947079793, "value": -0.8881645688607547 }, "MeanMeansOfNumericFeatures": { - "compute_time": 0.0008160220022546127, "value": 0.5505892857142857 }, "MeanMeansOfStringLengthOfTextFeatures": { - "compute_time": 0.0014370889985002577, "value": 11.372222222222224 }, "MeanNumericAttributeEntropy": { - "compute_time": 0.004861487017478794, "value": 0.6880276426302085 }, "MeanNumericJointEntropy": { - "compute_time": 0.00662510801339522, "value": 1.468060203499046 }, "MeanNumericMutualInformation": { - "compute_time": 0.006760709002264775, "value": 0.3783449542841908 }, "MeanSkewnessOfNumericFeatures": { - "compute_time": 0.0008531590137863532, "value": 0.42456949227053975 }, "MeanSkewnessOfStringLengthOfTextFeatures": { - "compute_time": 0.0014534349902532995, "value": -0.31046540734232103 }, "MeanStdDevOfNumericFeatures": { - "compute_time": 0.0008458100055577233, "value": 1.306626611641099 }, "MeanStdDevOfStringLengthOfTextFeatures": { - "compute_time": 0.0014100179832894355, "value": 6.002849186890771 }, "MinCardinalityOfCategoricalFeatures": { - "compute_time": 0.0006040810112608597, "value": 2.0 }, "MinCardinalityOfNumericFeatures": { - "compute_time": 0.0005782410007668659, "value": 6.0 }, "MinCategoricalAttributeEntropy": { - "compute_time": 0.0014615540130762383, "value": 0.6108643020548935 }, "MinCategoricalJointEntropy": { - "compute_time": 0.003323216995340772, "value": 1.7480673485460894 }, "MinCategoricalMutualInformation": { - "compute_time": 0.003533922994392924, "value": 0.1949759937189266 }, "MinClassProbability": { - "compute_time": 0.0008413559990003705, "value": 0.2 }, "MinDecisionTreeAttribute": { - "compute_time": 0.005235812001046725, "value": 1.0 }, "MinDecisionTreeBranchLength": { - "compute_time": 0.005243924009846523, "value": 1.0 }, "MinDecisionTreeLevelSize": { - "compute_time": 0.0053174979984760284, "value": 1.0 }, "MinKurtosisOfNumericFeatures": { - "compute_time": 0.0008311910060001537, "value": 
-1.8653260557311135 }, "MinKurtosisOfStringLengthOfTextFeatures": { - "compute_time": 0.0013846859947079793, "value": -1.043558192010332 }, "MinMeansOfNumericFeatures": { - "compute_time": 0.0008160220022546127, "value": 0.5297499999999999 }, "MinMeansOfStringLengthOfTextFeatures": { - "compute_time": 0.0014370889985002577, "value": 11.3 }, "MinNumericAttributeEntropy": { - "compute_time": 0.004861487017478794, "value": 0.6829081047004717 }, "MinNumericJointEntropy": { - "compute_time": 0.00662510801339522, "value": 1.3862943611198906 }, "MinNumericMutualInformation": { - "compute_time": 0.006760709002264775, "value": 0.3465735902799727 }, "MinSkewnessOfNumericFeatures": { - "compute_time": 0.0008531590137863532, "value": 0.3429066402161854 }, "MinSkewnessOfStringLengthOfTextFeatures": { - "compute_time": 0.0014534349902532995, "value": -0.5449636981443271 }, "MinStdDevOfNumericFeatures": { - "compute_time": 0.0008458100055577233, "value": 0.24280018239578932 }, "MinStdDevOfStringLengthOfTextFeatures": { - "compute_time": 0.0014100179832894355, "value": 4.824704942043376 }, "MinorityClassSize": { - "compute_time": 0.0008413559990003705, "value": 2 }, "NaiveBayesErrRate": { - "compute_time": 0.008581198999308981, "value": 0.7 }, "NaiveBayesKappa": { - "compute_time": 0.008581198999308981, "value": -0.05555555555555547 }, "NumberOfCategoricalFeatures": { - "compute_time": 1.9059007172472775e-05, "value": 4 }, "NumberOfClasses": { - "compute_time": 0.0008413559990003705, "value": 4 }, "NumberOfDistinctTokens": { - "compute_time": 0.003991757010226138, "value": 18 }, "NumberOfFeatures": { - "compute_time": 1.9059007172472775e-05, "value": 7 }, "NumberOfFeaturesWithMissingValues": { - "compute_time": 0.0013372880057431757, "value": 5 }, "NumberOfInstances": { - "compute_time": 1.9059007172472775e-05, "value": 10 }, "NumberOfInstancesWithMissingValues": { - "compute_time": 0.0013372880057431757, "value": 10 }, "NumberOfMissingValues": { - "compute_time": 
0.0013372880057431757, "value": 19 }, "NumberOfNumericFeatures": { - "compute_time": 1.9059007172472775e-05, "value": 3 }, "NumberOfTokens": { - "compute_time": 0.003991757010226138, "value": 45 }, "NumberOfTokensContainingNumericChar": { - "compute_time": 0.003991757010226138, "value": 2 }, "NumericNoiseToSignalRatio": { - "compute_time": 0.011622850026469678, "value": 0.8185194089132799 }, "PredDet": { - "compute_time": 0.004457675007870421, "value": 1.0271201550018805e-08 }, "PredEigen1": { - "compute_time": 0.004457675007870421, "value": 6.114621511001614 }, "PredEigen2": { - "compute_time": 0.004457675007870421, "value": 0.39471862161052784 }, "PredEigen3": { - "compute_time": 0.004457675007870421, "value": 0.2437796639097066 }, "PredPCA1": { - "compute_time": 0.004457675007870421, "value": 0.8662514396391524 }, "PredPCA2": { - "compute_time": 0.004457675007870421, "value": 0.05591933590775793 }, "PredPCA3": { - "compute_time": 0.004457675007870421, "value": 0.03453598631355685 }, "Quartile1CardinalityOfCategoricalFeatures": { - "compute_time": 0.0006040810112608597, "value": 3.0 }, "Quartile1CardinalityOfNumericFeatures": { - "compute_time": 0.0005782410007668659, "value": 6.75 }, "Quartile1CategoricalAttributeEntropy": { - "compute_time": 0.0014615540130762383, "value": 0.8269173043048439 }, "Quartile1CategoricalJointEntropy": { - "compute_time": 0.003323216995340772, "value": 1.7975280486733953 }, "Quartile1CategoricalMutualInformation": { - "compute_time": 0.003533922994392924, "value": 0.3477821006555751 }, "Quartile1ClassProbability": { - "compute_time": 0.0008413559990003705, "value": 0.2 }, "Quartile1DecisionTreeAttribute": { - "compute_time": 0.005235812001046725, "value": 1.0 }, "Quartile1DecisionTreeBranchLength": { - "compute_time": 0.005243924009846523, "value": 3.0 }, "Quartile1DecisionTreeLevelSize": { - "compute_time": 0.0053174979984760284, "value": 1.75 }, "Quartile1KurtosisOfNumericFeatures": { - "compute_time": 0.0008311910060001537, 
"value": -1.6073255044800812 }, "Quartile1KurtosisOfStringLengthOfTextFeatures": { - "compute_time": 0.0013846859947079793, "value": -0.9658613804355434 }, "Quartile1MeansOfNumericFeatures": { - "compute_time": 0.0008160220022546127, "value": 0.5401696428571428 }, "Quartile1MeansOfStringLengthOfTextFeatures": { - "compute_time": 0.0014370889985002577, "value": 11.336111111111112 }, "Quartile1NumericAttributeEntropy": { - "compute_time": 0.004861487017478794, "value": 0.6854678736653401 }, "Quartile1NumericJointEntropy": { - "compute_time": 0.00662510801339522, "value": 1.4271772823094682 }, "Quartile1NumericMutualInformation": { - "compute_time": 0.006760709002264775, "value": 0.36245927228208175 }, "Quartile1SkewnessOfNumericFeatures": { - "compute_time": 0.0008531590137863532, "value": 0.3837380662433626 }, "Quartile1SkewnessOfStringLengthOfTextFeatures": { - "compute_time": 0.0014534349902532995, "value": -0.42771455274332404 }, "Quartile1StdDevOfNumericFeatures": { - "compute_time": 0.0008458100055577233, "value": 0.7747133970184441 }, "Quartile1StdDevOfStringLengthOfTextFeatures": { - "compute_time": 0.0014100179832894355, "value": 5.413777064467073 }, "Quartile2CardinalityOfCategoricalFeatures": { - "compute_time": 0.0006040810112608597, "value": 4.0 }, "Quartile2CardinalityOfNumericFeatures": { - "compute_time": 0.0005782410007668659, "value": 7.5 }, "Quartile2CategoricalAttributeEntropy": { - "compute_time": 0.0014615540130762383, "value": 1.0429703065547942 }, "Quartile2CategoricalJointEntropy": { - "compute_time": 0.003323216995340772, "value": 1.846988748800701 }, "Quartile2CategoricalMutualInformation": { - "compute_time": 0.003533922994392924, "value": 0.5005882075922236 }, "Quartile2ClassProbability": { - "compute_time": 0.0008413559990003705, "value": 0.2 }, "Quartile2DecisionTreeAttribute": { - "compute_time": 0.005235812001046725, "value": 1.5 }, "Quartile2DecisionTreeBranchLength": { - "compute_time": 0.005243924009846523, "value": 3.0 }, 
"Quartile2DecisionTreeLevelSize": { - "compute_time": 0.0053174979984760284, "value": 2.0 }, "Quartile2KurtosisOfNumericFeatures": { - "compute_time": 0.0008311910060001537, "value": -1.3493249532290488 }, "Quartile2KurtosisOfStringLengthOfTextFeatures": { - "compute_time": 0.0013846859947079793, "value": -0.8881645688607547 }, "Quartile2MeansOfNumericFeatures": { - "compute_time": 0.0008160220022546127, "value": 0.5505892857142857 }, "Quartile2MeansOfStringLengthOfTextFeatures": { - "compute_time": 0.0014370889985002577, "value": 11.372222222222224 }, "Quartile2NumericAttributeEntropy": { - "compute_time": 0.004861487017478794, "value": 0.6880276426302085 }, "Quartile2NumericJointEntropy": { - "compute_time": 0.00662510801339522, "value": 1.468060203499046 }, "Quartile2NumericMutualInformation": { - "compute_time": 0.006760709002264775, "value": 0.3783449542841908 }, "Quartile2SkewnessOfNumericFeatures": { - "compute_time": 0.0008531590137863532, "value": 0.42456949227053975 }, "Quartile2SkewnessOfStringLengthOfTextFeatures": { - "compute_time": 0.0014534349902532995, "value": -0.31046540734232103 }, "Quartile2StdDevOfNumericFeatures": { - "compute_time": 0.0008458100055577233, "value": 1.306626611641099 }, "Quartile2StdDevOfStringLengthOfTextFeatures": { - "compute_time": 0.0014100179832894355, "value": 6.002849186890771 }, "Quartile3CardinalityOfCategoricalFeatures": { - "compute_time": 0.0006040810112608597, "value": 5.0 }, "Quartile3CardinalityOfNumericFeatures": { - "compute_time": 0.0005782410007668659, "value": 8.25 }, "Quartile3CategoricalAttributeEntropy": { - "compute_time": 0.0014615540130762383, "value": 1.2590233088047444 }, "Quartile3CategoricalJointEntropy": { - "compute_time": 0.003323216995340772, "value": 1.896449448928007 }, "Quartile3CategoricalMutualInformation": { - "compute_time": 0.003533922994392924, "value": 0.653394314528872 }, "Quartile3ClassProbability": { - "compute_time": 0.0008413559990003705, "value": 0.25 }, 
"Quartile3DecisionTreeAttribute": { - "compute_time": 0.005235812001046725, "value": 2.75 }, "Quartile3DecisionTreeBranchLength": { - "compute_time": 0.005243924009846523, "value": 3.0 }, "Quartile3DecisionTreeLevelSize": { - "compute_time": 0.0053174979984760284, "value": 2.5 }, "Quartile3KurtosisOfNumericFeatures": { - "compute_time": 0.0008311910060001537, "value": -1.0913244019780164 }, "Quartile3KurtosisOfStringLengthOfTextFeatures": { - "compute_time": 0.0013846859947079793, "value": -0.810467757285966 }, "Quartile3MeansOfNumericFeatures": { - "compute_time": 0.0008160220022546127, "value": 0.5610089285714286 }, "Quartile3MeansOfStringLengthOfTextFeatures": { - "compute_time": 0.0014370889985002577, "value": 11.408333333333335 }, "Quartile3NumericAttributeEntropy": { - "compute_time": 0.004861487017478794, "value": 0.6905874115950769 }, "Quartile3NumericJointEntropy": { - "compute_time": 0.00662510801339522, "value": 1.5089431246886238 }, "Quartile3NumericMutualInformation": { - "compute_time": 0.006760709002264775, "value": 0.39423063628629984 }, "Quartile3SkewnessOfNumericFeatures": { - "compute_time": 0.0008531590137863532, "value": 0.46540091829771685 }, "Quartile3SkewnessOfStringLengthOfTextFeatures": { - "compute_time": 0.0014534349902532995, "value": -0.19321626194131797 }, "Quartile3StdDevOfNumericFeatures": { - "compute_time": 0.0008458100055577233, "value": 1.8385398262637536 }, "Quartile3StdDevOfStringLengthOfTextFeatures": { - "compute_time": 0.0014100179832894355, "value": 6.591921309314467 }, "RandomTreeDepth1ErrRate": { - "compute_time": 0.007847974004107527, "value": 0.8 }, "RandomTreeDepth1Kappa": { - "compute_time": 0.007847974004107527, "value": -0.1356209150326797 }, "RandomTreeDepth2ErrRate": { - "compute_time": 0.00788460198964458, "value": 0.8 }, "RandomTreeDepth2Kappa": { - "compute_time": 0.00788460198964458, "value": -0.08187134502923976 }, "RandomTreeDepth3ErrRate": { - "compute_time": 0.008033875987166539, "value": 0.8 }, 
"RandomTreeDepth3Kappa": { - "compute_time": 0.008033875987166539, "value": -0.02631578947368418 }, "RatioOfCategoricalFeatures": { - "compute_time": 1.9059007172472775e-05, "value": 0.5714285714285714 }, "RatioOfDistinctTokens": { - "compute_time": 0.003991757010226138, "value": 0.4 }, "RatioOfFeaturesWithMissingValues": { - "compute_time": 0.0013372880057431757, "value": 0.7142857142857143 }, "RatioOfInstancesWithMissingValues": { - "compute_time": 0.0013372880057431757, "value": 1.0 }, "RatioOfMissingValues": { - "compute_time": 0.0013372880057431757, "value": 0.2714285714285714 }, "RatioOfNumericFeatures": { - "compute_time": 1.9059007172472775e-05, "value": 0.42857142857142855 }, "RatioOfTokensContainingNumericChar": { - "compute_time": 0.003991757010226138, "value": 0.044444444444444446 }, "SkewCardinalityOfCategoricalFeatures": { - "compute_time": 0.0006040810112608597, "value": 0.0 }, "SkewCardinalityOfNumericFeatures": { - "compute_time": 0.0005782410007668659, "value": 0.0 }, "SkewCategoricalAttributeEntropy": { - "compute_time": 0.0014615540130762383, "value": -4.3002068602548916e-16 }, "SkewCategoricalJointEntropy": { - "compute_time": 0.003323216995340772, "value": 3.360166224435086e-15 }, "SkewCategoricalMutualInformation": { - "compute_time": 0.003533922994392924, "value": 0.0 }, "SkewClassProbability": { - "compute_time": 0.0008413559990003705, "value": 1.154700538379252 }, "SkewDecisionTreeAttribute": { - "compute_time": 0.005235812001046725, "value": 0.9575491625535641 }, "SkewDecisionTreeBranchLength": { - "compute_time": 0.005243924009846523, "value": -1.4999999999999996 }, "SkewDecisionTreeLevelSize": { - "compute_time": 0.0053174979984760284, "value": 0.6520236646847545 }, "SkewKurtosisOfNumericFeatures": { - "compute_time": 0.0008311910060001537, "value": 0.0 }, "SkewKurtosisOfStringLengthOfTextFeatures": { - "compute_time": 0.0013846859947079793, "value": 0.0 }, "SkewMeansOfNumericFeatures": { - "compute_time": 0.0008160220022546127, 
"value": 0.0 }, "SkewMeansOfStringLengthOfTextFeatures": { - "compute_time": 0.0014370889985002577, "value": -3.6910939996323705e-14 }, "SkewNumericAttributeEntropy": { - "compute_time": 0.004861487017478794, "value": -3.2549324088873523e-14 }, "SkewNumericJointEntropy": { - "compute_time": 0.00662510801339522, "value": 4.065822249747896e-15 }, "SkewNumericMutualInformation": { - "compute_time": 0.006760709002264775, "value": 0.0 }, "SkewSkewnessOfNumericFeatures": { - "compute_time": 0.0008531590137863532, "value": -9.954206526050281e-16 }, "SkewSkewnessOfStringLengthOfTextFeatures": { - "compute_time": 0.0014534349902532995, "value": 2.0179131481261978e-16 }, "SkewStdDevOfNumericFeatures": { - "compute_time": 0.0008458100055577233, "value": -2.7664266387282363e-16 }, "SkewStdDevOfStringLengthOfTextFeatures": { - "compute_time": 0.0014100179832894355, "value": -1.0862625678401797e-15 }, "StdevCardinalityOfCategoricalFeatures": { - "compute_time": 0.0006040810112608597, "value": 2.8284271247461903 }, "StdevCardinalityOfNumericFeatures": { - "compute_time": 0.0005782410007668659, "value": 2.1213203435596424 }, "StdevCategoricalAttributeEntropy": { - "compute_time": 0.0014615540130762383, "value": 0.6110901719466091 }, "StdevCategoricalJointEntropy": { - "compute_time": 0.003323216995340772, "value": 0.1398959858490094 }, "StdevCategoricalMutualInformation": { - "compute_time": 0.003533922994392924, "value": 0.4322009376864835 }, "StdevClassProbability": { - "compute_time": 0.0008413559990003705, "value": 0.1 }, "StdevDecisionTreeAttribute": { - "compute_time": 0.005235812001046725, "value": 1.8929694486000912 }, "StdevDecisionTreeBranchLength": { - "compute_time": 0.005243924009846523, "value": 0.8944271909999161 }, "StdevDecisionTreeLevelSize": { - "compute_time": 0.0053174979984760284, "value": 1.2583057392117916 }, "StdevKurtosisOfNumericFeatures": { - "compute_time": 0.0008311910060001537, "value": 0.7297357573578896 }, 
"StdevKurtosisOfStringLengthOfTextFeatures": { - "compute_time": 0.0013846859947079793, "value": 0.2197597693644261 }, "StdevMeansOfNumericFeatures": { - "compute_time": 0.0008160220022546127, "value": 0.029471200487310768 }, "StdevMeansOfStringLengthOfTextFeatures": { - "compute_time": 0.0014370889985002577, "value": 0.10213764617138983 }, "StdevNumericAttributeEntropy": { - "compute_time": 0.004861487017478794, "value": 0.007240119973317253 }, "StdevNumericJointEntropy": { - "compute_time": 0.00662510801339522, "value": 0.1156343632314625 }, "StdevNumericMutualInformation": { - "compute_time": 0.006760709002264775, "value": 0.04493149386985759 }, "StdevSkewnessOfNumericFeatures": { - "compute_time": 0.0008531590137863532, "value": 0.11548871291733546 }, "StdevSkewnessOfStringLengthOfTextFeatures": { - "compute_time": 0.0014534349902532995, "value": 0.331630663205507 }, "StdevStdDevOfNumericFeatures": { - "compute_time": 0.0008458100055577233, "value": 1.5044777642496585 }, "StdevStdDevOfStringLengthOfTextFeatures": { - "compute_time": 0.0014100179832894355, "value": 1.666147569494993 }, "kNN1NErrRate": { - "compute_time": 0.009689243990578689, "value": 0.8 }, "kNN1NKappa": { - "compute_time": 0.009689243990578689, "value": -0.02631578947368407 } } \ No newline at end of file From c36195ae76a00b9bc8c6987d457420639fe97b1d Mon Sep 17 00:00:00 2001 From: Mason Poggemann Date: Wed, 18 Sep 2019 09:13:28 -0600 Subject: [PATCH 7/9] added return_times to docstring --- metalearn/metafeatures/metafeatures.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/metalearn/metafeatures/metafeatures.py b/metalearn/metafeatures/metafeatures.py index 2bd8aea..e5a5f40 100644 --- a/metalearn/metafeatures/metafeatures.py +++ b/metalearn/metafeatures/metafeatures.py @@ -111,6 +111,9 @@ def compute( will be run to completion. Otherwise, execution will halt after approximately timeout seconds. Any metafeatures that have not been computed will be labeled 'TIMEOUT'. 
+ return_times: bool, default False. When true, includes compute times for + each metafeature. **Note** Metafeatures are timed as if each dependency + has to be recomputed whenever it is needed. Returns ------- From 8e71d38590d992aa2e3a7bc93c2b889f00febc48 Mon Sep 17 00:00:00 2001 From: Mason Poggemann Date: Wed, 18 Sep 2019 09:27:32 -0600 Subject: [PATCH 8/9] added not in README about compute times --- README.md | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index f0c05d3..7bdde41 100644 --- a/README.md +++ b/README.md @@ -60,7 +60,8 @@ mfs = metafeatures.compute( seed=0, n_folds=2, verbose=True, - timeout=10 + timeout=10, + return_times=True, ) print(mfs) @@ -68,6 +69,9 @@ print(mfs) # RatioOfNumericFeatures # {'RatioOfNumericFeatures': {'value': 0.5, 'compute_time': 3.9138991269283e-05}} ``` +**Warning:** Metafeatures are timed as if each dependency has to be recomputed whenever it is needed. +This means that the returned times may not be accurate for a particular application, especially if a +metafeature depends on a computationally intensive resource in multiple places. 
## Using the Test Suite From 67a13fd960b43b2fa778f1c6894cad923834ef5e Mon Sep 17 00:00:00 2001 From: Brandon Schoenfeld Date: Thu, 14 Nov 2019 15:08:28 -0700 Subject: [PATCH 9/9] cleanup --- metalearn/metafeatures/metafeatures.py | 5 +++-- tests/single_run.py | 0 2 files changed, 3 insertions(+), 2 deletions(-) delete mode 100644 tests/single_run.py diff --git a/metalearn/metafeatures/metafeatures.py b/metalearn/metafeatures/metafeatures.py index e5a5f40..fe6245d 100644 --- a/metalearn/metafeatures/metafeatures.py +++ b/metalearn/metafeatures/metafeatures.py @@ -25,6 +25,7 @@ from metalearn.metafeatures.statistical_metafeatures import metafeatures_info as statistical_metafeatures from metalearn.metafeatures.text_metafeatures import metafeatures_info as text_metafeatures + class Metafeatures(object): """ Computes metafeatures on a given tabular dataset (pandas.DataFrame) with @@ -112,8 +113,8 @@ def compute( approximately timeout seconds. Any metafeatures that have not been computed will be labeled 'TIMEOUT'. return_times: bool, default False. When true, includes compute times for - each metafeature. **Note** Metafeatures are timed as if each dependency - has to be recomputed whenever it is needed. + each metafeature. **Note** compute times are overestimated. + See https://github.com/byu-dml/metalearn/issues/205. Returns ------- diff --git a/tests/single_run.py b/tests/single_run.py deleted file mode 100644 index e69de29..0000000