From c1d94b54ab428d66db900d4fe5b453c1c36595e5 Mon Sep 17 00:00:00 2001 From: WinstonLiyt <104308117+WinstonLiyt@users.noreply.github.com> Date: Sat, 28 Sep 2024 06:12:06 +0800 Subject: [PATCH] fix two template (#376) --- .../digit-recognizer_template/fea_share_preprocess.py | 7 +------ .../experiment/playground-series-s4e8_template/train.py | 5 ----- 2 files changed, 1 insertion(+), 11 deletions(-) diff --git a/rdagent/scenarios/kaggle/experiment/digit-recognizer_template/fea_share_preprocess.py b/rdagent/scenarios/kaggle/experiment/digit-recognizer_template/fea_share_preprocess.py index ecf1606c..40b42051 100644 --- a/rdagent/scenarios/kaggle/experiment/digit-recognizer_template/fea_share_preprocess.py +++ b/rdagent/scenarios/kaggle/experiment/digit-recognizer_template/fea_share_preprocess.py @@ -57,7 +57,7 @@ def clean_and_impute_data(X_train, X_valid, X_test): then imputes missing values using the mean strategy. Also removes duplicate columns. """ - # Replace inf and -inf with NaN + # Replace inf and -inf with NaN X_train.replace([np.inf, -np.inf], np.nan, inplace=True) X_valid.replace([np.inf, -np.inf], np.nan, inplace=True) X_test.replace([np.inf, -np.inf], np.nan, inplace=True) @@ -68,9 +68,4 @@ def clean_and_impute_data(X_train, X_valid, X_test): X_valid = pd.DataFrame(imputer.transform(X_valid), columns=X_valid.columns) X_test = pd.DataFrame(imputer.transform(X_test), columns=X_test.columns) - # Remove duplicate columns - X_train = X_train.loc[:, ~X_train.columns.duplicated()] - X_valid = X_valid.loc[:, ~X_valid.columns.duplicated()] - X_test = X_test.loc[:, ~X_test.columns.duplicated()] - return X_train, X_valid, X_test diff --git a/rdagent/scenarios/kaggle/experiment/playground-series-s4e8_template/train.py b/rdagent/scenarios/kaggle/experiment/playground-series-s4e8_template/train.py index 73c06406..4d744d0f 100644 --- a/rdagent/scenarios/kaggle/experiment/playground-series-s4e8_template/train.py +++ 
b/rdagent/scenarios/kaggle/experiment/playground-series-s4e8_template/train.py @@ -53,11 +53,6 @@ def import_module_from_path(module_name, module_path): print(X_train.shape, X_valid.shape, X_test.shape) -# Handle inf and -inf values -X_train.replace([np.inf, -np.inf], np.nan, inplace=True) -X_valid.replace([np.inf, -np.inf], np.nan, inplace=True) -X_test.replace([np.inf, -np.inf], np.nan, inplace=True) - from sklearn.impute import SimpleImputer imputer = SimpleImputer(strategy="mean")