Skip to content

Commit

Permalink
avoid_generating_more_column_than_data (#332)
Browse files: browse the repository at this point in the history
  • Loading branch information
peteryang1 authored Sep 25, 2024
1 parent 6d5efa8 commit cc0a86d
Show file tree
Hide file tree
Showing 2 changed files with 9 additions and 0 deletions.
7 changes: 7 additions & 0 deletions rdagent/components/coder/factor_coder/CoSTEER/evaluators.py
Original file line number Diff line number Diff line change
Expand Up @@ -394,6 +394,13 @@ def evaluate(
if version == 1:
feedback_str, _ = FactorSingleColumnEvaluator(self.scen).evaluate(implementation, gt_implementation)
conclusions.append(feedback_str)
elif version == 2:
input_shape = self.scen.input_shape
_, gen_df = self._get_df(gt_implementation, implementation)
if gen_df.shape[-1] > input_shape[-1]:
conclusions.append(
"Output dataframe has more columns than input feature which is not acceptable in feature processing tasks. Please check the implementation to avoid generating too many columns. Consider this implementation as a failure."
)

# Check if the index of the dataframe is ("datetime", "instrument")
feedback_str, _ = FactorOutputFormatEvaluator(self.scen).evaluate(implementation, gt_implementation)
Expand Down
2 changes: 2 additions & 0 deletions rdagent/scenarios/kaggle/experiment/scenario.py
Original file line number Diff line number Diff line change
Expand Up @@ -128,6 +128,8 @@ def source_data(self) -> str:
pickle.dump(X_test, open(data_folder / "X_test.pkl", "wb"))
pickle.dump(others, open(data_folder / "others.pkl", "wb"))

self.input_shape = X_train.shape

buffer = io.StringIO()
X_valid.info(verbose=True, buf=buffer, show_counts=True)
data_info = buffer.getvalue()
Expand Down

0 comments on commit cc0a86d

Please sign in to comment.