From 3d36c452ff0983800e5343834cc69f24a508ea70 Mon Sep 17 00:00:00 2001
From: Way2Learn <118058822+Xisen-Wang@users.noreply.github.com>
Date: Wed, 25 Sep 2024 16:45:08 +0800
Subject: [PATCH] feat: support Multi output (#330)

* Key changes
* Revised to support submission specifications
* Revised to support submission specifications
* revise CI
* CI-Fix
* fixing-CI
* Support COSTEER Multi-Dimension for output & bug-fix
* Revised to support submission specifications
* revise CI
* CI-Fix
* fixing-CI
* Support COSTEER Multi-Dimension for output & bug-fix
* Linting
---
 .../scenarios/kaggle/experiment/prompts.yaml |  6 ++-
 .../scenarios/kaggle/experiment/scenario.py  | 43 +++++++++++++------
 2 files changed, 35 insertions(+), 14 deletions(-)

diff --git a/rdagent/scenarios/kaggle/experiment/prompts.yaml b/rdagent/scenarios/kaggle/experiment/prompts.yaml
index f90669a8..eb09fe7b 100644
--- a/rdagent/scenarios/kaggle/experiment/prompts.yaml
+++ b/rdagent/scenarios/kaggle/experiment/prompts.yaml
@@ -314,7 +314,11 @@ kg_feature_simulator: |-
 
 kg_model_output_format: |-
   For feature related tasks, the output should be a pandas DataFrame with the new features. The columns should be the new features, and the rows should correspond to the number of samples in the input DataFrame.
-  For model related tasks, the output should be an np.ndarray with the appropriate number of predictions, each prediction being a single value. The output should be a 2D array with dimensions corresponding to the number of predictions and 1 column (e.g., (8, 1) if there are 8 predictions).
+  For model related tasks:
+    1. the output should be an np.ndarray with the appropriate number of predictions and the appropriate values within each prediction.
+    2. the output should be a 2D array with dimensions corresponding to the number of predictions and the number of values per prediction. E.g., if there are 4 predictions and each prediction contains 3 probabilities, the shape is (4, 3); if there are 8 predictions with a single value each, the shape is (8, 1).
+    3. please reference the competition's submission requirements and align with them.
+    Submission Requirements here:\n: {{submission_specifications}}
 
 kg_model_simulator: |-
   The models will be trained on the competition dataset and evaluated on their ability to predict the target. Metrics like accuracy and AUC-ROC is used to evaluate the model performance.
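As a quick illustration of the output contract the revised `kg_model_output_format` prompt describes (this sketch is not part of the patch; the arrays and their values are placeholders), a model task is expected to return a 2D `np.ndarray` whose second dimension matches the competition's submission format:

```python
import numpy as np

# Single-value predictions: 8 rows, one value each -> shape (8, 1)
single_output = np.zeros((8, 1))

# Multi-output predictions: 4 rows, 3 class probabilities each -> shape (4, 3)
multi_output = np.full((4, 3), 1 / 3)

assert single_output.ndim == 2 and single_output.shape == (8, 1)
assert multi_output.shape == (4, 3)
```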
diff --git a/rdagent/scenarios/kaggle/experiment/scenario.py b/rdagent/scenarios/kaggle/experiment/scenario.py
index e4536ab7..914f2b61 100644
--- a/rdagent/scenarios/kaggle/experiment/scenario.py
+++ b/rdagent/scenarios/kaggle/experiment/scenario.py
@@ -25,9 +25,6 @@ def __init__(self, competition: str) -> None:
         self.competition = competition
         self.competition_descriptions = crawl_descriptions(competition)
         self._source_data = self.source_data
-        self._output_format = self.output_format
-        self._interface = self.interface
-        self._simulator = self.simulator
 
         self.competition_type = None
         self.competition_description = None
@@ -35,10 +32,15 @@ def __init__(self, competition: str) -> None:
         self.competition_features = None
         self.submission_specifications = None
         self._analysis_competition_description()
-        self.if_action_choosing_based_on_UCB = KAGGLE_IMPLEMENT_SETTING.if_action_choosing_based_on_UCB
+        # Move these assignments after _analysis_competition_description
+        self._output_format = self.output_format
+        self._interface = self.interface
+        self._simulator = self.simulator
 
         self._background = self.background
 
+        self.if_action_choosing_based_on_UCB = KAGGLE_IMPLEMENT_SETTING.if_action_choosing_based_on_UCB
+
     def _analysis_competition_description(self):
         sys_prompt = (
             Environment(undefined=StrictUndefined)
@@ -61,14 +63,25 @@ def _analysis_competition_description(self):
             json_mode=True,
         )
 
-        response_json_analysis = json.loads(response_analysis)
-        self.competition_type = response_json_analysis.get("Competition Type", "No type provided")
-        self.competition_description = response_json_analysis.get("Competition Description", "No description provided")
-        self.target_description = response_json_analysis.get("Target Description", "No target provided")
-        self.competition_features = response_json_analysis.get("Competition Features", "No features provided")
-        self.submission_specifications = response_json_analysis.get(
-            "Submission Specifications", "No submission requirements provided"
-        )
+        try:
+            response_json_analysis = json.loads(response_analysis)
+            self.competition_type = response_json_analysis.get("Competition Type", "No type provided")
+            self.competition_description = response_json_analysis.get(
+                "Competition Description", "No description provided"
+            )
+            self.target_description = response_json_analysis.get("Target Description", "No target provided")
+            self.competition_features = response_json_analysis.get("Competition Features", "No features provided")
+            self.submission_specifications = response_json_analysis.get(
+                "Submission Specifications", "No submission requirements provided"
+            )
+        except json.JSONDecodeError:
+            print(f"Failed to parse JSON response: {response_analysis}")
+            # Set default values if JSON parsing fails
+            self.competition_type = "Unknown"
+            self.competition_description = "No description available"
+            self.target_description = "No target available"
+            self.competition_features = "No features available"
+            self.submission_specifications = "No submission requirements available"
 
     def get_competition_full_desc(self) -> str:
         return f"""Competition Type: {self.competition_type}
@@ -137,7 +150,11 @@ def source_data(self) -> str:
 
     @property
     def output_format(self) -> str:
-        return prompt_dict["kg_model_output_format"]
+        return (
+            Environment(undefined=StrictUndefined)
+            .from_string(prompt_dict["kg_model_output_format"])
+            .render(submission_specifications=self.submission_specifications)
+        )
 
     @property
     def interface(self) -> str:
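For reference, a minimal sketch of the template-rendering pattern the reworked `output_format` property now relies on (assuming `jinja2` is installed; the inline template string and the submission-specification value below are made-up stand-ins, not taken from the repository or any competition):

```python
from jinja2 import Environment, StrictUndefined

# Stand-in for prompt_dict["kg_model_output_format"]; the real template
# lives in prompts.yaml and is rendered the same way.
template = "Submission Requirements here: {{ submission_specifications }}"

rendered = (
    Environment(undefined=StrictUndefined)
    .from_string(template)
    .render(submission_specifications="id, class_0_prob, class_1_prob, class_2_prob")
)
print(rendered)
```

`StrictUndefined` makes the render fail loudly if `submission_specifications` is missing, matching how the scenario's other prompts (e.g., the system prompt in `_analysis_competition_description`) are rendered.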