From 14f7d976e03c92d6e727524e0cdad8a03b585016 Mon Sep 17 00:00:00 2001 From: WinstonLiyt <104308117+WinstonLiyt@users.noreply.github.com> Date: Mon, 30 Sep 2024 12:30:42 +0800 Subject: [PATCH 1/2] fix: refine the ucb algorithm (#406) * refine the ucb algorithm * fix a ci error --- rdagent/scenarios/kaggle/developer/feedback.py | 3 +++ rdagent/scenarios/kaggle/experiment/prompts.yaml | 2 +- rdagent/scenarios/kaggle/proposal/proposal.py | 16 +++++++--------- 3 files changed, 11 insertions(+), 10 deletions(-) diff --git a/rdagent/scenarios/kaggle/developer/feedback.py b/rdagent/scenarios/kaggle/developer/feedback.py index 085bdcb8..8d4bad54 100644 --- a/rdagent/scenarios/kaggle/developer/feedback.py +++ b/rdagent/scenarios/kaggle/developer/feedback.py @@ -177,6 +177,9 @@ def generate_feedback(self, exp: Experiment, hypothesis: Hypothesis, trace: Trac elif self.scen.if_using_graph_rag: trace.knowledge_base.add_document(experiment_feedback, self.scen) + if self.scen.if_action_choosing_based_on_UCB: + self.scen.action_counts[hypothesis.action] += 1 + return HypothesisFeedback( observations=observations, hypothesis_evaluation=hypothesis_evaluation, diff --git a/rdagent/scenarios/kaggle/experiment/prompts.yaml b/rdagent/scenarios/kaggle/experiment/prompts.yaml index 5bb00ecc..b696cd46 100644 --- a/rdagent/scenarios/kaggle/experiment/prompts.yaml +++ b/rdagent/scenarios/kaggle/experiment/prompts.yaml @@ -25,7 +25,7 @@ kg_description_template: kg_background: |- You are solving a data science tasks and the type of the competition is {{ competition_type }}. - The competition description is:{{competition_description}}. + The competition description is: {{competition_description}}. We provide an overall script in file: train.py. The user will run the train.py script along with several feature and model scripts to train several model to get a good performance on this task. diff --git a/rdagent/scenarios/kaggle/proposal/proposal.py b/rdagent/scenarios/kaggle/proposal/proposal.py index 1b3f269a..517d5fdb 100644 --- a/rdagent/scenarios/kaggle/proposal/proposal.py +++ b/rdagent/scenarios/kaggle/proposal/proposal.py @@ -130,13 +130,13 @@ def generate_RAG_content(self, trace: Trace, hypothesis_and_feedback: str, targe found_hypothesis_nodes = [] for similar_node in similar_nodes: - for hypothesis_type in KG_ACTION_LIST: - hypothesis_nodes = trace.knowledge_base.get_nodes_within_steps( - start_node=similar_node, - steps=3, - constraint_labels=[hypothesis_type], - ) - found_hypothesis_nodes.extend(hypothesis_nodes[:2]) + # for hypothesis_type in KG_ACTION_LIST: + hypothesis_nodes = trace.knowledge_base.get_nodes_within_steps( + start_node=similar_node, + steps=3, + constraint_labels=[target], + ) + found_hypothesis_nodes.extend(hypothesis_nodes[:2]) found_hypothesis_nodes = sorted(list(set(found_hypothesis_nodes)), key=lambda x: len(x.content)) @@ -204,7 +204,6 @@ def execute_next_action(self, trace: Trace) -> str: for action in actions: if self.scen.action_counts[action] == 0: selected_action = action - self.scen.action_counts[selected_action] += 1 return selected_action c = self.scen.confidence_parameter @@ -216,7 +215,6 @@ def execute_next_action(self, trace: Trace) -> str: ucb_values[action] = ucb # Select action with highest UCB selected_action = max(ucb_values, key=ucb_values.get) - self.scen.action_counts[selected_action] += 1 return selected_action From 94c47d6fd5c3e38fc786a83e6d0d05e8d04498f3 Mon Sep 17 00:00:00 2001 From: WinstonLiyt <104308117+WinstonLiyt@users.noreply.github.com> Date: Mon, 30 Sep 2024 14:54:56 +0800 Subject: [PATCH 2/2] fix: fix a bug in competition metric evaluation (#407) * fix a bug in competition metric evaluation * fix a bug * fix a bug in rag loading --- rdagent/core/knowledge_base.py | 10 ++++++---- rdagent/scenarios/kaggle/experiment/prompts.yaml | 5 +++-- rdagent/scenarios/kaggle/experiment/scenario.py | 9 +++------ 3 files changed, 12 insertions(+), 12 deletions(-) diff --git a/rdagent/core/knowledge_base.py b/rdagent/core/knowledge_base.py index f4cd9ab5..f8385c1b 100644 --- a/rdagent/core/knowledge_base.py +++ b/rdagent/core/knowledge_base.py @@ -13,13 +13,15 @@ def __init__(self, path: str | Path | None = None) -> None: def load(self) -> None: if self.path is not None and self.path.exists(): with self.path.open("rb") as f: - self.__dict__.update( - pickle.load(f).__dict__, - ) # TODO: because we need to align with init function, we need a less hacky way to do this + loaded = pickle.load(f) + if isinstance(loaded, dict): + self.__dict__.update(loaded) + else: + self.__dict__.update(loaded.__dict__) def dump(self) -> None: if self.path is not None: self.path.parent.mkdir(parents=True, exist_ok=True) - pickle.dump(self, self.path.open("wb")) + pickle.dump(self.__dict__, self.path.open("wb")) else: logger.warning("KnowledgeBase path is not set, dump failed.") diff --git a/rdagent/scenarios/kaggle/experiment/prompts.yaml b/rdagent/scenarios/kaggle/experiment/prompts.yaml index b696cd46..528bd68b 100644 --- a/rdagent/scenarios/kaggle/experiment/prompts.yaml +++ b/rdagent/scenarios/kaggle/experiment/prompts.yaml @@ -11,8 +11,7 @@ kg_description_template: "Competition Features": "Two-line description of the overall features involved within the competition as background." "Submission Specifications": "The submission specification & sample submission csv descriptions for the model to output." "Submission channel number to each sample": "The number of channels in the output for each sample, e.g., 1 for regression, N for N class classification with probabilities, etc. A Integer. If not specified, it is 1." - "Evaluation Description": "A brief description for what metrics are used in evaluation. An explanation of whether a higher score is better or lower is better in terms of performance." - "Evaluation Boolean": "True" or "False" (True means the higher score the better (like accuracy); False means the lower value the better (like loss).) + "Evaluation Description": "A brief description of the metrics used in the evaluation. Please note that if `evaluation_metric_direction` is True, it indicates that higher values are better; if False, lower values are preferred." } Since these might be very similar column names in data like one_hot_encoded columns, you can use some regex to group them together. @@ -22,6 +21,8 @@ kg_description_template: {{ competition_descriptions }} The raw data information: {{ raw_data_information }} + Evaluation_metric_direction: + {{ evaluation_metric_direction }} kg_background: |- You are solving a data science tasks and the type of the competition is {{ competition_type }}. diff --git a/rdagent/scenarios/kaggle/experiment/scenario.py b/rdagent/scenarios/kaggle/experiment/scenario.py index 3906e2d5..2e32edf7 100644 --- a/rdagent/scenarios/kaggle/experiment/scenario.py +++ b/rdagent/scenarios/kaggle/experiment/scenario.py @@ -50,7 +50,8 @@ def __init__(self, competition: str) -> None: self.submission_specifications = None self.model_output_channel = None self.evaluation_desc = None - self.evaluation_metric_direction = None + self.leaderboard = leaderboard_scores(competition) + self.evaluation_metric_direction = float(self.leaderboard[0]) > float(self.leaderboard[-1]) self.vector_base = None self.mini_case = KAGGLE_IMPLEMENT_SETTING.mini_case self._analysis_competition_description() @@ -75,8 +76,6 @@ def __init__(self, competition: str) -> None: self.confidence_parameter = 1.0 self.initial_performance = 0.0 - self.leaderboard = leaderboard_scores(competition) - def _analysis_competition_description(self): sys_prompt = ( Environment(undefined=StrictUndefined) @@ -90,6 +89,7 @@ def _analysis_competition_description(self): .render( competition_descriptions=self.competition_descriptions, raw_data_information=self._source_data, + evaluation_metric_direction=self.evaluation_metric_direction, ) ) @@ -111,9 +111,6 @@ def _analysis_competition_description(self): self.evaluation_desc = response_json_analysis.get( "Evaluation Description", "No evaluation specification provided." ) - self.evaluation_metric_direction = response_json_analysis.get( - "Evaluation Boolean", "No evaluation specification provided." - ) def get_competition_full_desc(self) -> str: evaluation_direction = "higher the better" if self.evaluation_metric_direction else "lower the better"