From 14f7d976e03c92d6e727524e0cdad8a03b585016 Mon Sep 17 00:00:00 2001
From: WinstonLiyt <104308117+WinstonLiyt@users.noreply.github.com>
Date: Mon, 30 Sep 2024 12:30:42 +0800
Subject: [PATCH 1/2] fix: refine the ucb algorithm (#406)

* refine the UCB algorithm

* fix a CI error
---
 rdagent/scenarios/kaggle/developer/feedback.py   |  3 +++
 rdagent/scenarios/kaggle/experiment/prompts.yaml |  2 +-
 rdagent/scenarios/kaggle/proposal/proposal.py    | 16 +++++++---------
 3 files changed, 11 insertions(+), 10 deletions(-)

diff --git a/rdagent/scenarios/kaggle/developer/feedback.py b/rdagent/scenarios/kaggle/developer/feedback.py
index 085bdcb8..8d4bad54 100644
--- a/rdagent/scenarios/kaggle/developer/feedback.py
+++ b/rdagent/scenarios/kaggle/developer/feedback.py
@@ -177,6 +177,9 @@ def generate_feedback(self, exp: Experiment, hypothesis: Hypothesis, trace: Trac
         elif self.scen.if_using_graph_rag:
             trace.knowledge_base.add_document(experiment_feedback, self.scen)
 
+        if self.scen.if_action_choosing_based_on_UCB:
+            self.scen.action_counts[hypothesis.action] += 1
+
         return HypothesisFeedback(
             observations=observations,
             hypothesis_evaluation=hypothesis_evaluation,
diff --git a/rdagent/scenarios/kaggle/experiment/prompts.yaml b/rdagent/scenarios/kaggle/experiment/prompts.yaml
index 5bb00ecc..b696cd46 100644
--- a/rdagent/scenarios/kaggle/experiment/prompts.yaml
+++ b/rdagent/scenarios/kaggle/experiment/prompts.yaml
@@ -25,7 +25,7 @@ kg_description_template:
 
 kg_background: |-
   You are solving a data science tasks and the type of the competition is {{ competition_type }}.
-  The competition description is:{{competition_description}}. 
+  The competition description is: {{competition_description}}. 
   
   We provide an overall script in file: train.py. The user will run the train.py script along with several feature and model scripts to train several model to get a good performance on this task.
 
diff --git a/rdagent/scenarios/kaggle/proposal/proposal.py b/rdagent/scenarios/kaggle/proposal/proposal.py
index 1b3f269a..517d5fdb 100644
--- a/rdagent/scenarios/kaggle/proposal/proposal.py
+++ b/rdagent/scenarios/kaggle/proposal/proposal.py
@@ -130,13 +130,13 @@ def generate_RAG_content(self, trace: Trace, hypothesis_and_feedback: str, targe
 
         found_hypothesis_nodes = []
         for similar_node in similar_nodes:
-            for hypothesis_type in KG_ACTION_LIST:
-                hypothesis_nodes = trace.knowledge_base.get_nodes_within_steps(
-                    start_node=similar_node,
-                    steps=3,
-                    constraint_labels=[hypothesis_type],
-                )
-                found_hypothesis_nodes.extend(hypothesis_nodes[:2])
+            # Query only the `target` label; previously this looped over every type in KG_ACTION_LIST.
+            hypothesis_nodes = trace.knowledge_base.get_nodes_within_steps(
+                start_node=similar_node,
+                steps=3,
+                constraint_labels=[target],
+            )
+            found_hypothesis_nodes.extend(hypothesis_nodes[:2])
 
         found_hypothesis_nodes = sorted(list(set(found_hypothesis_nodes)), key=lambda x: len(x.content))
 
@@ -204,7 +204,6 @@ def execute_next_action(self, trace: Trace) -> str:
         for action in actions:
             if self.scen.action_counts[action] == 0:
                 selected_action = action
-                self.scen.action_counts[selected_action] += 1
                 return selected_action
 
         c = self.scen.confidence_parameter
@@ -216,7 +215,6 @@ def execute_next_action(self, trace: Trace) -> str:
             ucb_values[action] = ucb
         # Select action with highest UCB
         selected_action = max(ucb_values, key=ucb_values.get)
-        self.scen.action_counts[selected_action] += 1
 
         return selected_action
 

From 94c47d6fd5c3e38fc786a83e6d0d05e8d04498f3 Mon Sep 17 00:00:00 2001
From: WinstonLiyt <104308117+WinstonLiyt@users.noreply.github.com>
Date: Mon, 30 Sep 2024 14:54:56 +0800
Subject: [PATCH 2/2] fix: fix a bug in competition metric evaluation (#407)

* fix a bug in competition metric evaluation

* fix a bug

* fix a bug in RAG loading
---
 rdagent/core/knowledge_base.py                   | 10 ++++++----
 rdagent/scenarios/kaggle/experiment/prompts.yaml |  5 +++--
 rdagent/scenarios/kaggle/experiment/scenario.py  |  9 +++------
 3 files changed, 12 insertions(+), 12 deletions(-)

diff --git a/rdagent/core/knowledge_base.py b/rdagent/core/knowledge_base.py
index f4cd9ab5..f8385c1b 100644
--- a/rdagent/core/knowledge_base.py
+++ b/rdagent/core/knowledge_base.py
@@ -13,13 +13,15 @@ def __init__(self, path: str | Path | None = None) -> None:
     def load(self) -> None:
         if self.path is not None and self.path.exists():
             with self.path.open("rb") as f:
-                self.__dict__.update(
-                    pickle.load(f).__dict__,
-                )  # TODO: because we need to align with init function, we need a less hacky way to do this
+                loaded = pickle.load(f)
+                if isinstance(loaded, dict):  # dump() now pickles self.__dict__ directly
+                    self.__dict__.update(loaded)
+                else:  # older dumps pickled the whole object; reuse its attributes
+                    self.__dict__.update(loaded.__dict__)
 
     def dump(self) -> None:
         if self.path is not None:
             self.path.parent.mkdir(parents=True, exist_ok=True)
-            pickle.dump(self, self.path.open("wb"))
+            self.path.write_bytes(pickle.dumps(self.__dict__))  # write_bytes closes the file handle
         else:
             logger.warning("KnowledgeBase path is not set, dump failed.")
diff --git a/rdagent/scenarios/kaggle/experiment/prompts.yaml b/rdagent/scenarios/kaggle/experiment/prompts.yaml
index b696cd46..528bd68b 100644
--- a/rdagent/scenarios/kaggle/experiment/prompts.yaml
+++ b/rdagent/scenarios/kaggle/experiment/prompts.yaml
@@ -11,8 +11,7 @@ kg_description_template:
       "Competition Features": "Two-line description of the overall features involved within the competition as background."
       "Submission Specifications": "The submission specification & sample submission csv descriptions for the model to output."
       "Submission channel number to each sample": "The number of channels in the output for each sample, e.g., 1 for regression, N for N class classification with probabilities, etc. A Integer. If not specified, it is 1."
-      "Evaluation Description": "A brief description for what metrics are used in evaluation. An explanation of whether a higher score is better or lower is better in terms of performance."
-      "Evaluation Boolean": "True" or "False" (True means the higher score the better (like accuracy); False means the lower value the better (like loss).) 
+      "Evaluation Description": "A brief description of the metrics used in the evaluation. Please note that if `evaluation_metric_direction` is True, it indicates that higher values are better; if False, lower values are preferred."
     }
     Since these might be very similar column names in data like one_hot_encoded columns, you can use some regex to group them together.
 
@@ -22,6 +21,8 @@ kg_description_template:
     {{ competition_descriptions }}
     The raw data information:
     {{ raw_data_information }}
+    Evaluation_metric_direction: 
+    {{ evaluation_metric_direction }}
 
 kg_background: |-
   You are solving a data science tasks and the type of the competition is {{ competition_type }}.
diff --git a/rdagent/scenarios/kaggle/experiment/scenario.py b/rdagent/scenarios/kaggle/experiment/scenario.py
index 3906e2d5..2e32edf7 100644
--- a/rdagent/scenarios/kaggle/experiment/scenario.py
+++ b/rdagent/scenarios/kaggle/experiment/scenario.py
@@ -50,7 +50,8 @@ def __init__(self, competition: str) -> None:
         self.submission_specifications = None
         self.model_output_channel = None
         self.evaluation_desc = None
-        self.evaluation_metric_direction = None
+        self.leaderboard = leaderboard_scores(competition)
+        self.evaluation_metric_direction = float(self.leaderboard[0]) > float(self.leaderboard[-1])
         self.vector_base = None
         self.mini_case = KAGGLE_IMPLEMENT_SETTING.mini_case
         self._analysis_competition_description()
@@ -75,8 +76,6 @@ def __init__(self, competition: str) -> None:
         self.confidence_parameter = 1.0
         self.initial_performance = 0.0
 
-        self.leaderboard = leaderboard_scores(competition)
-
     def _analysis_competition_description(self):
         sys_prompt = (
             Environment(undefined=StrictUndefined)
@@ -90,6 +89,7 @@ def _analysis_competition_description(self):
             .render(
                 competition_descriptions=self.competition_descriptions,
                 raw_data_information=self._source_data,
+                evaluation_metric_direction=self.evaluation_metric_direction,
             )
         )
 
@@ -111,9 +111,6 @@ def _analysis_competition_description(self):
         self.evaluation_desc = response_json_analysis.get(
             "Evaluation Description", "No evaluation specification provided."
         )
-        self.evaluation_metric_direction = response_json_analysis.get(
-            "Evaluation Boolean", "No evaluation specification provided."
-        )
 
     def get_competition_full_desc(self) -> str:
         evaluation_direction = "higher the better" if self.evaluation_metric_direction else "lower the better"