From 5a2b2d6e6830c349e63c5bad18d8da91cfe75ba3 Mon Sep 17 00:00:00 2001
From: Haoxue Wang <whx924@gmail.com>
Date: Thu, 6 Jun 2024 08:18:33 +0000
Subject: [PATCH] load_data

---
 rdagent/benchmark/data_process.py | 78 ++++++++++++++++++++++++++++++-
 1 file changed, 76 insertions(+), 2 deletions(-)

diff --git a/rdagent/benchmark/data_process.py b/rdagent/benchmark/data_process.py
index 5af93864..eb791367 100644
--- a/rdagent/benchmark/data_process.py
+++ b/rdagent/benchmark/data_process.py
@@ -3,10 +3,84 @@
 from typing import List
 from rdagent.core.task import FactorTask, TestCase
 # TODO:Need to verify the type of input data，how to deal with the gt
-def load_eval_data(version: TASK_VERSION) -> List[TestCase]:
+
+# (haoxue) TODO: check the following code; it seems that a task.py already exists.
+class task(object):
+    """A benchmark task plus helpers that adapt raw task JSON to factor keys."""
+
+    def __init__(self, task_name=None, task_description=None, task_formulation=None, task_formulation_description=None, variables: dict = None, resource: str = None):
+        # Every field is optional so the JSON helpers can be used via a bare `task()`.
+        self.task_name = task_name
+        self.task_description = task_description
+        self.task_formulation = task_formulation
+        self.task_formulation_description = task_formulation_description
+        # A fresh dict per instance; a `{}` default would be shared across instances.
+        self.variables = {} if variables is None else variables
+        self.task_resources = resource
+
+    def task_key_adaptor(self, fname, task):
+        """Return `task` re-keyed to factor field names, tagged with `fname`.
+
+        "formulation"/"description"/"variable" are renamed; other keys pass through.
+        "factor_formulation_description" is always reset to "" (even for an empty task).
+        """
+        # FIXME: we should align the code and task to make the interface simpler
+        renames = {"formulation": "factor_formulation", "description": "factor_description", "variable": "variables"}
+        res = {"factor_name": fname}
+        for k, v in task.items():
+            res[renames.get(k, k)] = v
+        res["factor_formulation_description"] = ""
+        return res
+
+    def task_set_adaptor(self, task_set):
+        """Adapt `{set_name: {factor_name: task_dict}}` into `{set_name: [adapted dict, ...]}`."""
+        return {k: [self.task_key_adaptor(f, d) for f, d in f_d.items()] for k, f_d in task_set.items()}
+
+    def load_all_task_json_disk(self, path):
+        """Read the JSON file at `path` and return its adapted task set."""
+        import json  # local import: no module-level `import json` is visible in this hunk
+
+        with open(path, encoding="utf-8") as f:
+            data = json.load(f)
+        return self.task_set_adaptor(data)
+
+
+def get_test_task_json(version: TASK_VERSION, path):
+    """Load the task-set JSON at `path` for the requested `version`.
+
+    Only version "" is implemented; it returns the adapted task dict.
+    "random", "Naive", "CoT" and "Past" raise NotImplementedError rather
+    than falling through to an unbound `res`; anything else raises ValueError.
+    """
+    if version == "":
+        # The loader methods never read instance state, so placeholder fields suffice;
+        # explicit arguments keep the call valid whatever defaults `__init__` declares.
+        return task(None, None, None, None).load_all_task_json_disk(path=path)
+    if version in ("random", "Naive", "CoT", "Past"):
+        raise NotImplementedError(f"Task version {version!r} is not implemented yet")
+    raise ValueError(f"Unknown version: {version}")
+
+def load_tasks(data: dict, with_fname=False):
+    """Build tasks with FactorTask.from_dict from an adapted task-set dict.
+
+    `data` maps a name to a list of task dicts; returns a flat list of the
+    built tasks, or of (name, task) tuples when `with_fname` is true.
+    """
+    # TODO: we should put these into a staticmethod as BaseEval (maybe FactorImplementTask)
+    loaded = []
+    for set_name, task_dicts in data.items():
+        for task_dict in task_dicts:
+            factor_task = FactorTask.from_dict(task_dict)
+            if "variables" in task_dict:  # the stringified variables become the formulation description
+                factor_task.factor_formulation_description = str(task_dict["variables"])
+            loaded.append((set_name, factor_task) if with_fname else factor_task)
+    return loaded
+
+def load_eval_data(version: TASK_VERSION, path) -> List[TestCase]:
     # prepare the input data used for generation
     # The process should contain: 1. read the factor/model info, 2. Prepare and check the gt
-    pass 
+    all_task_json = get_test_task_json(version, path)  # adapted task dict read from the JSON file at `path`
+    return load_tasks(all_task_json)  # NOTE(review): load_tasks collects FactorTask.from_dict results — confirm the List[TestCase] annotation
 
 class FileBasedFactorImplementation(TaskImplementation):
     """