Skip to content

Commit

Permalink
feat: Dynamic scenario based on task (#392)
Browse files Browse the repository at this point in the history
* OAI api

* more comments

* llm utils

* RD-Framework for dynamic scen

* remove useless code

* auto lint

* fix dynamic scenario

* Add parameter

* auto lint

* remove abstractmethod

* fix lint
  • Loading branch information
you-n-g authored Sep 29, 2024
1 parent c55ec0a commit 665a037
Show file tree
Hide file tree
Showing 12 changed files with 121 additions and 46 deletions.
18 changes: 15 additions & 3 deletions rdagent/components/coder/factor_coder/CoSTEER/evaluators.py
Original file line number Diff line number Diff line change
Expand Up @@ -89,7 +89,11 @@ def evaluate(
system_prompt = (
Environment(undefined=StrictUndefined)
.from_string(evaluate_prompts["evaluator_code_feedback_v1_system"])
.render(scenario=self.scen.get_scenario_all_desc() if self.scen is not None else "No scenario description.")
.render(
scenario=self.scen.get_scenario_all_desc(target_task)
if self.scen is not None
else "No scenario description."
)
)

execution_feedback_to_render = execution_feedback
Expand Down Expand Up @@ -167,7 +171,11 @@ def evaluate(
.from_string(
evaluate_prompts["evaluator_output_format_system"],
)
.render(scenario=self.scen.get_scenario_all_desc() if self.scen is not None else "No scenario description.")
.render(
scenario=self.scen.get_scenario_all_desc(implementation.target_task)
if self.scen is not None
else "No scenario description."
)
)

# TODO: with retry_context(retry_n=3, except_list=[KeyError]):
Expand Down Expand Up @@ -473,7 +481,11 @@ def evaluate(
system_prompt = (
Environment(undefined=StrictUndefined)
.from_string(evaluate_prompts["evaluator_final_decision_v1_system"])
.render(scenario=self.scen.get_scenario_all_desc() if self.scen is not None else "No scenario description.")
.render(
scenario=self.scen.get_scenario_all_desc(target_task)
if self.scen is not None
else "No scenario description."
)
)
execution_feedback_to_render = execution_feedback

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -133,7 +133,7 @@ def implement_one_factor(
implement_prompts["evolving_strategy_factor_implementation_v1_system"],
)
.render(
scenario=self.scen.get_scenario_all_desc(),
scenario=self.scen.get_scenario_all_desc(target_task),
queried_former_failed_knowledge=queried_former_failed_knowledge_to_render,
)
)
Expand Down Expand Up @@ -226,7 +226,7 @@ def implement_one_factor(
implement_prompts["evolving_strategy_factor_implementation_v1_system"],
)
.render(
scenario=self.scen.get_scenario_all_desc(),
scenario=self.scen.get_scenario_all_desc(target_task),
queried_former_failed_knowledge=queried_former_failed_knowledge_to_render,
)
)
Expand All @@ -250,7 +250,7 @@ def implement_one_factor(
Environment(undefined=StrictUndefined)
.from_string(implement_prompts["evolving_strategy_error_summary_v2_system"])
.render(
scenario=self.scen.get_scenario_all_desc(),
scenario=self.scen.get_scenario_all_desc(target_task),
factor_information_str=target_factor_task_information,
code_and_feedback=queried_former_failed_knowledge_to_render[
-1
Expand Down
12 changes: 10 additions & 2 deletions rdagent/components/coder/model_coder/CoSTEER/evaluators.py
Original file line number Diff line number Diff line change
Expand Up @@ -89,7 +89,11 @@ def evaluate(
system_prompt = (
Environment(undefined=StrictUndefined)
.from_string(evaluate_prompts["evaluator_code_feedback"]["system"])
.render(scenario=self.scen.get_scenario_all_desc() if self.scen is not None else "No scenario description.")
.render(
scenario=self.scen.get_scenario_all_desc(target_task)
if self.scen is not None
else "No scenario description."
)
)

execution_feedback_to_render = model_execution_feedback
Expand Down Expand Up @@ -145,7 +149,11 @@ def evaluate(
system_prompt = (
Environment(undefined=StrictUndefined)
.from_string(evaluate_prompts["evaluator_final_feedback"]["system"])
.render(scenario=self.scen.get_scenario_all_desc() if self.scen is not None else "No scenario description.")
.render(
scenario=self.scen.get_scenario_all_desc(target_task)
if self.scen is not None
else "No scenario description."
)
)

execution_feedback_to_render = model_execution_feedback
Expand Down
25 changes: 21 additions & 4 deletions rdagent/core/scenario.py
Original file line number Diff line number Diff line change
@@ -1,16 +1,29 @@
from abc import ABC, abstractmethod

from rdagent.core.experiment import Task


class Scenario(ABC):
@property
@abstractmethod
def background(self) -> str:
"""Background information"""

# TODO: We have to change all the sub classes to override get_source_data_desc instead of `source_data`
def get_source_data_desc(self, task: Task | None = None) -> str: # noqa: ARG002
"""
Source data description
The choice of data may vary based on the specific task at hand.
"""
return ""

@property
@abstractmethod
def source_data(self) -> str:
"""Source data description"""
"""
A convenient shortcut for describing source data
"""
return self.get_source_data_desc()

@property
@abstractmethod
Expand All @@ -33,8 +46,12 @@ def rich_style_description(self) -> str:
"""Rich style description to present"""

@abstractmethod
def get_scenario_all_desc(self) -> str:
"""Combine all the description together"""
def get_scenario_all_desc(self, task: Task | None = None) -> str:
"""
Combine all descriptions together
The scenario description varies based on the task being performed.
"""

@property
def experiment_setting(self) -> str | None:
Expand Down
9 changes: 9 additions & 0 deletions rdagent/oai/llm_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -235,6 +235,15 @@ def display_history(self) -> None:


class APIBackend:
"""
This is a unified interface for different backends.
(xiao) thinks integrating all kinds of APIs in a single class is not a good design.
So we should split them into different classes in `oai/backends/` in the future.
"""

# FIXME: (xiao) We should avoid using self.xxxx.
# Instead, we can use self.cfg directly. If it's difficult to support different backend settings, we can split them into multiple BaseSettings.
def __init__( # noqa: C901, PLR0912, PLR0915
self,
*,
Expand Down
3 changes: 2 additions & 1 deletion rdagent/scenarios/data_mining/experiment/model_experiment.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
ModelFBWorkspace,
ModelTask,
)
from rdagent.core.experiment import Task
from rdagent.core.prompts import Prompts
from rdagent.core.scenario import Scenario
from rdagent.scenarios.data_mining.experiment.workspace import DMFBWorkspace
Expand Down Expand Up @@ -62,7 +63,7 @@ def rich_style_description(self) -> str:
To demonstrate the dynamic evolution of models through the R&D loop, emphasizing how each iteration enhances the model performance and reliability. The performane is measured by the AUROC score (Area Under the Receiver Operating Characteristic), which is a commonly used metric for binary classification. """

def get_scenario_all_desc(self) -> str:
def get_scenario_all_desc(self, task: Task | None = None) -> str:
return f"""Background of the scenario:
{self.background}
The interface you should follow to write the runnable code:
Expand Down
3 changes: 2 additions & 1 deletion rdagent/scenarios/general_model/scenario.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
from copy import deepcopy
from pathlib import Path

from rdagent.core.experiment import Task
from rdagent.core.prompts import Prompts
from rdagent.core.scenario import Scenario

Expand Down Expand Up @@ -40,7 +41,7 @@ def simulator(self) -> str:
def rich_style_description(self) -> str:
return self._rich_style_description

def get_scenario_all_desc(self) -> str:
def get_scenario_all_desc(self, task: Task | None = None) -> str:
return f"""Background of the scenario:
{self.background}
The interface you should follow to write the runnable code:
Expand Down
3 changes: 2 additions & 1 deletion rdagent/scenarios/kaggle/experiment/scenario.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
from jinja2 import Environment, StrictUndefined

from rdagent.app.kaggle.conf import KAGGLE_IMPLEMENT_SETTING
from rdagent.core.experiment import Task
from rdagent.core.prompts import Prompts
from rdagent.core.scenario import Scenario
from rdagent.oai.llm_utils import APIBackend
Expand Down Expand Up @@ -203,7 +204,7 @@ def rich_style_description(self) -> str:
This is the Kaggle scenario for the competition: {self.competition}
"""

def get_scenario_all_desc(self) -> str:
def get_scenario_all_desc(self, task: Task | None = None) -> str:
return f"""Background of the scenario:
{self._background}
The source dataset you can use to generate the features:
Expand Down
9 changes: 5 additions & 4 deletions rdagent/scenarios/qlib/experiment/factor_experiment.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
FactorFBWorkspace,
FactorTask,
)
from rdagent.core.experiment import Task
from rdagent.core.prompts import Prompts
from rdagent.core.scenario import Scenario
from rdagent.scenarios.qlib.experiment.utils import get_data_folder_intro
Expand Down Expand Up @@ -36,8 +37,7 @@ def __init__(self) -> None:
def background(self) -> str:
return self._background

@property
def source_data(self) -> str:
def get_source_data_desc(self, task: Task | None = None) -> str:
return self._source_data

@property
Expand All @@ -60,11 +60,12 @@ def rich_style_description(self) -> str:
def experiment_setting(self) -> str:
return self._experiment_setting

def get_scenario_all_desc(self) -> str:
def get_scenario_all_desc(self, task: Task | None = None) -> str:
"""A static scenario describer"""
return f"""Background of the scenario:
{self.background}
The source data you can use:
{self.source_data}
{self.get_source_data_desc(task)}
The interface you should follow to write the runnable code:
{self.interface}
The output of your code should be in the format:
Expand Down
3 changes: 2 additions & 1 deletion rdagent/scenarios/qlib/experiment/model_experiment.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
ModelFBWorkspace,
ModelTask,
)
from rdagent.core.experiment import Task
from rdagent.core.prompts import Prompts
from rdagent.core.scenario import Scenario
from rdagent.scenarios.qlib.experiment.workspace import QlibFBWorkspace
Expand Down Expand Up @@ -57,7 +58,7 @@ def rich_style_description(self) -> str:
def experiment_setting(self) -> str:
return self._experiment_setting

def get_scenario_all_desc(self) -> str:
def get_scenario_all_desc(self, task: Task | None = None) -> str:
return f"""Background of the scenario:
{self.background}
The interface you should follow to write the runnable code:
Expand Down
66 changes: 42 additions & 24 deletions rdagent/scenarios/qlib/experiment/utils.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
import re
import shutil
from pathlib import Path

Expand Down Expand Up @@ -49,15 +50,31 @@ def generate_data_folder_from_qlib():
)


def get_data_folder_intro():
"""Directly get the info of the data folder.
def get_data_folder_intro(fname_reg: str = ".*", flags=0) -> str:
"""
Directly get the info of the data folder.
It is for preparing prompting message.
Parameters
----------
fname_reg : str
a regular expression to filter the file name.
flags : int
flags for re.match
Returns
-------
str
The description of the data folder.
"""

if (
not Path(FACTOR_IMPLEMENT_SETTINGS.data_folder).exists()
or not Path(FACTOR_IMPLEMENT_SETTINGS.data_folder_debug).exists()
):
# FIXME: (xiao) I think this is written in a hard-coded way.
# get data folder intro does not imply that we are generating the data folder.
generate_data_folder_from_qlib()

JJ_TPL = Environment(undefined=StrictUndefined).from_string(
Expand All @@ -70,29 +87,30 @@ def get_data_folder_intro():
)
content_l = []
for p in Path(FACTOR_IMPLEMENT_SETTINGS.data_folder_debug).iterdir():
if p.name.endswith(".h5"):
df = pd.read_hdf(p)
# get df.head() as string with full width
pd.set_option("display.max_columns", None) # or 1000
pd.set_option("display.max_rows", None) # or 1000
pd.set_option("display.max_colwidth", None) # or 199
rendered = JJ_TPL.render(
file_name=p.name,
type_desc="generated by `pd.read_hdf(filename).head()`",
content=df.head().to_string(),
)
content_l.append(rendered)
elif p.name.endswith(".md"):
with open(p) as f:
content = f.read()
if re.match(fname_reg, p.name, flags) is not None:
if p.name.endswith(".h5"):
df = pd.read_hdf(p)
# get df.head() as string with full width
pd.set_option("display.max_columns", None) # or 1000
pd.set_option("display.max_rows", None) # or 1000
pd.set_option("display.max_colwidth", None) # or 199
rendered = JJ_TPL.render(
file_name=p.name,
type_desc="markdown",
content=content,
type_desc="generated by `pd.read_hdf(filename).head()`",
content=df.head().to_string(),
)
content_l.append(rendered)
else:
raise NotImplementedError(
f"file type {p.name} is not supported. Please implement its description function.",
)
return "\n ----------------- file splitter -------------\n".join(content_l)
elif p.name.endswith(".md"):
with open(p) as f:
content = f.read()
rendered = JJ_TPL.render(
file_name=p.name,
type_desc="markdown",
content=content,
)
content_l.append(rendered)
else:
raise NotImplementedError(
f"file type {p.name} is not supported. Please implement its description function.",
)
return "\n----------------- file splitter -------------\n".join(content_l)
10 changes: 8 additions & 2 deletions rdagent/utils/agent/tpl.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,8 +18,11 @@


# class T(SingletonBaseClass): TODO: singleton does not support args now.
class T:
"""Use the simplest way to (C)reate a Template and (r)ender it!!"""
class RDAT:
"""
RD-Agent's Template
Use the simplest way to (C)reate a Template and (r)ender it!!
"""

def __init__(self, uri: str):
"""
Expand Down Expand Up @@ -61,3 +64,6 @@ def r(self, **context: Any):
Render the template with the given context.
"""
return Environment(undefined=StrictUndefined).from_string(self.template).render(**context)


T = RDAT # shortcuts

0 comments on commit 665a037

Please sign in to comment.