feat: support Multi output (#330)
* Key changes

* Revised to support submission specifications

* Revised to support submission specifications

* revise CI

* CI-Fix

* fixing-CI

* Support COSTEER Multi-Dimension for output & bug-fix

* Revised to support submission specifications

* revise CI

* CI-Fix

* fixing-CI

* Support COSTEER Multi-Dimension for output & bug-fix

* Linting
xisen-w authored Sep 25, 2024
1 parent cc0a86d commit 3d36c45
Showing 2 changed files with 35 additions and 14 deletions.
6 changes: 5 additions & 1 deletion rdagent/scenarios/kaggle/experiment/prompts.yaml
@@ -314,7 +314,11 @@ kg_feature_simulator: |-
kg_model_output_format: |-
For feature related tasks, the output should be a pandas DataFrame with the new features. The columns should be the new features, and the rows should correspond to the number of samples in the input DataFrame.
For model related tasks, the output should be an np.ndarray with the appropriate number of predictions, each prediction being a single value. The output should be a 2D array with dimensions corresponding to the number of predictions and 1 column (e.g., (8, 1) if there are 8 predictions).
For model related tasks:
1. the output should be an np.ndarray with the appropriate number of predictions and the appropriate values within each prediction
2. the output should be a 2D array whose dimensions correspond to the number of predictions and the number of values output per prediction. E.g., 4 predictions of 3 probabilities each give shape (4, 3); 8 single-value predictions give shape (8, 1).
3. please reference the competition's submission requirements and align with them.
Submission Requirements here:\n: {{submission_specifications}}
kg_model_simulator: |-
The models will be trained on the competition dataset and evaluated on their ability to predict the target. Metrics like accuracy and AUC-ROC are used to evaluate model performance.
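The revised kg_model_output_format prompt above asks generated model code to return a 2D np.ndarray shaped (number of predictions, values per prediction) and to match the competition's submission file. The sketch below is not repository code; it only illustrates those shapes under stated assumptions, and the id/class column names and the submission.csv file name are hypothetical placeholders.

import numpy as np
import pandas as pd

# Minimal sketch of the output contract described in the prompt above (not repository code).
# A single-target task expects shape (n_predictions, 1); a 3-class probability task expects (n_predictions, 3).
n_predictions = 8

single_output = np.random.rand(n_predictions, 1)                        # shape (8, 1)
multi_output = np.random.rand(n_predictions, 3)                         # shape (8, 3)
multi_output = multi_output / multi_output.sum(axis=1, keepdims=True)   # each row sums to 1

assert single_output.ndim == 2 and single_output.shape == (n_predictions, 1)
assert multi_output.shape == (n_predictions, 3)

# Align with a hypothetical submission specification: an id column plus one column
# per predicted class (column names and file name are placeholders, not from the repo).
submission = pd.DataFrame(multi_output, columns=["class_0", "class_1", "class_2"])
submission.insert(0, "id", range(n_predictions))
submission.to_csv("submission.csv", index=False)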
43 changes: 30 additions & 13 deletions rdagent/scenarios/kaggle/experiment/scenario.py
@@ -25,20 +25,22 @@ def __init__(self, competition: str) -> None:
self.competition = competition
self.competition_descriptions = crawl_descriptions(competition)
self._source_data = self.source_data
self._output_format = self.output_format
self._interface = self.interface
self._simulator = self.simulator

self.competition_type = None
self.competition_description = None
self.target_description = None
self.competition_features = None
self.submission_specifications = None
self._analysis_competition_description()
self.if_action_choosing_based_on_UCB = KAGGLE_IMPLEMENT_SETTING.if_action_choosing_based_on_UCB

# Move these assignments after _analysis_competition_description
self._output_format = self.output_format
self._interface = self.interface
self._simulator = self.simulator
self._background = self.background

self.if_action_choosing_based_on_UCB = KAGGLE_IMPLEMENT_SETTING.if_action_choosing_based_on_UCB

def _analysis_competition_description(self):
sys_prompt = (
Environment(undefined=StrictUndefined)
@@ -61,14 +63,25 @@ def _analysis_competition_description(self):
json_mode=True,
)

response_json_analysis = json.loads(response_analysis)
self.competition_type = response_json_analysis.get("Competition Type", "No type provided")
self.competition_description = response_json_analysis.get("Competition Description", "No description provided")
self.target_description = response_json_analysis.get("Target Description", "No target provided")
self.competition_features = response_json_analysis.get("Competition Features", "No features provided")
self.submission_specifications = response_json_analysis.get(
"Submission Specifications", "No submission requirements provided"
)
try:
response_json_analysis = json.loads(response_analysis)
self.competition_type = response_json_analysis.get("Competition Type", "No type provided")
self.competition_description = response_json_analysis.get(
"Competition Description", "No description provided"
)
self.target_description = response_json_analysis.get("Target Description", "No target provided")
self.competition_features = response_json_analysis.get("Competition Features", "No features provided")
self.submission_specifications = response_json_analysis.get(
"Submission Specifications", "No submission requirements provided"
)
except json.JSONDecodeError:
print(f"Failed to parse JSON response: {response_analysis}")
# Set default values if JSON parsing fails
self.competition_type = "Unknown"
self.competition_description = "No description available"
self.target_description = "No target available"
self.competition_features = "No features available"
self.submission_specifications = "No submission requirements available"

def get_competition_full_desc(self) -> str:
return f"""Competition Type: {self.competition_type}
@@ -137,7 +150,11 @@ def source_data(self) -> str:

@property
def output_format(self) -> str:
return prompt_dict["kg_model_output_format"]
return (
Environment(undefined=StrictUndefined)
.from_string(prompt_dict["kg_model_output_format"])
.render(submission_specifications=self.submission_specifications)
)

@property
def interface(self) -> str:
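The scenario.py changes above do two things: wrap the JSON parse of the LLM analysis in a try/except with default values, and render kg_model_output_format as a Jinja2 template so the extracted submission specifications are injected into the prompt. Below is a simplified, self-contained sketch of both patterns; the example response string and template text are placeholders, not the actual rdagent prompts.

import json
from jinja2 import Environment, StrictUndefined

# Hypothetical LLM response; in the real scenario this comes from the analysis call.
response_analysis = '{"Submission Specifications": "id,target - one probability per row"}'

try:
    analysis = json.loads(response_analysis)
    submission_specifications = analysis.get(
        "Submission Specifications", "No submission requirements provided"
    )
except json.JSONDecodeError:
    # Fall back to a safe default instead of crashing on a malformed response.
    submission_specifications = "No submission requirements available"

# StrictUndefined makes rendering fail loudly if a placeholder is missing,
# rather than silently rendering an empty string.
template = "Output format rules ...\nSubmission Requirements here:\n: {{ submission_specifications }}"
rendered = (
    Environment(undefined=StrictUndefined)
    .from_string(template)
    .render(submission_specifications=submission_specifications)
)
print(rendered)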
