microsoft · xisen-w · Sep 29, 2024 · Sep 29, 2024 · Sep 29, 2024 · Sep 29, 2024
diff --git a/rdagent/app/kaggle/loop.py b/rdagent/app/kaggle/loop.py
@@ -103,21 +103,22 @@ def running(self, prev_out: dict[str, Any]):
             if KAGGLE_IMPLEMENT_SETTING.auto_submit:
                 csv_path = exp.experiment_workspace.workspace_path / "submission.csv"
                 try:
-                    subprocess.run(
-                        [
-                            "kaggle",
-                            "competitions",
-                            "submit",
-                            "-f",
-                            str(csv_path.absolute()),
-                            "-m",
-                            str(csv_path.parent.absolute()),
-                            KAGGLE_IMPLEMENT_SETTING.competition,
-                        ],
-                        check=True,
-                    )
+                    command = [
+                        "kaggle",
+                        "competitions",
+                        "submit",
+                        "-f",
+                        str(csv_path.absolute()),
+                        "-m",
+                        str(csv_path.parent.absolute()),
+                        KAGGLE_IMPLEMENT_SETTING.competition,
+                    ]
+                    logger.info(f"Executing Kaggle API command: {' '.join(command)}")
+                    result = subprocess.run(command, check=True, capture_output=True, text=True)
+                    logger.info(f"Kaggle API output: {result.stdout}")
                 except subprocess.CalledProcessError as e:
                     logger.error(f"Auto submission failed: \n{e}")
+                    logger.error(f"Kaggle API error output: {e.stderr}")
                 except Exception as e:
                     logger.error(f"Other exception when use kaggle api:\n{e}")
 

diff --git a/rdagent/log/ui/app.py b/rdagent/log/ui/app.py
@@ -88,6 +88,9 @@
 if "lround" not in state:
     state.lround = 0  # RD Loop Round
 
+if "times" not in state:
+    state.times = defaultdict(lambda: defaultdict(list))
+
 if "erounds" not in state:
     state.erounds = defaultdict(int)  # Evolving Rounds in each RD Loop
 
@@ -186,6 +189,17 @@ def get_msgs_until(end_func: Callable[[Message], bool] = lambda _: True):
                             )
 
                     state.msgs[state.lround][msg.tag].append(msg)
+
+                    # Update Times
+                    if "init" in tags:
+                        state.times[state.lround]["init"].append(msg.timestamp)
+                    if "r" in tags:
+                        state.times[state.lround]["r"].append(msg.timestamp)
+                    if "d" in tags:
+                        state.times[state.lround]["d"].append(msg.timestamp)
+                    if "ef" in tags:
+                        state.times[state.lround]["ef"].append(msg.timestamp)
+
                     # Stop Getting Logs
                     if end_func(msg):
                         break
@@ -224,6 +238,7 @@ def refresh(same_trace: bool = False):
     state.last_msg = None
     state.current_tags = []
     state.alpha158_metrics = None
+    state.times = defaultdict(lambda: defaultdict(list))
 
 
 def evolving_feedback_window(wsf: FactorSingleFeedback | ModelCoderFeedback):
@@ -741,6 +756,18 @@ def evolving_window():
             st.markdown(state.scenario.rich_style_description + css, unsafe_allow_html=True)
 
 
+def show_times(round: int):
+    """Render, per stage tag of the given loop round, the time between its first and last logged message."""
+    for k, v in state.times[round].items():
+        if not v:
+            continue  # no timestamps recorded for this tag, so there is no duration to show
+        # A single timestamp yields a zero duration. Use total_seconds(): timedelta.seconds
+        # discards whole days, which would under-report stages running longer than 24 hours.
+        total_seconds = int((v[-1] - v[0]).total_seconds())
+        minutes, seconds = divmod(total_seconds, 60)
+        st.markdown(f"**:blue[{k}]**: :red[**{minutes}**] minutes :orange[**{seconds}**] seconds")
+
+
 if state.scenario is not None:
     summary_window()
 
@@ -754,8 +781,12 @@ def evolving_window():
             round = st.radio("**Loops**", horizontal=True, options=r_options, index=state.lround - 1)
         else:
             round = 1
+
+        show_times(round)
         rf_c, d_c = st.columns([2, 2])
     elif isinstance(state.scenario, GeneralModelScenario):
+        show_times(round)
+
         rf_c = st.container()
         d_c = st.container()
         round = 1

diff --git a/rdagent/scenarios/kaggle/experiment/digit-recognizer_template/model/select_lightgbm.py b/rdagent/scenarios/kaggle/experiment/digit-recognizer_template/model/select_lightgbm.py
@@ -0,0 +1,12 @@
+import pandas as pd
+
+
+def select(X: pd.DataFrame) -> pd.DataFrame:
+    """
+    Keep every feature column; flatten multi-level column labels in place before returning X.
+    """
+    # No real feature selection happens yet, so single-level frames pass through untouched.
+    if X.columns.nlevels != 1:
+        # Each label tuple becomes one string: its levels stringified and joined with "_".
+        X.columns = ["_".join(map(str, levels)).strip() for levels in X.columns.values]
+    return X
diff --git a/rdagent/scenarios/kaggle/experiment/digit-recognizer_template/model/select_randomforest.py b/rdagent/scenarios/kaggle/experiment/digit-recognizer_template/model/select_randomforest.py
@@ -0,0 +1,12 @@
+import pandas as pd
+
+
+def select(X: pd.DataFrame) -> pd.DataFrame:
+    """
+    Keep every feature column; flatten multi-level column labels in place before returning X.
+    """
+    # No real feature selection happens yet, so single-level frames pass through untouched.
+    if X.columns.nlevels != 1:
+        # Each label tuple becomes one string: its levels stringified and joined with "_".
+        X.columns = ["_".join(map(str, levels)).strip() for levels in X.columns.values]
+    return X
diff --git a/rdagent/scenarios/kaggle/experiment/digit-recognizer_template/train.py b/rdagent/scenarios/kaggle/experiment/digit-recognizer_template/train.py
@@ -75,14 +75,14 @@ def import_module_from_path(module_name, module_path):
     metrics_all.append(accuracy)
 
 # 5) Save the validation accuracy
-min_index = np.argmax(metrics_all)
-pd.Series(data=[metrics_all[min_index]], index=["multi-class accuracy"]).to_csv("submission_score.csv")
+max_index = np.argmax(metrics_all)
+pd.Series(data=[metrics_all[max_index]], index=["multi-class accuracy"]).to_csv("submission_score.csv")
 
 # 6) Submit predictions for the test
 ids = range(1, len(X_test) + 1)
 
 # TODO: fix selection
 print(X_valid_selected.columns)
-y_test_pred = model_l[min_index][1](model_l[min_index][0], model_l[min_index][2].select(X_test)).flatten()
+y_test_pred = model_l[max_index][1](model_l[max_index][0], model_l[max_index][2].select(X_test)).flatten()
 submission_result = pd.DataFrame({"ImageId": ids, "Label": y_test_pred})
 submission_result.to_csv("submission.csv", index=False)
diff --git a/...cenarios/kaggle/experiment/forest-cover-type-prediction_template/model/select_lightgbm.py b/...cenarios/kaggle/experiment/forest-cover-type-prediction_template/model/select_lightgbm.py
@@ -0,0 +1,12 @@
+import pandas as pd
+
+
+def select(X: pd.DataFrame) -> pd.DataFrame:
+    """
+    Keep every feature column; flatten multi-level column labels in place before returning X.
+    """
+    # No real feature selection happens yet, so single-level frames pass through untouched.
+    if X.columns.nlevels != 1:
+        # Each label tuple becomes one string: its levels stringified and joined with "_".
+        X.columns = ["_".join(map(str, levels)).strip() for levels in X.columns.values]
+    return X
diff --git a/rdagent/scenarios/kaggle/experiment/forest-cover-type-prediction_template/train.py b/rdagent/scenarios/kaggle/experiment/forest-cover-type-prediction_template/train.py
@@ -76,12 +76,12 @@ def import_module_from_path(module_name, module_path):
     metrics_all.append(accuracy)
 
 # 5) Save the validation accuracy
-min_index = np.argmax(metrics_all)
-pd.Series(data=[metrics_all[min_index]], index=["multi-class accuracy"]).to_csv("submission_score.csv")
+max_index = np.argmax(metrics_all)
+pd.Series(data=[metrics_all[max_index]], index=["multi-class accuracy"]).to_csv("submission_score.csv")
 
 # 6) Make predictions on the test set and save them
-X_test_selected = model_l[min_index][2].select(X_test.copy())
-y_test_pred = model_l[min_index][1](model_l[min_index][0], X_test_selected).flatten() + 1
+X_test_selected = model_l[max_index][2].select(X_test.copy())
+y_test_pred = model_l[max_index][1](model_l[max_index][0], X_test_selected).flatten() + 1
 
 
 # 7) Submit predictions for the test set

diff --git a/...aggle/experiment/optiver-realized-volatility-prediction_template/model/select_lightgbm.py b/...aggle/experiment/optiver-realized-volatility-prediction_template/model/select_lightgbm.py
@@ -0,0 +1,12 @@
+import pandas as pd
+
+
+def select(X: pd.DataFrame) -> pd.DataFrame:
+    """
+    Keep every feature column; flatten multi-level column labels in place before returning X.
+    """
+    # No real feature selection happens yet, so single-level frames pass through untouched.
+    if X.columns.nlevels != 1:
+        # Each label tuple becomes one string: its levels stringified and joined with "_".
+        X.columns = ["_".join(map(str, levels)).strip() for levels in X.columns.values]
+    return X
diff --git a/...rios/kaggle/experiment/optiver-realized-volatility-prediction_template/model/select_nn.py b/...rios/kaggle/experiment/optiver-realized-volatility-prediction_template/model/select_nn.py
@@ -0,0 +1,12 @@
+import pandas as pd
+
+
+def select(X: pd.DataFrame) -> pd.DataFrame:
+    """
+    Keep every feature column; flatten multi-level column labels in place before returning X.
+    """
+    # No real feature selection happens yet, so single-level frames pass through untouched.
+    if X.columns.nlevels != 1:
+        # Each label tuple becomes one string: its levels stringified and joined with "_".
+        X.columns = ["_".join(map(str, levels)).strip() for levels in X.columns.values]
+    return X
diff --git a/...ent/scenarios/kaggle/experiment/playground-series-s3e11_template/model/select_lightgbm.py b/...ent/scenarios/kaggle/experiment/playground-series-s3e11_template/model/select_lightgbm.py
@@ -0,0 +1,12 @@
+import pandas as pd
+
+
+def select(X: pd.DataFrame) -> pd.DataFrame:
+    """
+    Keep every feature column; flatten multi-level column labels in place before returning X.
+    """
+    # No real feature selection happens yet, so single-level frames pass through untouched.
+    if X.columns.nlevels != 1:
+        # Each label tuple becomes one string: its levels stringified and joined with "_".
+        X.columns = ["_".join(map(str, levels)).strip() for levels in X.columns.values]
+    return X
diff --git a/rdagent/scenarios/kaggle/experiment/playground-series-s3e11_template/model/select_nn.py b/rdagent/scenarios/kaggle/experiment/playground-series-s3e11_template/model/select_nn.py
@@ -0,0 +1,12 @@
+import pandas as pd
+
+
+def select(X: pd.DataFrame) -> pd.DataFrame:
+    """
+    Keep every feature column; flatten multi-level column labels in place before returning X.
+    """
+    # No real feature selection happens yet, so single-level frames pass through untouched.
+    if X.columns.nlevels != 1:
+        # Each label tuple becomes one string: its levels stringified and joined with "_".
+        X.columns = ["_".join(map(str, levels)).strip() for levels in X.columns.values]
+    return X
diff --git a/rdagent/scenarios/kaggle/experiment/playground-series-s3e14_template/fea_share_preprocess.py b/rdagent/scenarios/kaggle/experiment/playground-series-s3e14_template/fea_share_preprocess.py
@@ -0,0 +1,38 @@
+import os
+
+import numpy as np  # linear algebra
+import pandas as pd  # data processing, CSV file I/O (e.g. pd.read_csv)
+from sklearn.model_selection import train_test_split
+
+
+def preprocess_script():
+    """
+    Return (X_train, X_valid, y_train, y_valid, X_test, ...) from cached pickles if present, else from the CSVs.
+    """
+    if not os.path.exists("/kaggle/input/X_train.pkl"):
+        # No cached splits: hold out 20% of train.csv for validation, with "yield" as the target.
+        train_df = pd.read_csv("/kaggle/input/train.csv")
+        features = train_df.drop(["yield", "id"], axis=1)
+        X_train, X_valid, y_train, y_valid = train_test_split(
+            features, train_df["yield"], test_size=0.2, random_state=2023
+        )
+        y_train = pd.Series(y_train).reset_index(drop=True)
+        y_valid = pd.Series(y_valid).reset_index(drop=True)
+
+        # The test set's "id" column is returned separately from its feature columns.
+        test_df = pd.read_csv("/kaggle/input/test.csv")
+        ids = test_df["id"]
+        return X_train, X_valid, y_train, y_valid, test_df.drop(["id"], axis=1), ids
+
+    # Cached splits exist: load them in a fixed order.
+    X_train = pd.read_pickle("/kaggle/input/X_train.pkl")
+    X_valid = pd.read_pickle("/kaggle/input/X_valid.pkl")
+    y_train = pd.read_pickle("/kaggle/input/y_train.pkl")
+    y_valid = pd.read_pickle("/kaggle/input/y_valid.pkl")
+    X_test = pd.read_pickle("/kaggle/input/X_test.pkl")
+    others = pd.read_pickle("/kaggle/input/others.pkl")
+
+    # Labels are wrapped as Series and given a fresh positional index.
+    y_train = pd.Series(y_train).reset_index(drop=True)
+    y_valid = pd.Series(y_valid).reset_index(drop=True)
+    return X_train, X_valid, y_train, y_valid, X_test, *others
diff --git a/rdagent/scenarios/kaggle/experiment/playground-series-s3e14_template/feature/feature.py b/rdagent/scenarios/kaggle/experiment/playground-series-s3e14_template/feature/feature.py
@@ -0,0 +1,23 @@
+import pandas as pd
+
+"""
+Here is the feature engineering code for each task, with a class that has a fit and transform method.
+Remember
+"""
+
+
+class IdentityFeature:
+    def fit(self, train_df: pd.DataFrame):
+        """
+        No-op: the identity feature has nothing to learn from the training data.
+        """
+        return None
+
+    def transform(self, X: pd.DataFrame):
+        """
+        Return the input data unchanged.
+        """
+        return X
+
+
+feature_engineering_cls = IdentityFeature
diff --git a/.../scenarios/kaggle/experiment/playground-series-s3e14_template/model/model_randomforest.py b/.../scenarios/kaggle/experiment/playground-series-s3e14_template/model/model_randomforest.py
@@ -0,0 +1,27 @@
+import pandas as pd
+from sklearn.ensemble import RandomForestRegressor
+
+
+def fit(X_train: pd.DataFrame, y_train: pd.DataFrame, X_valid: pd.DataFrame, y_valid: pd.DataFrame):
+    """Train a RandomForestRegressor on the training split; X_valid and y_valid are accepted but unused."""
+    regressor = RandomForestRegressor(
+        n_estimators=100,
+        max_depth=10,
+        min_samples_split=2,
+        min_samples_leaf=1,
+        max_features="sqrt",
+        random_state=2023,
+        n_jobs=-1,
+        verbose=1,
+    )
+    # Fit on the training data only and hand back the fitted estimator.
+    regressor.fit(X_train, y_train)
+    return regressor
+
+
+def predict(model, X_test):
+    """
+    Run the given model on X_test and return its predictions reshaped to a single column.
+    """
+    predictions = model.predict(X_test)
+    return predictions.reshape(-1, 1)
diff --git a/rdagent/scenarios/kaggle/experiment/playground-series-s3e14_template/model/model_xgboost.py b/rdagent/scenarios/kaggle/experiment/playground-series-s3e14_template/model/model_xgboost.py
@@ -0,0 +1,34 @@
+"""
+motivation  of the model
+"""
+
+import pandas as pd
+import xgboost as xgb
+
+
+def fit(X_train: pd.DataFrame, y_train: pd.DataFrame, X_valid: pd.DataFrame, y_valid: pd.DataFrame):
+    """Train an XGBRegressor on the training split; X_valid and y_valid are accepted but unused."""
+    regressor = xgb.XGBRegressor(
+        n_estimators=280,
+        learning_rate=0.05,
+        max_depth=10,
+        subsample=1.0,
+        colsample_bytree=1.0,
+        tree_method="hist",
+        enable_categorical=True,
+        verbosity=1,
+        min_child_weight=3,
+        base_score=4.6,
+        random_state=2023,
+    )
+    # Fit on the training data only and hand back the fitted estimator.
+    regressor.fit(X_train, y_train)
+    return regressor
+
+
+def predict(model, X_test):
+    """
+    Run the given model on X_test and return its predictions reshaped to a single column.
+    """
+    predictions = model.predict(X_test)
+    return predictions.reshape(-1, 1)
diff --git a/...ent/scenarios/kaggle/experiment/playground-series-s3e14_template/model/select_lightgbm.py b/...ent/scenarios/kaggle/experiment/playground-series-s3e14_template/model/select_lightgbm.py
@@ -0,0 +1,12 @@
+import pandas as pd
+
+
+def select(X: pd.DataFrame) -> pd.DataFrame:
+    """
+    Keep every feature column; flatten multi-level column labels in place before returning X.
+    """
+    # No real feature selection happens yet, so single-level frames pass through untouched.
+    if X.columns.nlevels != 1:
+        # Each label tuple becomes one string: its levels stringified and joined with "_".
+        X.columns = ["_".join(map(str, levels)).strip() for levels in X.columns.values]
+    return X
diff --git a/rdagent/scenarios/kaggle/experiment/playground-series-s3e14_template/model/select_nn.py b/rdagent/scenarios/kaggle/experiment/playground-series-s3e14_template/model/select_nn.py
@@ -0,0 +1,12 @@
+import pandas as pd
+
+
+def select(X: pd.DataFrame) -> pd.DataFrame:
+    """
+    Keep every feature column; flatten multi-level column labels in place before returning X.
+    """
+    # No real feature selection happens yet, so single-level frames pass through untouched.
+    if X.columns.nlevels != 1:
+        # Each label tuple becomes one string: its levels stringified and joined with "_".
+        X.columns = ["_".join(map(str, levels)).strip() for levels in X.columns.values]
+    return X
diff --git a/...scenarios/kaggle/experiment/playground-series-s3e14_template/model/select_randomforest.py b/...scenarios/kaggle/experiment/playground-series-s3e14_template/model/select_randomforest.py
@@ -0,0 +1,12 @@
+import pandas as pd
+
+
+def select(X: pd.DataFrame) -> pd.DataFrame:
+    """
+    Keep every feature column; flatten multi-level column labels in place before returning X.
+    """
+    # No real feature selection happens yet, so single-level frames pass through untouched.
+    if X.columns.nlevels != 1:
+        # Each label tuple becomes one string: its levels stringified and joined with "_".
+        X.columns = ["_".join(map(str, levels)).strip() for levels in X.columns.values]
+    return X