diff --git a/doxa-challenges/challenge-2/starter.ipynb b/doxa-challenges/challenge-2/starter.ipynb index e227082..fc5991a 100644 --- a/doxa-challenges/challenge-2/starter.ipynb +++ b/doxa-challenges/challenge-2/starter.ipynb @@ -53,7 +53,8 @@ "metadata": {}, "outputs": [], "source": [ - "%pip install numpy pandas matplotlib seaborn scikit-learn doxa-cli ipympl" + "%pip install numpy pandas matplotlib seaborn scikit-learn ipympl\n", + "%pip install -U doxa-cli" ] }, { @@ -112,11 +113,7 @@ "metadata": {}, "outputs": [], "source": [ - "device = (\n", - " \"cuda\"\n", - " if torch.cuda.is_available()\n", - " else \"cpu\"\n", - ")\n", + "device = \"cuda\" if torch.cuda.is_available() else \"cpu\"\n", "print(f\"Using {device} device\")" ] }, @@ -142,7 +139,7 @@ "source": [ "# Load the data\n", "# !pip install ucimlrepo # uncomment this line if ucimlrepo is not installed\n", - "from ucimlrepo import fetch_ucirepo \n" + "from ucimlrepo import fetch_ucirepo" ] }, { @@ -151,10 +148,10 @@ "metadata": {}, "outputs": [], "source": [ - "# fetch dataset \n", - "wine_quality = fetch_ucirepo(id=186) \n", - " \n", - "# data (as pandas dataframes) \n", + "# fetch dataset\n", + "wine_quality = fetch_ucirepo(id=186)\n", + "\n", + "# data (as pandas dataframes)\n", "df = wine_quality.data.features" ] }, @@ -174,7 +171,7 @@ "source": [ "# TODO: Print the first five rows of the data\n", "\n", - "# Hint: use the '.head()' method\n" + "# Hint: use the '.head()' method" ] }, { @@ -183,9 +180,9 @@ "metadata": {}, "outputs": [], "source": [ - "# TODO: Print the number of rows and columns in the dataset \n", + "# TODO: Print the number of rows and columns in the dataset\n", "\n", - "# Hint: use '.shape'\n" + "# Hint: use '.shape'" ] }, { @@ -196,7 +193,7 @@ "source": [ "# TODO: Print the summary statistics for the dataset\n", "\n", - "# Hint: use '.describe()'\n" + "# Hint: use '.describe()'" ] }, { @@ -221,10 +218,10 @@ "source": [ "# Important: do not change this cell!\n", "\n", - "# We split the data into X and y variables. X are the features and y is the target variable. we wand to predict. \n", - "# We are trying to predict the alcohol content given the other variables. \n", - "X = df.drop('alcohol', axis=1)\n", - "y = df['alcohol']\n", + "# We split the data into X and y variables. X are the features and y is the target variable. we wand to predict.\n", + "# We are trying to predict the alcohol content given the other variables.\n", + "X = df.drop(\"alcohol\", axis=1)\n", + "y = df[\"alcohol\"]\n", "\n", "# We done covert the Matrix X and vector y in numpy arrays.\n", "X = X.to_numpy()\n", @@ -266,10 +263,8 @@ "num_input_features, num_hidden_neurons = X_train.shape[1], 10\n", "model = nn.Sequential(\n", " # TODO: add layers to our model\n", - "\n", " # Note: remember that we are trying to predict a continuous variable.\n", " # Our output layer should have only one neuron, and our input layer should be the number of columns in X.\n", - "\n", ")\n", "\n", "# Move model to GPU if available\n", @@ -313,7 +308,7 @@ "source": [ "# TODO: replace 'None' with your loss function.\n", "\n", - "# Hint: we are trying to predict a continuous variable.\n" + "# Hint: we are trying to predict a continuous variable." ] }, { @@ -346,7 +341,7 @@ "metadata": {}, "outputs": [], "source": [ - "#Keep track of losses\n", + "# Keep track of losses\n", "plotlosses = PlotLosses()\n", "\n", "# Convert our training data to tensors\n", @@ -400,12 +395,12 @@ "with torch.no_grad():\n", " # Print the loss on the training data\n", " y_pred_train = model(torch.from_numpy(X_train).float().to(device)).numpy()\n", - " mse_loss_train = np.mean((y_pred_train - y_train)**2) # Mean Square Error loss\n", + " mse_loss_train = np.mean((y_pred_train - y_train) ** 2) # Mean Square Error loss\n", " print(f\"Train MSE loss: {mse_loss_train:.2f}\")\n", - " \n", + "\n", " # Print the loss on the test data\n", " y_pred_test = model(torch.from_numpy(X_test).float().to(device)).numpy()\n", - " mse_loss_test = np.mean((y_pred_test - y_test)**2) # Mean Square Error loss\n", + " mse_loss_test = np.mean((y_pred_test - y_test) ** 2) # Mean Square Error loss\n", "\n", " print(f\"Test MSE loss: {mse_loss_test:.2f}\")" ] @@ -425,11 +420,11 @@ "source": [ "# Pass our data through our neural network\n", "model.eval()\n", - "with torch.no_grad(): \n", + "with torch.no_grad():\n", " # Print the loss on the test data\n", " predictions = model(torch.from_numpy(X_test).float().to(device)).numpy().squeeze()\n", "\n", - "assert predictions.shape == (1300,) \n", + "assert predictions.shape == (1300,)\n", "\n", "# Take a look at the first 20 predictions\n", "predictions[:20]" @@ -441,17 +436,24 @@ "metadata": {}, "outputs": [], "source": [ - "# Prepare our submission package\n", "os.makedirs(\"submission\", exist_ok=True)\n", "\n", "with open(\"submission/y.txt\", \"w\") as f:\n", " f.writelines([f\"{prediction}\\n\" for prediction in predictions])\n", "\n", "with open(\"submission/doxa.yaml\", \"w\") as f:\n", - " f.write(\"competition: epl\\nenvironment: cpu\\nlanguage: python\\nentrypoint: run.py\")\n", + " f.write(\n", + " \"competition: uclais-2023-2\\nenvironment: cpu\\nlanguage: python\\nentrypoint: run.py\"\n", + " )\n", "\n", "with open(\"submission/run.py\", \"w\") as f:\n", - " f.write(\"with open('y.txt', 'r') as f: print(f.read().strip())\")" + " f.write(\n", + " \"\"\"import os\n", + "\n", + "with open('y.txt', 'r') as f:\n", + " with open(os.environ[\"DOXA_STREAMS\"] + \"/out\", \"w\") as g:\n", + " g.write(f.read().strip())\"\"\"\n", + " )" ] }, {