Skip to content

Commit

Permalink
Update the Challenge 2 Notebook
Browse files Browse the repository at this point in the history
  • Loading branch information
jeremylo committed Nov 15, 2023
1 parent f7ff7c6 commit 79b5b9d
Showing 1 changed file with 33 additions and 31 deletions.
64 changes: 33 additions & 31 deletions doxa-challenges/challenge-2/starter.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,8 @@
"metadata": {},
"outputs": [],
"source": [
"%pip install numpy pandas matplotlib seaborn scikit-learn doxa-cli ipympl"
"%pip install numpy pandas matplotlib seaborn scikit-learn ipympl\n",
"%pip install -U doxa-cli"
]
},
{
Expand Down Expand Up @@ -112,11 +113,7 @@
"metadata": {},
"outputs": [],
"source": [
"device = (\n",
" \"cuda\"\n",
" if torch.cuda.is_available()\n",
" else \"cpu\"\n",
")\n",
"device = \"cuda\" if torch.cuda.is_available() else \"cpu\"\n",
"print(f\"Using {device} device\")"
]
},
Expand All @@ -142,7 +139,7 @@
"source": [
"# Load the data\n",
"# !pip install ucimlrepo # uncomment this line if ucimlrepo is not installed\n",
"from ucimlrepo import fetch_ucirepo \n"
"from ucimlrepo import fetch_ucirepo"
]
},
{
Expand All @@ -151,10 +148,10 @@
"metadata": {},
"outputs": [],
"source": [
"# fetch dataset \n",
"wine_quality = fetch_ucirepo(id=186) \n",
" \n",
"# data (as pandas dataframes) \n",
"# fetch dataset\n",
"wine_quality = fetch_ucirepo(id=186)\n",
"\n",
"# data (as pandas dataframes)\n",
"df = wine_quality.data.features"
]
},
Expand All @@ -174,7 +171,7 @@
"source": [
"# TODO: Print the first five rows of the data\n",
"\n",
"# Hint: use the '.head()' method\n"
"# Hint: use the '.head()' method"
]
},
{
Expand All @@ -183,9 +180,9 @@
"metadata": {},
"outputs": [],
"source": [
"# TODO: Print the number of rows and columns in the dataset \n",
"# TODO: Print the number of rows and columns in the dataset\n",
"\n",
"# Hint: use '.shape'\n"
"# Hint: use '.shape'"
]
},
{
Expand All @@ -196,7 +193,7 @@
"source": [
"# TODO: Print the summary statistics for the dataset\n",
"\n",
"# Hint: use '.describe()'\n"
"# Hint: use '.describe()'"
]
},
{
Expand All @@ -221,10 +218,10 @@
"source": [
"# Important: do not change this cell!\n",
"\n",
"# We split the data into X and y variables. X are the features and y is the target variable. we wand to predict. \n",
"# We are trying to predict the alcohol content given the other variables. \n",
"X = df.drop('alcohol', axis=1)\n",
"y = df['alcohol']\n",
"# We split the data into X and y variables. X are the features and y is the target variable we want to predict.\n",
"# We are trying to predict the alcohol content given the other variables.\n",
"X = df.drop(\"alcohol\", axis=1)\n",
"y = df[\"alcohol\"]\n",
"\n",
"# We then convert the matrix X and the vector y into numpy arrays.\n",
"X = X.to_numpy()\n",
Expand Down Expand Up @@ -266,10 +263,8 @@
"num_input_features, num_hidden_neurons = X_train.shape[1], 10\n",
"model = nn.Sequential(\n",
" # TODO: add layers to our model\n",
"\n",
" # Note: remember that we are trying to predict a continuous variable.\n",
" # Our output layer should have only one neuron, and our input layer should be the number of columns in X.\n",
"\n",
")\n",
"\n",
"# Move model to GPU if available\n",
Expand Down Expand Up @@ -313,7 +308,7 @@
"source": [
"# TODO: replace 'None' with your loss function.\n",
"\n",
"# Hint: we are trying to predict a continuous variable.\n"
"# Hint: we are trying to predict a continuous variable."
]
},
{
Expand Down Expand Up @@ -346,7 +341,7 @@
"metadata": {},
"outputs": [],
"source": [
"#Keep track of losses\n",
"# Keep track of losses\n",
"plotlosses = PlotLosses()\n",
"\n",
"# Convert our training data to tensors\n",
Expand Down Expand Up @@ -400,12 +395,12 @@
"with torch.no_grad():\n",
" # Print the loss on the training data\n",
" y_pred_train = model(torch.from_numpy(X_train).float().to(device)).numpy()\n",
" mse_loss_train = np.mean((y_pred_train - y_train)**2) # Mean Square Error loss\n",
" mse_loss_train = np.mean((y_pred_train - y_train) ** 2) # Mean Square Error loss\n",
" print(f\"Train MSE loss: {mse_loss_train:.2f}\")\n",
" \n",
"\n",
" # Print the loss on the test data\n",
" y_pred_test = model(torch.from_numpy(X_test).float().to(device)).numpy()\n",
" mse_loss_test = np.mean((y_pred_test - y_test)**2) # Mean Square Error loss\n",
" mse_loss_test = np.mean((y_pred_test - y_test) ** 2) # Mean Square Error loss\n",
"\n",
" print(f\"Test MSE loss: {mse_loss_test:.2f}\")"
]
Expand All @@ -425,11 +420,11 @@
"source": [
"# Pass our data through our neural network\n",
"model.eval()\n",
"with torch.no_grad(): \n",
"with torch.no_grad():\n",
" # Compute the model's predictions on the test data\n",
" predictions = model(torch.from_numpy(X_test).float().to(device)).numpy().squeeze()\n",
"\n",
"assert predictions.shape == (1300,) \n",
"assert predictions.shape == (1300,)\n",
"\n",
"# Take a look at the first 20 predictions\n",
"predictions[:20]"
Expand All @@ -441,17 +436,24 @@
"metadata": {},
"outputs": [],
"source": [
"# Prepare our submission package\n",
"os.makedirs(\"submission\", exist_ok=True)\n",
"\n",
"with open(\"submission/y.txt\", \"w\") as f:\n",
" f.writelines([f\"{prediction}\\n\" for prediction in predictions])\n",
"\n",
"with open(\"submission/doxa.yaml\", \"w\") as f:\n",
" f.write(\"competition: epl\\nenvironment: cpu\\nlanguage: python\\nentrypoint: run.py\")\n",
" f.write(\n",
" \"competition: uclais-2023-2\\nenvironment: cpu\\nlanguage: python\\nentrypoint: run.py\"\n",
" )\n",
"\n",
"with open(\"submission/run.py\", \"w\") as f:\n",
" f.write(\"with open('y.txt', 'r') as f: print(f.read().strip())\")"
" f.write(\n",
" \"\"\"import os\n",
"\n",
"with open('y.txt', 'r') as f:\n",
" with open(os.environ[\"DOXA_STREAMS\"] + \"/out\", \"w\") as g:\n",
" g.write(f.read().strip())\"\"\"\n",
" )"
]
},
{
Expand Down

0 comments on commit 79b5b9d

Please sign in to comment.