diff --git a/analysis/hf_reanalysis/D_effective-raw.html b/analysis/hf_reanalysis/D_effective-raw.html new file mode 100644 index 000000000..389cf20c9 --- /dev/null +++ b/analysis/hf_reanalysis/D_effective-raw.html @@ -0,0 +1,14 @@ + + + +
+
+ + \ No newline at end of file diff --git a/analysis/hf_reanalysis/reanalysis.ipynb b/analysis/hf_reanalysis/reanalysis.ipynb index 712c4bb36..91e3d1dcc 100644 --- a/analysis/hf_reanalysis/reanalysis.ipynb +++ b/analysis/hf_reanalysis/reanalysis.ipynb @@ -977,7 +977,7 @@ "id": "e1e91ab4-ae5e-40ff-982e-713925b46922", "metadata": {}, "source": [ - "Visualize how epoch number affects the Chinchilla Scaling Law" + "Visualize how multi-epoch training pulls away from the Chinchilla scaling law" ] }, { @@ -1026,6 +1026,48 @@ " savepath='single-epoch-runs-fitted-multiepoch-D-residuals.html'\n", ")" ] + }, + { + "cell_type": "code", + "execution_count": 42, + "id": "1e63c213-4286-468c-a0ea-93d336c98293", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "def compute_effective_data(params, L, N):\n", + " a, b, e, alpha, beta = params\n", + " \n", + " quot = np.exp(b) / (L - np.exp(e) - np.exp(a)/N**alpha)\n", + " \n", + " return np.power(quot, 1/beta)" + ] + }, + { + "cell_type": "code", + "execution_count": 43, + "id": "831a3693-9b00-4bca-b36e-9df1bf170374", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "runs = {\n", + " **runs, \n", + " 'D_effective': compute_effective_data(best_result.x, runs['L'], runs['N'])\n", + "}\n", + "\n", + "scaling_scatter(\n", + " runs,\n", + " x_key='D',\n", + " y_key='R',\n", + " z_key='N',\n", + " color_key='D_effective',\n", + " color_type='log',\n", + " savepath='D_effective-raw.html',\n", + ")" + ] } ], "metadata": {