Commit

clean up the notebook
Goekdeniz-Guelmez committed Oct 4, 2024
1 parent 9d9eb46 commit 9409eee
Showing 1 changed file with 8 additions and 60 deletions.
example-fineweb.ipynb: 68 changes (8 additions & 60 deletions)
@@ -2,7 +2,7 @@
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"execution_count": 3,
"id": "5a1b6d40-c917-4f30-adf3-c79a50cbc1be",
"metadata": {},
"outputs": [],
@@ -14,7 +14,7 @@
},
{
"cell_type": "code",
"execution_count": 4,
"execution_count": 5,
"id": "4552c1ab-b20f-48fc-aa4d-3bf1576261ec",
"metadata": {},
"outputs": [],
@@ -30,7 +30,7 @@
"\n",
"from trainer.SFTTrainer import train\n",
"from model.args import MOEModelArgs\n",
"from model.KANamav5 import KANamav5\n",
"from model.KANaMoEv1 import KANaMoEv1\n",
"\n",
"from utils import load_model, quick_inference\n",
"\n",
@@ -39,7 +39,7 @@
},
{
"cell_type": "code",
"execution_count": 5,
"execution_count": 6,
"id": "77e0b467-149e-4247-bcf6-792ee89fba9f",
"metadata": {},
"outputs": [],
@@ -65,18 +65,10 @@
},
{
"cell_type": "code",
"execution_count": 6,
"execution_count": null,
"id": "2a900a88-b089-4089-beaa-474641957678",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"[LOADING TOKENIZER]\n"
]
}
],
"outputs": [],
"source": [
"tokenizer = AutoTokenizer.from_pretrained(\"Qwen/Qwen2.5-7B\")\n",
"print(\"[TOKENIZER LOADED]\")\n",
@@ -103,51 +95,7 @@
"execution_count": null,
"id": "8d4fab82-866a-426e-91dc-5d424e5dc491",
"metadata": {},
"outputs": [
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "e09ca08e820741309f9c0792a4a7c96b",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"Resolving data files: 0%| | 0/23781 [00:00<?, ?it/s]"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "946050163a5d4e569072ef57ff221ae9",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"Resolving data files: 0%| | 0/250 [00:00<?, ?it/s]"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"\n",
"Processing dataset:\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Tokenizing dataset: 1103585it [1:02:45, 329.69it/s]"
]
}
],
"outputs": [],
"source": [
"# Load the dataset (replace the old one)\n",
"# use the new dataset \"sample-10BT\" (adjust as needed)\n",
@@ -246,7 +194,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.12"
"version": "3.10.15"
}
},
"nbformat": 4,
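The truncated cells above only hint at how the data is pulled in, so below is a minimal sketch, assuming the standard Hugging Face transformers and datasets APIs, of the tokenizer and FineWeb loading/tokenizing flow. Only the "Qwen/Qwen2.5-7B" tokenizer name, the "sample-10BT" config, and the [TOKENIZER LOADED] message come from the diff; the "HuggingFaceFW/fineweb" repo id, the streaming mode, and the max_length value are assumptions, not the repository's actual code.

```python
# Minimal sketch of the loading/tokenizing flow the truncated cells appear to perform.
# Assumptions: the FineWeb repo id ("HuggingFaceFW/fineweb"), streaming mode, and
# max_length are illustrative; only "Qwen/Qwen2.5-7B" and "sample-10BT" appear in the diff.
from datasets import load_dataset
from transformers import AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("Qwen/Qwen2.5-7B")
print("[TOKENIZER LOADED]")

# Stream the "sample-10BT" subset so the full 10B-token sample is not downloaded up front.
dataset = load_dataset("HuggingFaceFW/fineweb", name="sample-10BT",
                       split="train", streaming=True)

def tokenize(example):
    # Truncate each document to a fixed context length (the value here is an assumption).
    return tokenizer(example["text"], truncation=True, max_length=1024)

tokenized = dataset.map(tokenize)  # lazy when streaming

# Pull one example to confirm the pipeline produces token ids.
first = next(iter(tokenized))
print(len(first["input_ids"]))
```

Streaming keeps the 10B-token sample off local disk and makes the map call lazy, which is consistent with the long-running "Tokenizing dataset" progress bar that this commit strips from the cell outputs.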
