diff --git a/ch07/01_main-chapter-code/ch07.ipynb b/ch07/01_main-chapter-code/ch07.ipynb index fc6383ff..2af47bcf 100644 --- a/ch07/01_main-chapter-code/ch07.ipynb +++ b/ch07/01_main-chapter-code/ch07.ipynb @@ -48,10 +48,10 @@ "name": "stdout", "output_type": "stream", "text": [ - "matplotlib version: 3.7.1\n", - "tiktoken version: 0.7.0\n", - "torch version: 2.3.0+cu121\n", - "tqdm version: 4.66.4\n", + "matplotlib version: 3.9.0\n", + "tiktoken version: 0.5.1\n", + "torch version: 2.2.2\n", + "tqdm version: 4.66.2\n", "tensorflow version: 2.15.0\n" ] } @@ -174,7 +174,7 @@ "\n", " if not os.path.exists(file_path):\n", " with urllib.request.urlopen(url) as response:\n", - " text_data = response.read().decode('utf-8')\n", + " text_data = response.read().decode(\"utf-8\")\n", " with open(file_path, \"w\", encoding=\"utf-8\") as file:\n", " file.write(text_data)\n", " else:\n", @@ -221,13 +221,12 @@ "output_type": "stream", "text": [ "Example entry:\n", - "\n", " {'instruction': 'Identify the correct spelling of the following word.', 'input': 'Ocassion', 'output': \"The correct spelling is 'Occasion.'\"}\n" ] } ], "source": [ - "print(\"Example entry:\\n\\n\", data[50])" + "print(\"Example entry:\\n\", data[50])" ] }, { @@ -257,13 +256,12 @@ "output_type": "stream", "text": [ "Another example entry:\n", - "\n", " {'instruction': \"What is an antonym of 'complicated'?\", 'input': '', 'output': \"An antonym of 'complicated' is 'simple'.\"}\n" ] } ], "source": [ - "print(\"Another example entry:\\n\\n\", data[999])" + "print(\"Another example entry:\\n\", data[999])" ] }, { @@ -511,20 +509,6 @@ { "cell_type": "code", "execution_count": 10, - "id": "K6MWf0lhu8GP", - "metadata": { - "id": "K6MWf0lhu8GP" - }, - "outputs": [], - "source": [ - "import tiktoken\n", - "\n", - "tokenizer = tiktoken.get_encoding(\"gpt2\")" - ] - }, - { - "cell_type": "code", - "execution_count": 11, "id": "adc29dc4-f1c7-4c71-937b-95119d6239bb", "metadata": { "id": "adc29dc4-f1c7-4c71-937b-95119d6239bb" @@ -569,7 +553,7 @@ }, { "cell_type": "code", - "execution_count": 12, + "execution_count": 11, "id": "ff24fe1a-5746-461c-ad3d-b6d84a1a7c96", "metadata": { "colab": { @@ -588,6 +572,9 @@ } ], "source": [ + "import tiktoken\n", + "tokenizer = tiktoken.get_encoding(\"gpt2\")\n", + "\n", "print(tokenizer.encode(\"<|endoftext|>\", allowed_special={\"<|endoftext|>\"}))" ] },