Skip to content

Commit

Permalink
formatting updates
Browse files Browse the repository at this point in the history
  • Loading branch information
rasbt committed Jun 17, 2024
1 parent f627411 commit 7b2174b
Showing 1 changed file with 11 additions and 24 deletions.
35 changes: 11 additions & 24 deletions ch07/01_main-chapter-code/ch07.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -48,10 +48,10 @@
"name": "stdout",
"output_type": "stream",
"text": [
"matplotlib version: 3.7.1\n",
"tiktoken version: 0.7.0\n",
"torch version: 2.3.0+cu121\n",
"tqdm version: 4.66.4\n",
"matplotlib version: 3.9.0\n",
"tiktoken version: 0.5.1\n",
"torch version: 2.2.2\n",
"tqdm version: 4.66.2\n",
"tensorflow version: 2.15.0\n"
]
}
Expand Down Expand Up @@ -174,7 +174,7 @@
"\n",
" if not os.path.exists(file_path):\n",
" with urllib.request.urlopen(url) as response:\n",
" text_data = response.read().decode('utf-8')\n",
" text_data = response.read().decode(\"utf-8\")\n",
" with open(file_path, \"w\", encoding=\"utf-8\") as file:\n",
" file.write(text_data)\n",
" else:\n",
Expand Down Expand Up @@ -221,13 +221,12 @@
"output_type": "stream",
"text": [
"Example entry:\n",
"\n",
" {'instruction': 'Identify the correct spelling of the following word.', 'input': 'Ocassion', 'output': \"The correct spelling is 'Occasion.'\"}\n"
]
}
],
"source": [
"print(\"Example entry:\\n\\n\", data[50])"
"print(\"Example entry:\\n\", data[50])"
]
},
{
Expand Down Expand Up @@ -257,13 +256,12 @@
"output_type": "stream",
"text": [
"Another example entry:\n",
"\n",
" {'instruction': \"What is an antonym of 'complicated'?\", 'input': '', 'output': \"An antonym of 'complicated' is 'simple'.\"}\n"
]
}
],
"source": [
"print(\"Another example entry:\\n\\n\", data[999])"
"print(\"Another example entry:\\n\", data[999])"
]
},
{
Expand Down Expand Up @@ -511,20 +509,6 @@
{
"cell_type": "code",
"execution_count": 10,
"id": "K6MWf0lhu8GP",
"metadata": {
"id": "K6MWf0lhu8GP"
},
"outputs": [],
"source": [
"import tiktoken\n",
"\n",
"tokenizer = tiktoken.get_encoding(\"gpt2\")"
]
},
{
"cell_type": "code",
"execution_count": 11,
"id": "adc29dc4-f1c7-4c71-937b-95119d6239bb",
"metadata": {
"id": "adc29dc4-f1c7-4c71-937b-95119d6239bb"
Expand Down Expand Up @@ -569,7 +553,7 @@
},
{
"cell_type": "code",
"execution_count": 12,
"execution_count": 11,
"id": "ff24fe1a-5746-461c-ad3d-b6d84a1a7c96",
"metadata": {
"colab": {
Expand All @@ -588,6 +572,9 @@
}
],
"source": [
"import tiktoken\n",
"tokenizer = tiktoken.get_encoding(\"gpt2\")\n",
"\n",
"print(tokenizer.encode(\"<|endoftext|>\", allowed_special={\"<|endoftext|>\"}))"
]
},
Expand Down

0 comments on commit 7b2174b

Please sign in to comment.