From d0bb749d085a2a57cc71f1d901929cdac68c10b1 Mon Sep 17 00:00:00 2001 From: Salla Kaushik Date: Sun, 5 Jan 2025 02:14:19 +0530 Subject: [PATCH] Added "Expanding Contractions" Feature to Preprocessing --- .../aossie-fake-news-tfidf-nn.ipynb | 606 ++++-------------- 1 file changed, 140 insertions(+), 466 deletions(-) diff --git a/ML/Fake NEWS/Models/Jupyter Notebook/aossie-fake-news-tfidf-nn.ipynb b/ML/Fake NEWS/Models/Jupyter Notebook/aossie-fake-news-tfidf-nn.ipynb index 9648a657..d27a9ea6 100644 --- a/ML/Fake NEWS/Models/Jupyter Notebook/aossie-fake-news-tfidf-nn.ipynb +++ b/ML/Fake NEWS/Models/Jupyter Notebook/aossie-fake-news-tfidf-nn.ipynb @@ -2,67 +2,142 @@ "cells": [ { "cell_type": "code", - "execution_count": 7, + "execution_count": 1, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "Requirement already satisfied: nltk in c:\\users\\visha\\anaconda3\\envs\\pyenv\\lib\\site-packages (3.8.1)\n", - "Requirement already satisfied: tqdm in c:\\users\\visha\\anaconda3\\envs\\pyenv\\lib\\site-packages (4.66.4)\n", - "Requirement already satisfied: keras in c:\\users\\visha\\anaconda3\\envs\\pyenv\\lib\\site-packages (3.3.3)\n", - "Requirement already satisfied: tensorflow in c:\\users\\visha\\anaconda3\\envs\\pyenv\\lib\\site-packages (2.16.1)\n", - "Requirement already satisfied: click in c:\\users\\visha\\anaconda3\\envs\\pyenv\\lib\\site-packages (from nltk) (8.1.7)\n", - "Requirement already satisfied: joblib in c:\\users\\visha\\anaconda3\\envs\\pyenv\\lib\\site-packages (from nltk) (1.4.2)\n", - "Requirement already satisfied: regex>=2021.8.3 in c:\\users\\visha\\anaconda3\\envs\\pyenv\\lib\\site-packages (from nltk) (2024.5.15)\n", - "Requirement already satisfied: colorama in c:\\users\\visha\\anaconda3\\envs\\pyenv\\lib\\site-packages (from tqdm) (0.4.6)\n", - "Requirement already satisfied: absl-py in c:\\users\\visha\\anaconda3\\envs\\pyenv\\lib\\site-packages (from keras) (2.1.0)\n", - "Requirement already satisfied: numpy in c:\\users\\visha\\anaconda3\\envs\\pyenv\\lib\\site-packages (from keras) (1.26.4)\n", - "Requirement already satisfied: rich in c:\\users\\visha\\anaconda3\\envs\\pyenv\\lib\\site-packages (from keras) (13.7.1)\n", - "Requirement already satisfied: namex in c:\\users\\visha\\anaconda3\\envs\\pyenv\\lib\\site-packages (from keras) (0.0.8)\n", - "Requirement already satisfied: h5py in c:\\users\\visha\\anaconda3\\envs\\pyenv\\lib\\site-packages (from keras) (3.11.0)\n", - "Requirement already satisfied: optree in c:\\users\\visha\\anaconda3\\envs\\pyenv\\lib\\site-packages (from keras) (0.11.0)\n", - "Requirement already satisfied: ml-dtypes in c:\\users\\visha\\anaconda3\\envs\\pyenv\\lib\\site-packages (from keras) (0.3.2)\n", - "Requirement already satisfied: tensorflow-intel==2.16.1 in c:\\users\\visha\\anaconda3\\envs\\pyenv\\lib\\site-packages (from tensorflow) (2.16.1)\n", - "Requirement already satisfied: astunparse>=1.6.0 in c:\\users\\visha\\anaconda3\\envs\\pyenv\\lib\\site-packages (from tensorflow-intel==2.16.1->tensorflow) (1.6.3)\n", - "Requirement already satisfied: flatbuffers>=23.5.26 in c:\\users\\visha\\anaconda3\\envs\\pyenv\\lib\\site-packages (from tensorflow-intel==2.16.1->tensorflow) (24.3.25)\n", - "Requirement already satisfied: gast!=0.5.0,!=0.5.1,!=0.5.2,>=0.2.1 in c:\\users\\visha\\anaconda3\\envs\\pyenv\\lib\\site-packages (from tensorflow-intel==2.16.1->tensorflow) (0.5.4)\n", - "Requirement already satisfied: google-pasta>=0.1.1 in c:\\users\\visha\\anaconda3\\envs\\pyenv\\lib\\site-packages (from tensorflow-intel==2.16.1->tensorflow) (0.2.0)\n", - "Requirement already satisfied: libclang>=13.0.0 in c:\\users\\visha\\anaconda3\\envs\\pyenv\\lib\\site-packages (from tensorflow-intel==2.16.1->tensorflow) (18.1.1)\n", - "Requirement already satisfied: opt-einsum>=2.3.2 in c:\\users\\visha\\anaconda3\\envs\\pyenv\\lib\\site-packages (from tensorflow-intel==2.16.1->tensorflow) (3.3.0)\n", - "Requirement already satisfied: packaging in c:\\users\\visha\\anaconda3\\envs\\pyenv\\lib\\site-packages (from tensorflow-intel==2.16.1->tensorflow) (24.0)\n", - "Requirement already satisfied: protobuf!=4.21.0,!=4.21.1,!=4.21.2,!=4.21.3,!=4.21.4,!=4.21.5,<5.0.0dev,>=3.20.3 in c:\\users\\visha\\anaconda3\\envs\\pyenv\\lib\\site-packages (from tensorflow-intel==2.16.1->tensorflow) (4.25.3)\n", - "Requirement already satisfied: requests<3,>=2.21.0 in c:\\users\\visha\\anaconda3\\envs\\pyenv\\lib\\site-packages (from tensorflow-intel==2.16.1->tensorflow) (2.31.0)\n", - "Requirement already satisfied: setuptools in c:\\users\\visha\\anaconda3\\envs\\pyenv\\lib\\site-packages (from tensorflow-intel==2.16.1->tensorflow) (69.5.1)\n", - "Requirement already satisfied: six>=1.12.0 in c:\\users\\visha\\anaconda3\\envs\\pyenv\\lib\\site-packages (from tensorflow-intel==2.16.1->tensorflow) (1.16.0)\n", - "Requirement already satisfied: termcolor>=1.1.0 in c:\\users\\visha\\anaconda3\\envs\\pyenv\\lib\\site-packages (from tensorflow-intel==2.16.1->tensorflow) (2.4.0)\n", - "Requirement already satisfied: typing-extensions>=3.6.6 in c:\\users\\visha\\anaconda3\\envs\\pyenv\\lib\\site-packages (from tensorflow-intel==2.16.1->tensorflow) (4.11.0)\n", - "Requirement already satisfied: wrapt>=1.11.0 in c:\\users\\visha\\anaconda3\\envs\\pyenv\\lib\\site-packages (from tensorflow-intel==2.16.1->tensorflow) (1.16.0)\n", - "Requirement already satisfied: grpcio<2.0,>=1.24.3 in c:\\users\\visha\\anaconda3\\envs\\pyenv\\lib\\site-packages (from tensorflow-intel==2.16.1->tensorflow) (1.63.0)\n", - "Requirement already satisfied: tensorboard<2.17,>=2.16 in c:\\users\\visha\\anaconda3\\envs\\pyenv\\lib\\site-packages (from tensorflow-intel==2.16.1->tensorflow) (2.16.2)\n", - "Requirement already satisfied: markdown-it-py>=2.2.0 in c:\\users\\visha\\anaconda3\\envs\\pyenv\\lib\\site-packages (from rich->keras) (3.0.0)\n", - "Requirement already satisfied: pygments<3.0.0,>=2.13.0 in c:\\users\\visha\\anaconda3\\envs\\pyenv\\lib\\site-packages (from rich->keras) (2.18.0)\n", - "Requirement already satisfied: wheel<1.0,>=0.23.0 in c:\\users\\visha\\anaconda3\\envs\\pyenv\\lib\\site-packages (from astunparse>=1.6.0->tensorflow-intel==2.16.1->tensorflow) (0.43.0)\n", - "Requirement already satisfied: mdurl~=0.1 in c:\\users\\visha\\anaconda3\\envs\\pyenv\\lib\\site-packages (from markdown-it-py>=2.2.0->rich->keras) (0.1.2)\n", - "Requirement already satisfied: charset-normalizer<4,>=2 in c:\\users\\visha\\anaconda3\\envs\\pyenv\\lib\\site-packages (from requests<3,>=2.21.0->tensorflow-intel==2.16.1->tensorflow) (3.3.2)\n", - "Requirement already satisfied: idna<4,>=2.5 in c:\\users\\visha\\anaconda3\\envs\\pyenv\\lib\\site-packages (from requests<3,>=2.21.0->tensorflow-intel==2.16.1->tensorflow) (3.7)\n", - "Requirement already satisfied: urllib3<3,>=1.21.1 in c:\\users\\visha\\anaconda3\\envs\\pyenv\\lib\\site-packages (from requests<3,>=2.21.0->tensorflow-intel==2.16.1->tensorflow) (2.2.1)\n", - "Requirement already satisfied: certifi>=2017.4.17 in c:\\users\\visha\\anaconda3\\envs\\pyenv\\lib\\site-packages (from requests<3,>=2.21.0->tensorflow-intel==2.16.1->tensorflow) (2024.6.2)\n", - "Requirement already satisfied: markdown>=2.6.8 in c:\\users\\visha\\anaconda3\\envs\\pyenv\\lib\\site-packages (from tensorboard<2.17,>=2.16->tensorflow-intel==2.16.1->tensorflow) (3.6)\n", - "Requirement already satisfied: tensorboard-data-server<0.8.0,>=0.7.0 in c:\\users\\visha\\anaconda3\\envs\\pyenv\\lib\\site-packages (from tensorboard<2.17,>=2.16->tensorflow-intel==2.16.1->tensorflow) (0.7.2)\n", - "Requirement already satisfied: werkzeug>=1.0.1 in c:\\users\\visha\\anaconda3\\envs\\pyenv\\lib\\site-packages (from tensorboard<2.17,>=2.16->tensorflow-intel==2.16.1->tensorflow) (3.0.3)\n", - "Requirement already satisfied: MarkupSafe>=2.1.1 in c:\\users\\visha\\anaconda3\\envs\\pyenv\\lib\\site-packages (from werkzeug>=1.0.1->tensorboard<2.17,>=2.16->tensorflow-intel==2.16.1->tensorflow) (2.1.5)\n" + "Collecting nltk\n", + " Downloading nltk-3.9.1-py3-none-any.whl.metadata (2.9 kB)\n", + "Requirement already satisfied: tqdm in /home/salla-kaushik/conda_root/lib/python3.12/site-packages (4.66.4)\n", + "Collecting keras\n", + " Downloading keras-3.7.0-py3-none-any.whl.metadata (5.8 kB)\n", + "Collecting tensorflow\n", + " Downloading tensorflow-2.18.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (4.1 kB)\n", + "Collecting click (from nltk)\n", + " Downloading click-8.1.8-py3-none-any.whl.metadata (2.3 kB)\n", + "Collecting joblib (from nltk)\n", + " Downloading joblib-1.4.2-py3-none-any.whl.metadata (5.4 kB)\n", + "Collecting regex>=2021.8.3 (from nltk)\n", + " Downloading regex-2024.11.6-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (40 kB)\n", + "Collecting absl-py (from keras)\n", + " Downloading absl_py-2.1.0-py3-none-any.whl.metadata (2.3 kB)\n", + "Collecting numpy (from keras)\n", + " Downloading numpy-2.2.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (62 kB)\n", + "Collecting rich (from keras)\n", + " Downloading rich-13.9.4-py3-none-any.whl.metadata (18 kB)\n", + "Collecting namex (from keras)\n", + " Downloading namex-0.0.8-py3-none-any.whl.metadata (246 bytes)\n", + "Collecting h5py (from keras)\n", + " Downloading h5py-3.12.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (2.5 kB)\n", + "Collecting optree (from keras)\n", + " Downloading optree-0.13.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (47 kB)\n", + "Collecting ml-dtypes (from keras)\n", + " Downloading ml_dtypes-0.5.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (21 kB)\n", + "Requirement already satisfied: packaging in /home/salla-kaushik/conda_root/lib/python3.12/site-packages (from keras) (24.1)\n", + "Collecting astunparse>=1.6.0 (from tensorflow)\n", + " Downloading astunparse-1.6.3-py2.py3-none-any.whl.metadata (4.4 kB)\n", + "Collecting flatbuffers>=24.3.25 (from tensorflow)\n", + " Downloading flatbuffers-24.12.23-py2.py3-none-any.whl.metadata (876 bytes)\n", + "Collecting gast!=0.5.0,!=0.5.1,!=0.5.2,>=0.2.1 (from tensorflow)\n", + " Downloading gast-0.6.0-py3-none-any.whl.metadata (1.3 kB)\n", + "Collecting google-pasta>=0.1.1 (from tensorflow)\n", + " Downloading google_pasta-0.2.0-py3-none-any.whl.metadata (814 bytes)\n", + "Collecting libclang>=13.0.0 (from tensorflow)\n", + " Downloading libclang-18.1.1-py2.py3-none-manylinux2010_x86_64.whl.metadata (5.2 kB)\n", + "Collecting opt-einsum>=2.3.2 (from tensorflow)\n", + " Downloading opt_einsum-3.4.0-py3-none-any.whl.metadata (6.3 kB)\n", + "Collecting protobuf!=4.21.0,!=4.21.1,!=4.21.2,!=4.21.3,!=4.21.4,!=4.21.5,<6.0.0dev,>=3.20.3 (from tensorflow)\n", + " Downloading protobuf-5.29.2-cp38-abi3-manylinux2014_x86_64.whl.metadata (592 bytes)\n", + "Requirement already satisfied: requests<3,>=2.21.0 in /home/salla-kaushik/conda_root/lib/python3.12/site-packages (from tensorflow) (2.32.3)\n", + "Requirement already satisfied: setuptools in /home/salla-kaushik/conda_root/lib/python3.12/site-packages (from tensorflow) (72.1.0)\n", + "Requirement already satisfied: six>=1.12.0 in /home/salla-kaushik/conda_root/lib/python3.12/site-packages (from tensorflow) (1.17.0)\n", + "Collecting termcolor>=1.1.0 (from tensorflow)\n", + " Downloading termcolor-2.5.0-py3-none-any.whl.metadata (6.1 kB)\n", + "Requirement already satisfied: typing-extensions>=3.6.6 in /home/salla-kaushik/conda_root/lib/python3.12/site-packages (from tensorflow) (4.12.2)\n", + "Collecting wrapt>=1.11.0 (from tensorflow)\n", + " Downloading wrapt-1.17.0-cp312-cp312-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (6.4 kB)\n", + "Collecting grpcio<2.0,>=1.24.3 (from tensorflow)\n", + " Downloading grpcio-1.68.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (3.9 kB)\n", + "Collecting tensorboard<2.19,>=2.18 (from tensorflow)\n", + " Downloading tensorboard-2.18.0-py3-none-any.whl.metadata (1.6 kB)\n", + "Collecting numpy (from keras)\n", + " Downloading numpy-2.0.2-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (60 kB)\n", + "Collecting ml-dtypes (from keras)\n", + " Downloading ml_dtypes-0.4.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (20 kB)\n", + "Requirement already satisfied: wheel<1.0,>=0.23.0 in /home/salla-kaushik/conda_root/lib/python3.12/site-packages (from astunparse>=1.6.0->tensorflow) (0.43.0)\n", + "Requirement already satisfied: charset-normalizer<4,>=2 in /home/salla-kaushik/conda_root/lib/python3.12/site-packages (from requests<3,>=2.21.0->tensorflow) (3.3.2)\n", + "Requirement already satisfied: idna<4,>=2.5 in /home/salla-kaushik/conda_root/lib/python3.12/site-packages (from requests<3,>=2.21.0->tensorflow) (3.7)\n", + "Requirement already satisfied: urllib3<3,>=1.21.1 in /home/salla-kaushik/conda_root/lib/python3.12/site-packages (from requests<3,>=2.21.0->tensorflow) (2.2.2)\n", + "Requirement already satisfied: certifi>=2017.4.17 in /home/salla-kaushik/conda_root/lib/python3.12/site-packages (from requests<3,>=2.21.0->tensorflow) (2024.12.14)\n", + "Collecting markdown>=2.6.8 (from tensorboard<2.19,>=2.18->tensorflow)\n", + " Downloading Markdown-3.7-py3-none-any.whl.metadata (7.0 kB)\n", + "Collecting tensorboard-data-server<0.8.0,>=0.7.0 (from tensorboard<2.19,>=2.18->tensorflow)\n", + " Downloading tensorboard_data_server-0.7.2-py3-none-manylinux_2_31_x86_64.whl.metadata (1.1 kB)\n", + "Collecting werkzeug>=1.0.1 (from tensorboard<2.19,>=2.18->tensorflow)\n", + " Downloading werkzeug-3.1.3-py3-none-any.whl.metadata (3.7 kB)\n", + "Collecting markdown-it-py>=2.2.0 (from rich->keras)\n", + " Downloading markdown_it_py-3.0.0-py3-none-any.whl.metadata (6.9 kB)\n", + "Requirement already satisfied: pygments<3.0.0,>=2.13.0 in /home/salla-kaushik/conda_root/lib/python3.12/site-packages (from rich->keras) (2.18.0)\n", + "Collecting mdurl~=0.1 (from markdown-it-py>=2.2.0->rich->keras)\n", + " Downloading mdurl-0.1.2-py3-none-any.whl.metadata (1.6 kB)\n", + "Collecting MarkupSafe>=2.1.1 (from werkzeug>=1.0.1->tensorboard<2.19,>=2.18->tensorflow)\n", + " Downloading MarkupSafe-3.0.2-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (4.0 kB)\n", + "Downloading nltk-3.9.1-py3-none-any.whl (1.5 MB)\n", + "\u001b[2K \u001b[38;2;114;156;31m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m1.5/1.5 MB\u001b[0m \u001b[31m2.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m[31m2.2 MB/s\u001b[0m eta \u001b[36m0:00:01\u001b[0m\n", + "\u001b[?25hDownloading keras-3.7.0-py3-none-any.whl (1.2 MB)\n", + "\u001b[2K \u001b[38;2;114;156;31m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m1.2/1.2 MB\u001b[0m \u001b[31m3.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m MB/s\u001b[0m eta \u001b[36m0:00:01\u001b[0m\n", + "\u001b[?25hDownloading tensorflow-2.18.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (615.5 MB)\n", + "\u001b[2K \u001b[38;2;114;156;31m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m615.5/615.5 MB\u001b[0m \u001b[31m3.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0mm eta \u001b[36m0:00:01\u001b[0m[36m0:00:04\u001b[0m\n", + "\u001b[?25hDownloading absl_py-2.1.0-py3-none-any.whl (133 kB)\n", + "Downloading astunparse-1.6.3-py2.py3-none-any.whl (12 kB)\n", + "Downloading flatbuffers-24.12.23-py2.py3-none-any.whl (30 kB)\n", + "Downloading gast-0.6.0-py3-none-any.whl (21 kB)\n", + "Downloading google_pasta-0.2.0-py3-none-any.whl (57 kB)\n", + "Downloading grpcio-1.68.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (5.9 MB)\n", + "\u001b[2K \u001b[38;2;114;156;31m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m5.9/5.9 MB\u001b[0m \u001b[31m1.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m[36m0:00:01\u001b[0mm eta \u001b[36m0:00:01\u001b[0m0m\n", + "\u001b[?25hDownloading h5py-3.12.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (5.4 MB)\n", + "\u001b[2K \u001b[38;2;114;156;31m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m5.4/5.4 MB\u001b[0m \u001b[31m3.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m[31m4.0 MB/s\u001b[0m eta \u001b[36m0:00:01\u001b[0m\n", + "\u001b[?25hDownloading libclang-18.1.1-py2.py3-none-manylinux2010_x86_64.whl (24.5 MB)\n", + "\u001b[2K \u001b[38;2;114;156;31m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m24.5/24.5 MB\u001b[0m \u001b[31m5.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0mm eta \u001b[36m0:00:01\u001b[0m[36m0:00:01\u001b[0m\n", + "\u001b[?25hDownloading ml_dtypes-0.4.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (2.2 MB)\n", + "\u001b[2K \u001b[38;2;114;156;31m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m2.2/2.2 MB\u001b[0m \u001b[31m4.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m MB/s\u001b[0m eta \u001b[36m0:00:01\u001b[0m\n", + "\u001b[?25hDownloading numpy-2.0.2-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (19.2 MB)\n", + "\u001b[2K \u001b[38;2;114;156;31m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m19.2/19.2 MB\u001b[0m \u001b[31m5.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0mm eta \u001b[36m0:00:01\u001b[0m[36m0:00:01\u001b[0m\n", + "\u001b[?25hDownloading opt_einsum-3.4.0-py3-none-any.whl (71 kB)\n", + "Downloading protobuf-5.29.2-cp38-abi3-manylinux2014_x86_64.whl (319 kB)\n", + "Downloading regex-2024.11.6-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (796 kB)\n", + "\u001b[2K \u001b[38;2;114;156;31m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m796.9/796.9 kB\u001b[0m \u001b[31m5.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hDownloading tensorboard-2.18.0-py3-none-any.whl (5.5 MB)\n", + "\u001b[2K \u001b[38;2;114;156;31m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m5.5/5.5 MB\u001b[0m \u001b[31m4.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m MB/s\u001b[0m eta \u001b[36m0:00:01\u001b[0m:01\u001b[0m\n", + "\u001b[?25hDownloading termcolor-2.5.0-py3-none-any.whl (7.8 kB)\n", + "Downloading wrapt-1.17.0-cp312-cp312-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl (89 kB)\n", + "Downloading click-8.1.8-py3-none-any.whl (98 kB)\n", + "Downloading joblib-1.4.2-py3-none-any.whl (301 kB)\n", + "Downloading namex-0.0.8-py3-none-any.whl (5.8 kB)\n", + "Downloading optree-0.13.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (385 kB)\n", + "Downloading rich-13.9.4-py3-none-any.whl (242 kB)\n", + "Downloading Markdown-3.7-py3-none-any.whl (106 kB)\n", + "Downloading markdown_it_py-3.0.0-py3-none-any.whl (87 kB)\n", + "Downloading tensorboard_data_server-0.7.2-py3-none-manylinux_2_31_x86_64.whl (6.6 MB)\n", + "\u001b[2K \u001b[38;2;114;156;31m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m6.6/6.6 MB\u001b[0m \u001b[31m3.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m MB/s\u001b[0m eta \u001b[36m0:00:01\u001b[0m:01\u001b[0m\n", + "\u001b[?25hDownloading werkzeug-3.1.3-py3-none-any.whl (224 kB)\n", + "Downloading MarkupSafe-3.0.2-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (23 kB)\n", + "Downloading mdurl-0.1.2-py3-none-any.whl (10.0 kB)\n", + "Installing collected packages: namex, libclang, flatbuffers, wrapt, termcolor, tensorboard-data-server, regex, protobuf, optree, opt-einsum, numpy, mdurl, MarkupSafe, markdown, joblib, grpcio, google-pasta, gast, click, astunparse, absl-py, werkzeug, nltk, ml-dtypes, markdown-it-py, h5py, tensorboard, rich, keras, tensorflow\n", + "Successfully installed MarkupSafe-3.0.2 absl-py-2.1.0 astunparse-1.6.3 click-8.1.8 flatbuffers-24.12.23 gast-0.6.0 google-pasta-0.2.0 grpcio-1.68.1 h5py-3.12.1 joblib-1.4.2 keras-3.7.0 libclang-18.1.1 markdown-3.7 markdown-it-py-3.0.0 mdurl-0.1.2 ml-dtypes-0.4.1 namex-0.0.8 nltk-3.9.1 numpy-2.0.2 opt-einsum-3.4.0 optree-0.13.1 protobuf-5.29.2 regex-2024.11.6 rich-13.9.4 tensorboard-2.18.0 tensorboard-data-server-0.7.2 tensorflow-2.18.0 termcolor-2.5.0 werkzeug-3.1.3 wrapt-1.17.0\n" ] } ], "source": [ - "!pip install nltk tqdm keras tensorflow" + "!pip install nltk tqdm keras tensorflow contractions" ] }, { "cell_type": "code", - "execution_count": 8, + "execution_count": 7, "metadata": {}, "outputs": [], "source": [ @@ -92,7 +167,7 @@ }, { "cell_type": "code", - "execution_count": 11, + "execution_count": 10, "metadata": {}, "outputs": [], "source": [ @@ -101,425 +176,19 @@ }, { "cell_type": "code", - "execution_count": 12, + "execution_count": 11, "metadata": {}, "outputs": [ { - "data": { - "text/html": [ - "
TfidfVectorizer(max_features=80, stop_words='english', token_pattern='\\\\w{1,}')
In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.
" - ], - "text/plain": [ - "TfidfVectorizer(max_features=80, stop_words='english', token_pattern='\\\\w{1,}')" - ] - }, - "execution_count": 12, - "metadata": {}, - "output_type": "execute_result" + "ename": "NameError", + "evalue": "name 'totaldata' is not defined", + "output_type": "error", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mNameError\u001b[0m Traceback (most recent call last)", + "Cell \u001b[0;32mIn[11], line 1\u001b[0m\n\u001b[0;32m----> 1\u001b[0m tfidf_vect\u001b[38;5;241m.\u001b[39mfit(\u001b[43mtotaldata\u001b[49m)\n", + "\u001b[0;31mNameError\u001b[0m: name 'totaldata' is not defined" + ] } ], "source": [ @@ -551,6 +220,7 @@ "import numpy as np\n", "from sklearn import feature_extraction\n", "from tqdm import tqdm\n", + "import contractions #Update for expanding contractions\n", "\n", "\n", "_wnl = nltk.WordNetLemmatizer()\n", @@ -564,6 +234,10 @@ " return [normalize_word(t) for t in nltk.word_tokenize(s)]\n", "\n", "\n", + "def expanding_contractions(s):\n", + " return contractions.fix(s)\n", + "\n", + "\n", "def clean(s):\n", " # Cleans a string: Lowercasing, trimming, removing non-alphanumeric\n", "\n", @@ -1517,7 +1191,7 @@ ], "metadata": { "kernelspec": { - "display_name": "Python 3", + "display_name": "base", "language": "python", "name": "python3" }, @@ -1531,7 +1205,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.12.3" + "version": "3.12.2" } }, "nbformat": 4,