diff --git a/notebooks/data_exploration.ipynb b/notebooks/data_exploration.ipynb index bc1f450..2e733d9 100644 --- a/notebooks/data_exploration.ipynb +++ b/notebooks/data_exploration.ipynb @@ -32,7 +32,6 @@ "import plotly.express as px\n", "import seaborn as sns\n", "from db import get_db\n", - "from deep_translator import GoogleTranslator\n", "from settings import Path\n", "from sqlalchemy import text\n", "\n", @@ -49,7 +48,7 @@ }, { "cell_type": "code", - "execution_count": 3, + "execution_count": 10, "metadata": {}, "outputs": [ { @@ -334,7 +333,7 @@ "4 {\"FPS\": 919, \"Gore\": 58, \"Co-op\": 43, \"Retro\":... " ] }, - "execution_count": 3, + "execution_count": 10, "metadata": {}, "output_type": "execute_result" } @@ -355,51 +354,6 @@ "game_data.head()" ] }, - { - "cell_type": "code", - "execution_count": 27, - "metadata": {}, - "outputs": [], - "source": [ - "def is_english(text):\n", - " try:\n", - " # Detect language\n", - " lang = GoogleTranslator(source='auto', target='en').translate(text)\n", - " return lang\n", - " except Exception as e:\n", - " return text\n", - " \n", - "non_english_df = game_data[game_data['english'] == 0]\n", - "non_english_df['genres'] = non_english_df['genres'].apply(lambda x: is_english(str(x)))\n", - "\n", - "# english_count = non_english_df['count_english'].sum()\n", - "# non_english_count = (~non_english_df['count_english']).sum()\n", - "\n", - "# print(f'English Strings: {english_count}')\n", - "# print(f'Non-English Strings: {non_english_count}')" - ] - }, - { - "cell_type": "code", - "execution_count": 36, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "'Action;Adventure;Role Playing (RPG)'" - ] - }, - "execution_count": 36, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "test = \"Action;Eventyr;Rollespil (RPG)\"\n", - "is_english(test)" - ] - }, { "cell_type": "markdown", "metadata": {}, @@ -416,10 +370,12 @@ }, { "cell_type": "code", - "execution_count": 26, + "execution_count": 7, "metadata": {}, "outputs": [], "source": [ + "from fuzzywuzzy import process\n", + "\n", "def get_unique(series):\n", " \"\"\"\n", " Returns a set of unique values from a series of strings.\n", @@ -431,29 +387,12 @@ " set: A set of unique values extracted from the series.\n", "\n", " \"\"\"\n", - " return set(list(itertools.chain(*series.apply(lambda x: [c for c in x.split(';')]))))\n", - "\n", - "def translate(list):\n", - " \"\"\"\n", - " Translates a list of values using Google Translate API and returns a set of translated values.\n", - " \n", - " Args:\n", - " list (list): A list of values to be translated.\n", - " \n", - " Returns:\n", - " set: A set of translated values.\n", - " \"\"\"\n", - " lst = []\n", - " for value in list:\n", - " translated = GoogleTranslator(source='auto', target='en').translate(value)\n", - " lst.append(translated.title())\n", - " \n", - " return set(lst)" + " return set(list(itertools.chain(*series.apply(lambda x: [c for c in x.split(';')]))))" ] }, { "cell_type": "code", - "execution_count": 27, + "execution_count": 11, "metadata": {}, "outputs": [ { @@ -462,16 +401,15 @@ "{'Accounting',\n", " 'Action',\n", " 'Adventure',\n", - " 'Adventure Games',\n", + " 'Adventure games',\n", " 'Animation & Modeling',\n", " 'Audio Production',\n", - " 'Aventura',\n", " 'Casual',\n", " 'Design & Illustration',\n", " 'Early Access',\n", " 'Education',\n", - " 'Fairy Tale',\n", " 'Free To Play',\n", + " 'Free to Play',\n", " 'Game Development',\n", " 'Gore',\n", " 'Indie',\n", @@ -479,10 +417,10 @@ " 'Movie',\n", " 'Nudity',\n", " 'Photo Editing',\n", + " 'RPG',\n", " 'Racing',\n", " 'Role',\n", - " 'Role Playing Game (Rpg)',\n", - " 'Rpg',\n", + " 'Role Playing (RPG)',\n", " 'Sexual Content',\n", " 'Short',\n", " 'Simulation',\n", @@ -493,116 +431,172 @@ " 'Utilities',\n", " 'Video Production',\n", " 'Violent',\n", - " 'Web Publishing'}" + " 'Web Publishing',\n", + " 'action'}" ] }, - "execution_count": 27, + "execution_count": 11, "metadata": {}, "output_type": "execute_result" } ], "source": [ "geners = get_unique(game_data['genres'])\n", - "geners_translated = translate(geners)\n", - "geners_translated" + "geners" ] }, { "cell_type": "code", - "execution_count": 28, + "execution_count": 12, + "metadata": {}, + "outputs": [ + { + "ename": "TypeError", + "evalue": "sequence item 0: expected str instance, NoneType found", + "output_type": "error", + "traceback": [ + "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[1;31mTypeError\u001b[0m Traceback (most recent call last)", + "Cell \u001b[1;32mIn[12], line 24\u001b[0m\n\u001b[0;32m 21\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m match\n\u001b[0;32m 23\u001b[0m \u001b[38;5;66;03m# Apply the standardization function to the Genres column\u001b[39;00m\n\u001b[1;32m---> 24\u001b[0m game_data[\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mgenres\u001b[39m\u001b[38;5;124m'\u001b[39m] \u001b[38;5;241m=\u001b[39m \u001b[43mgame_data\u001b[49m\u001b[43m[\u001b[49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[38;5;124;43mgenres\u001b[39;49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[43m]\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mapply\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;28;43;01mlambda\u001b[39;49;00m\u001b[43m \u001b[49m\u001b[43mx\u001b[49m\u001b[43m:\u001b[49m\u001b[43m \u001b[49m\u001b[43mstandardize_multiple_genres\u001b[49m\u001b[43m(\u001b[49m\u001b[43mx\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mgeners\u001b[49m\u001b[43m)\u001b[49m\u001b[43m)\u001b[49m\n\u001b[0;32m 25\u001b[0m geners \u001b[38;5;241m=\u001b[39m get_unique(game_data[\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mgenres\u001b[39m\u001b[38;5;124m'\u001b[39m])\n\u001b[0;32m 26\u001b[0m geners\n", + "File \u001b[1;32mc:\\Users\\rohit\\miniconda3\\envs\\game\\Lib\\site-packages\\pandas\\core\\series.py:4924\u001b[0m, in \u001b[0;36mSeries.apply\u001b[1;34m(self, func, convert_dtype, args, by_row, **kwargs)\u001b[0m\n\u001b[0;32m 4789\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mapply\u001b[39m(\n\u001b[0;32m 4790\u001b[0m \u001b[38;5;28mself\u001b[39m,\n\u001b[0;32m 4791\u001b[0m func: AggFuncType,\n\u001b[1;32m (...)\u001b[0m\n\u001b[0;32m 4796\u001b[0m \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs,\n\u001b[0;32m 4797\u001b[0m ) \u001b[38;5;241m-\u001b[39m\u001b[38;5;241m>\u001b[39m DataFrame \u001b[38;5;241m|\u001b[39m Series:\n\u001b[0;32m 4798\u001b[0m \u001b[38;5;250m \u001b[39m\u001b[38;5;124;03m\"\"\"\u001b[39;00m\n\u001b[0;32m 4799\u001b[0m \u001b[38;5;124;03m Invoke function on values of Series.\u001b[39;00m\n\u001b[0;32m 4800\u001b[0m \n\u001b[1;32m (...)\u001b[0m\n\u001b[0;32m 4915\u001b[0m \u001b[38;5;124;03m dtype: float64\u001b[39;00m\n\u001b[0;32m 4916\u001b[0m \u001b[38;5;124;03m \"\"\"\u001b[39;00m\n\u001b[0;32m 4917\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mSeriesApply\u001b[49m\u001b[43m(\u001b[49m\n\u001b[0;32m 4918\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[43m,\u001b[49m\n\u001b[0;32m 4919\u001b[0m \u001b[43m \u001b[49m\u001b[43mfunc\u001b[49m\u001b[43m,\u001b[49m\n\u001b[0;32m 4920\u001b[0m \u001b[43m \u001b[49m\u001b[43mconvert_dtype\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mconvert_dtype\u001b[49m\u001b[43m,\u001b[49m\n\u001b[0;32m 4921\u001b[0m \u001b[43m \u001b[49m\u001b[43mby_row\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mby_row\u001b[49m\u001b[43m,\u001b[49m\n\u001b[0;32m 4922\u001b[0m \u001b[43m \u001b[49m\u001b[43margs\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\n\u001b[0;32m 4923\u001b[0m \u001b[43m \u001b[49m\u001b[43mkwargs\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m-> 4924\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mapply\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\n", + "File \u001b[1;32mc:\\Users\\rohit\\miniconda3\\envs\\game\\Lib\\site-packages\\pandas\\core\\apply.py:1427\u001b[0m, in \u001b[0;36mSeriesApply.apply\u001b[1;34m(self)\u001b[0m\n\u001b[0;32m 1424\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mapply_compat()\n\u001b[0;32m 1426\u001b[0m \u001b[38;5;66;03m# self.func is Callable\u001b[39;00m\n\u001b[1;32m-> 1427\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mapply_standard\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\n", + "File \u001b[1;32mc:\\Users\\rohit\\miniconda3\\envs\\game\\Lib\\site-packages\\pandas\\core\\apply.py:1507\u001b[0m, in \u001b[0;36mSeriesApply.apply_standard\u001b[1;34m(self)\u001b[0m\n\u001b[0;32m 1501\u001b[0m \u001b[38;5;66;03m# row-wise access\u001b[39;00m\n\u001b[0;32m 1502\u001b[0m \u001b[38;5;66;03m# apply doesn't have a `na_action` keyword and for backward compat reasons\u001b[39;00m\n\u001b[0;32m 1503\u001b[0m \u001b[38;5;66;03m# we need to give `na_action=\"ignore\"` for categorical data.\u001b[39;00m\n\u001b[0;32m 1504\u001b[0m \u001b[38;5;66;03m# TODO: remove the `na_action=\"ignore\"` when that default has been changed in\u001b[39;00m\n\u001b[0;32m 1505\u001b[0m \u001b[38;5;66;03m# Categorical (GH51645).\u001b[39;00m\n\u001b[0;32m 1506\u001b[0m action \u001b[38;5;241m=\u001b[39m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mignore\u001b[39m\u001b[38;5;124m\"\u001b[39m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28misinstance\u001b[39m(obj\u001b[38;5;241m.\u001b[39mdtype, CategoricalDtype) \u001b[38;5;28;01melse\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m\n\u001b[1;32m-> 1507\u001b[0m mapped \u001b[38;5;241m=\u001b[39m \u001b[43mobj\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_map_values\u001b[49m\u001b[43m(\u001b[49m\n\u001b[0;32m 1508\u001b[0m \u001b[43m \u001b[49m\u001b[43mmapper\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mcurried\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mna_action\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43maction\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mconvert\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mconvert_dtype\u001b[49m\n\u001b[0;32m 1509\u001b[0m \u001b[43m\u001b[49m\u001b[43m)\u001b[49m\n\u001b[0;32m 1511\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mlen\u001b[39m(mapped) \u001b[38;5;129;01mand\u001b[39;00m \u001b[38;5;28misinstance\u001b[39m(mapped[\u001b[38;5;241m0\u001b[39m], ABCSeries):\n\u001b[0;32m 1512\u001b[0m \u001b[38;5;66;03m# GH#43986 Need to do list(mapped) in order to get treated as nested\u001b[39;00m\n\u001b[0;32m 1513\u001b[0m \u001b[38;5;66;03m# See also GH#25959 regarding EA support\u001b[39;00m\n\u001b[0;32m 1514\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m obj\u001b[38;5;241m.\u001b[39m_constructor_expanddim(\u001b[38;5;28mlist\u001b[39m(mapped), index\u001b[38;5;241m=\u001b[39mobj\u001b[38;5;241m.\u001b[39mindex)\n", + "File \u001b[1;32mc:\\Users\\rohit\\miniconda3\\envs\\game\\Lib\\site-packages\\pandas\\core\\base.py:921\u001b[0m, in \u001b[0;36mIndexOpsMixin._map_values\u001b[1;34m(self, mapper, na_action, convert)\u001b[0m\n\u001b[0;32m 918\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28misinstance\u001b[39m(arr, ExtensionArray):\n\u001b[0;32m 919\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m arr\u001b[38;5;241m.\u001b[39mmap(mapper, na_action\u001b[38;5;241m=\u001b[39mna_action)\n\u001b[1;32m--> 921\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43malgorithms\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mmap_array\u001b[49m\u001b[43m(\u001b[49m\u001b[43marr\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mmapper\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mna_action\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mna_action\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mconvert\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mconvert\u001b[49m\u001b[43m)\u001b[49m\n", + "File \u001b[1;32mc:\\Users\\rohit\\miniconda3\\envs\\game\\Lib\\site-packages\\pandas\\core\\algorithms.py:1743\u001b[0m, in \u001b[0;36mmap_array\u001b[1;34m(arr, mapper, na_action, convert)\u001b[0m\n\u001b[0;32m 1741\u001b[0m values \u001b[38;5;241m=\u001b[39m arr\u001b[38;5;241m.\u001b[39mastype(\u001b[38;5;28mobject\u001b[39m, copy\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mFalse\u001b[39;00m)\n\u001b[0;32m 1742\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m na_action \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n\u001b[1;32m-> 1743\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mlib\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mmap_infer\u001b[49m\u001b[43m(\u001b[49m\u001b[43mvalues\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mmapper\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mconvert\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mconvert\u001b[49m\u001b[43m)\u001b[49m\n\u001b[0;32m 1744\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[0;32m 1745\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m lib\u001b[38;5;241m.\u001b[39mmap_infer_mask(\n\u001b[0;32m 1746\u001b[0m values, mapper, mask\u001b[38;5;241m=\u001b[39misna(values)\u001b[38;5;241m.\u001b[39mview(np\u001b[38;5;241m.\u001b[39muint8), convert\u001b[38;5;241m=\u001b[39mconvert\n\u001b[0;32m 1747\u001b[0m )\n", + "File \u001b[1;32mlib.pyx:2972\u001b[0m, in \u001b[0;36mpandas._libs.lib.map_infer\u001b[1;34m()\u001b[0m\n", + "Cell \u001b[1;32mIn[12], line 24\u001b[0m, in \u001b[0;36m\u001b[1;34m(x)\u001b[0m\n\u001b[0;32m 21\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m match\n\u001b[0;32m 23\u001b[0m \u001b[38;5;66;03m# Apply the standardization function to the Genres column\u001b[39;00m\n\u001b[1;32m---> 24\u001b[0m game_data[\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mgenres\u001b[39m\u001b[38;5;124m'\u001b[39m] \u001b[38;5;241m=\u001b[39m game_data[\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mgenres\u001b[39m\u001b[38;5;124m'\u001b[39m]\u001b[38;5;241m.\u001b[39mapply(\u001b[38;5;28;01mlambda\u001b[39;00m x: \u001b[43mstandardize_multiple_genres\u001b[49m\u001b[43m(\u001b[49m\u001b[43mx\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mgeners\u001b[49m\u001b[43m)\u001b[49m)\n\u001b[0;32m 25\u001b[0m geners \u001b[38;5;241m=\u001b[39m get_unique(game_data[\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mgenres\u001b[39m\u001b[38;5;124m'\u001b[39m])\n\u001b[0;32m 26\u001b[0m geners\n", + "Cell \u001b[1;32mIn[12], line 17\u001b[0m, in \u001b[0;36mstandardize_multiple_genres\u001b[1;34m(genres_str, genre_list)\u001b[0m\n\u001b[0;32m 15\u001b[0m genres \u001b[38;5;241m=\u001b[39m genres_str\u001b[38;5;241m.\u001b[39msplit(\u001b[38;5;124m'\u001b[39m\u001b[38;5;124m;\u001b[39m\u001b[38;5;124m'\u001b[39m)\n\u001b[0;32m 16\u001b[0m standardized_genres \u001b[38;5;241m=\u001b[39m [standardize_genre(genre\u001b[38;5;241m.\u001b[39mstrip(), genre_list) \u001b[38;5;28;01mfor\u001b[39;00m genre \u001b[38;5;129;01min\u001b[39;00m genres]\n\u001b[1;32m---> 17\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;124;43m'\u001b[39;49m\u001b[38;5;124;43m;\u001b[39;49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mjoin\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;28;43msorted\u001b[39;49m\u001b[43m(\u001b[49m\u001b[38;5;28;43mset\u001b[39;49m\u001b[43m(\u001b[49m\u001b[43mstandardized_genres\u001b[49m\u001b[43m)\u001b[49m\u001b[43m)\u001b[49m\u001b[43m)\u001b[49m \u001b[38;5;66;03m# Use sorted(set()) to remove duplicates and sort\u001b[39;00m\n\u001b[0;32m 19\u001b[0m \u001b[38;5;66;03m# Find the best match from the list of unique genres\u001b[39;00m\n\u001b[0;32m 20\u001b[0m match, score \u001b[38;5;241m=\u001b[39m process\u001b[38;5;241m.\u001b[39mextractOne(value, genre_list)\n", + "\u001b[1;31mTypeError\u001b[0m: sequence item 0: expected str instance, NoneType found" + ] + } + ], + "source": [ + "def standardize_genre(value, genre_list):\n", + " # Convert to lowercase for consistent comparison\n", + " value_lower = value.lower()\n", + " \n", + " # Define common patterns\n", + " if 'rpg' in value_lower or 'role playing' in value_lower or 'role' in value_lower:\n", + " return 'RPG'\n", + " if 'simulation' in value_lower or 'simulators' in value_lower:\n", + " return 'Simulation'\n", + " if 'adventure' in value_lower:\n", + " return 'Adventure'\n", + "\n", + "# Function to standardize multiple genres\n", + "def standardize_multiple_genres(genres_str, genre_list):\n", + " genres = genres_str.split(';')\n", + " standardized_genres = [standardize_genre(genre.strip(), genre_list) for genre in genres]\n", + " return ';'.join(sorted(set(standardized_genres))) # Use sorted(set()) to remove duplicates and sort\n", + " \n", + " # Find the best match from the list of unique genres\n", + " match, score = process.extractOne(value, genre_list)\n", + " return match\n", + "\n", + "# Apply the standardization function to the Genres column\n", + "game_data['genres'] = game_data['genres'].apply(lambda x: standardize_multiple_genres(x, geners))\n", + "geners = get_unique(game_data['genres'])\n", + "geners" + ] + }, + { + "cell_type": "code", + "execution_count": 6, "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "{'A Player',\n", - " 'Captions Available',\n", - " 'Co-Op',\n", - " 'Co-Op Game',\n", - " 'Commentary Available',\n", - " 'Compat. Partial With Control',\n", - " 'Compat. Total Control',\n", - " 'Compat. Total With Control',\n", - " 'Compatible Con Rv',\n", - " 'Controller (Complete)',\n", - " 'Controller (Partially)',\n", - " 'Cooperate',\n", - " 'Cooperative',\n", - " 'Cooperative On Lan',\n", + "{' Co-op',\n", + " ' Family Sharing',\n", + " ' Multiplayer',\n", + " ' Partial controller support',\n", + " ' Remote play on tablet',\n", + " ' Steam Achievements',\n", + " ' Steam trading cards',\n", + " ' Valve Anti-Cheat enabled',\n", + " ' co-op',\n", + " ' cross-platform multiplayer',\n", + " ' in-app purchases',\n", + " ' multiplayer',\n", + " ' online co-op',\n", + " ' partial controller support',\n", + " 'Captions available',\n", + " 'Co-op',\n", + " 'Collectible cards',\n", + " 'Commentary available',\n", + " 'Controller (Full)',\n", + " 'Controller (partial)',\n", " 'Cross-Platform Multiplayer',\n", - " 'Cross-Platform Multiplayer Connection',\n", - " 'Enable Valve Anti-Cheat Protection',\n", + " 'Cross-platform multiplayer',\n", " 'Family Library',\n", " 'Family Library Sharing',\n", - " 'Family Loan',\n", " 'Family Sharing',\n", - " 'For Multiple Players',\n", " 'Full Controller Support',\n", - " 'Hdr Available',\n", + " 'Full controller support',\n", + " 'HDR available',\n", " 'In-App Purchases',\n", + " 'In-app purchases',\n", " 'Includes Level Editor',\n", - " 'Includes Source Sdk',\n", - " 'Individual',\n", - " 'Jxj',\n", - " 'Jxj On-Line',\n", - " 'Lan Co-Op',\n", - " 'Lan Pvp',\n", - " 'Library Sharing',\n", - " 'Mmo',\n", + " 'Includes Source SDK',\n", + " 'Includes level editor',\n", + " 'LAN Co-op',\n", + " 'LAN PvP',\n", + " 'LAN co-op',\n", + " 'Library sharing',\n", + " 'MMO',\n", " 'Mods',\n", - " 'Mods (Require Hl2)',\n", - " 'Motion Detection In Command',\n", - " 'Multi-Player',\n", + " 'Mods (require HL2)',\n", + " 'Motion detection on controller',\n", + " 'Multi-player',\n", " 'Multiplayer',\n", - " 'One Player',\n", - " 'Online Co-Op',\n", - " 'Online Collaboration',\n", - " 'Online Cooperative',\n", - " 'Online Pvp',\n", + " 'Multiple',\n", + " 'Online Co-op',\n", + " 'Online PvP',\n", + " 'Online co-op',\n", " 'Partial Controller Support',\n", - " 'Partially Supported Controllers',\n", - " 'Pvp',\n", - " 'Pvp Split/Shared Screen.',\n", - " 'Remote Play En Tv',\n", - " 'Remote Play On Phone',\n", - " 'Remote Play On Tablet',\n", - " 'Remote Play On Tv',\n", + " 'Partial controller support',\n", + " 'PvP',\n", " 'Remote Play Together',\n", + " 'Remote Play on Phone',\n", + " 'Remote Play on TV',\n", + " 'Remote Play on Tablet',\n", + " 'Remote Play on tablet',\n", " 'Shared/Split Screen',\n", - " 'Shared/Split Screen Co-Op',\n", - " 'Shared/Split Screen Pvp',\n", + " 'Shared/Split Screen Co-op',\n", + " 'Shared/Split Screen PvP',\n", + " 'Single',\n", " 'Single Player',\n", - " 'Single-Player',\n", - " 'Some Controllers Supported',\n", - " 'Split/Shared Screen',\n", - " 'Statistics',\n", + " 'Single player',\n", + " 'Single-player',\n", + " 'Split/shared PvP',\n", + " 'Split/shared screen',\n", " 'Stats',\n", " 'Steam Achievements',\n", " 'Steam Cloud',\n", " 'Steam Leaderboards',\n", - " 'Steam Stickers',\n", " 'Steam Timeline',\n", " 'Steam Trading Cards',\n", " 'Steam Turn Notifications',\n", " 'Steam Workshop',\n", - " 'Steamvr Collectibles',\n", + " 'Steam achievements',\n", + " 'SteamVR Collectibles',\n", " 'Tracked Controller Support',\n", - " 'Trading Cards',\n", - " 'Valve Anti-Cheat Enabled',\n", - " 'Vr Only',\n", - " 'Vr Support',\n", - " 'Vr Supported'}" + " 'VR Only',\n", + " 'VR Support',\n", + " 'VR Supported',\n", + " 'VR support',\n", + " 'Valve Anti-Cheat enabled'}" ] }, - "execution_count": 28, + "execution_count": 6, "metadata": {}, "output_type": "execute_result" } ], "source": [ "categories = get_unique(game_data['categories'])\n", - "categories_translated = translate(categories)\n", - "categories_translated" + "categories" ] }, { @@ -1004,13 +998,6 @@ "plt.show()" ] }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] - }, { "cell_type": "markdown", "metadata": {},