diff --git a/NER-Spacy.ipynb b/NER-Spacy.ipynb new file mode 100644 index 0000000..ca2fb13 --- /dev/null +++ b/NER-Spacy.ipynb @@ -0,0 +1,1332 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "bd12bc9a", + "metadata": {}, + "source": [ + "# NER Resume Parser" + ] + }, + { + "cell_type": "markdown", + "id": "ba00dc44", + "metadata": {}, + "source": [ + "## Import all the Dependencies" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "id": "24b40b4c", + "metadata": {}, + "outputs": [], + "source": [ + "import json\n", + "import random\n", + "import spacy" + ] + }, + { + "cell_type": "markdown", + "id": "62d18333", + "metadata": {}, + "source": [ + "## Split of data into train and test\n", + "A total of 94 resumes, stored in JSON Lines format, are split into training and testing data.\n", + "1. train_data = 80 resumes (data used for training the model)\n", + "2. test_data = 14 resumes (data used for testing the accuracy of the trained model)" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "6221d836", + "metadata": {}, + "outputs": [], + "source": [ + "# Each JSONL record keeps the resume text under \"data\" and its entity spans under \"label\".\n", + "data = []\n", + "with open(\"data.jsonl\", encoding=\"utf8\") as d:\n", + "    for line in d:\n", + "        if not line.strip():  # a blank line would make json.loads raise\n", + "            continue\n", + "        dat = json.loads(line)\n", + "        entities = [(ent[0], ent[1], ent[2]) for ent in dat[\"label\"]]\n", + "        data.append((dat[\"data\"], {\"entities\" : entities}))\n", + "\n", + "# The first 80 records are used for training; the rest are held out for testing.\n", + "train_data = data[:80]\n", + "test_data = data[80:]" + ] + }, + { + "cell_type": "markdown", + "id": "e2e191fc", + "metadata": {}, + "source": [ + "## Creating Spacy Model\n", + "A blank spaCy model is created and an NER pipe is added to its pipeline; the labels in the LABELS list are then added to that pipe."
+ ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "43795394", + "metadata": {}, + "outputs": [], + "source": [ + "LABELS = [\"name\", \"phone_no\", \"email\", \"linkedin\", \"github\", \"designation\",\n", + "          \"company\", \"job-duration\", \"Experience\", \"degree\", \"academic-institute\", \"databases\", \"tools\", \"core-skills\", \"soft-skills\", \"cloud-platforms\",\n", + "          \"Front End\", \"Back End\", \"Mobile App\", \"Libraries\"]\n", + "\n", + "# Start from a blank English pipeline; reuse an existing NER pipe so `ner` is always bound.\n", + "model = spacy.blank('en')\n", + "if 'ner' not in model.pipe_names:\n", + "    ner = model.create_pipe('ner')\n", + "    model.add_pipe(ner, last=True)\n", + "else:\n", + "    ner = model.get_pipe('ner')\n", + "for label in LABELS:\n", + "    ner.add_label(label)" + ] + }, + { + "cell_type": "markdown", + "id": "3f1e6544", + "metadata": {}, + "source": [ + "## Training the Spacy Model\n", + "The model created above is trained on the 80 training resumes; all pipeline components other than the NER one created above are disabled during training.\n", + "Also, the train_data is shuffled so that the order doesn't create an issue, though mostly it will be the other way around."
+ ] + }, + { + "cell_type": "code", + "execution_count": 52, + "id": "50c783fa", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Statring iteration 0\n", + "{'ner': 9086.016344472797}\n", + "Statring iteration 1\n", + "{'ner': 5775.54421436147}\n", + "Statring iteration 2\n", + "{'ner': 5790.462567481561}\n", + "Statring iteration 3\n", + "{'ner': 5649.707963589194}\n", + "Statring iteration 4\n", + "{'ner': 4530.505186055407}\n", + "Statring iteration 5\n", + "{'ner': 4118.0038339086495}\n", + "Statring iteration 6\n", + "{'ner': 4319.213940221822}\n", + "Statring iteration 7\n", + "{'ner': 4377.778997759579}\n", + "Statring iteration 8\n", + "{'ner': 3738.5067388601824}\n", + "Statring iteration 9\n", + "{'ner': 3707.0684635037087}\n", + "Statring iteration 10\n", + "{'ner': 3529.437793201774}\n", + "Statring iteration 11\n", + "{'ner': 3399.4632947736563}\n", + "Statring iteration 12\n", + "{'ner': 3174.375069948829}\n", + "Statring iteration 13\n", + "{'ner': 3091.4008657450927}\n", + "Statring iteration 14\n", + "{'ner': 2946.735754441812}\n", + "Statring iteration 15\n", + "{'ner': 2808.616162615468}\n", + "Statring iteration 16\n", + "{'ner': 2671.855197548338}\n", + "Statring iteration 17\n", + "{'ner': 2740.0388042511067}\n", + "Statring iteration 18\n", + "{'ner': 2474.338745852204}\n", + "Statring iteration 19\n", + "{'ner': 2457.9215802250037}\n", + "Statring iteration 20\n", + "{'ner': 2273.753974043272}\n", + "Statring iteration 21\n", + "{'ner': 2352.766884834546}\n", + "Statring iteration 22\n", + "{'ner': 2111.4251207869}\n", + "Statring iteration 23\n", + "{'ner': 1998.8233448928465}\n", + "Statring iteration 24\n", + "{'ner': 1893.2770475129003}\n", + "Statring iteration 25\n", + "{'ner': 1859.7656217067888}\n", + "Statring iteration 26\n", + "{'ner': 1995.5918579566692}\n", + "Statring iteration 27\n", + "{'ner': 1878.0370160785453}\n", + "Statring iteration 28\n", + "{'ner': 
1826.033167198379}\n", + "Statring iteration 29\n", + "{'ner': 1699.7448065071844}\n", + "Statring iteration 30\n", + "{'ner': 1669.246304862996}\n", + "Statring iteration 31\n", + "{'ner': 1603.797357835939}\n", + "Statring iteration 32\n", + "{'ner': 1557.6811512677803}\n", + "Statring iteration 33\n", + "{'ner': 1583.793140939754}\n", + "Statring iteration 34\n", + "{'ner': 1537.3016351064866}\n", + "Statring iteration 35\n", + "{'ner': 1510.7934109440237}\n", + "Statring iteration 36\n", + "{'ner': 1436.8033965442955}\n", + "Statring iteration 37\n", + "{'ner': 1271.734294297113}\n", + "Statring iteration 38\n", + "{'ner': 1359.139771235486}\n", + "Statring iteration 39\n", + "{'ner': 1361.3017688439836}\n", + "Statring iteration 40\n", + "{'ner': 1249.8118108668016}\n", + "Statring iteration 41\n", + "{'ner': 1182.580568376547}\n", + "Statring iteration 42\n", + "{'ner': 1210.5344900249504}\n", + "Statring iteration 43\n", + "{'ner': 1228.3195641275415}\n", + "Statring iteration 44\n", + "{'ner': 1253.8290337327537}\n", + "Statring iteration 45\n", + "{'ner': 1065.4236901959225}\n", + "Statring iteration 46\n", + "{'ner': 1064.8518753976518}\n", + "Statring iteration 47\n", + "{'ner': 1121.9674745758186}\n", + "Statring iteration 48\n", + "{'ner': 987.8581712224262}\n", + "Statring iteration 49\n", + "{'ner': 1191.5008400368974}\n", + "Statring iteration 50\n", + "{'ner': 989.3080851670912}\n", + "Statring iteration 51\n", + "{'ner': 1011.5684553671282}\n", + "Statring iteration 52\n", + "{'ner': 1039.3215303689951}\n", + "Statring iteration 53\n", + "{'ner': 989.5272850326904}\n", + "Statring iteration 54\n", + "{'ner': 989.5076693307831}\n", + "Statring iteration 55\n", + "{'ner': 917.5237792466704}\n", + "Statring iteration 56\n", + "{'ner': 1044.0032193850834}\n", + "Statring iteration 57\n", + "{'ner': 929.771797746006}\n", + "Statring iteration 58\n", + "{'ner': 907.5507922539111}\n", + "Statring iteration 59\n", + "{'ner': 908.0261412047606}\n", + 
"Statring iteration 60\n", + "{'ner': 902.0115867543378}\n", + "Statring iteration 61\n", + "{'ner': 807.0997419109592}\n", + "Statring iteration 62\n", + "{'ner': 833.4068650006163}\n", + "Statring iteration 63\n", + "{'ner': 967.349736272527}\n", + "Statring iteration 64\n", + "{'ner': 821.5092544326112}\n", + "Statring iteration 65\n", + "{'ner': 903.7021073161387}\n", + "Statring iteration 66\n", + "{'ner': 831.9164671048462}\n", + "Statring iteration 67\n", + "{'ner': 871.742578251797}\n", + "Statring iteration 68\n", + "{'ner': 741.1430313183073}\n", + "Statring iteration 69\n", + "{'ner': 792.2623349983895}\n", + "Statring iteration 70\n", + "{'ner': 811.9704209371259}\n", + "Statring iteration 71\n", + "{'ner': 782.1085248622207}\n", + "Statring iteration 72\n", + "{'ner': 752.017321055459}\n", + "Statring iteration 73\n", + "{'ner': 768.4190184370088}\n", + "Statring iteration 74\n", + "{'ner': 779.4962581201327}\n", + "Statring iteration 75\n", + "{'ner': 757.9075173855987}\n", + "Statring iteration 76\n", + "{'ner': 774.5806727931842}\n", + "Statring iteration 77\n", + "{'ner': 776.4203950752358}\n", + "Statring iteration 78\n", + "{'ner': 851.0435966025822}\n", + "Statring iteration 79\n", + "{'ner': 682.224891787438}\n", + "Statring iteration 80\n", + "{'ner': 708.5797765854545}\n", + "Statring iteration 81\n", + "{'ner': 678.9030829826721}\n", + "Statring iteration 82\n", + "{'ner': 673.2667065557498}\n", + "Statring iteration 83\n", + "{'ner': 757.8439333253298}\n", + "Statring iteration 84\n", + "{'ner': 719.5916064564403}\n", + "Statring iteration 85\n", + "{'ner': 678.4867065504762}\n", + "Statring iteration 86\n", + "{'ner': 697.7648183335556}\n", + "Statring iteration 87\n", + "{'ner': 662.7924128369141}\n", + "Statring iteration 88\n", + "{'ner': 661.9186845174347}\n", + "Statring iteration 89\n", + "{'ner': 610.0630302219195}\n", + "Statring iteration 90\n", + "{'ner': 705.6510958431682}\n", + "Statring iteration 91\n", + "{'ner': 
668.0649264788594}\n", + "Statring iteration 92\n", + "{'ner': 612.3427860238099}\n", + "Statring iteration 93\n", + "{'ner': 567.6168144948003}\n", + "Statring iteration 94\n", + "{'ner': 571.1491016213412}\n", + "Statring iteration 95\n", + "{'ner': 605.7095353391505}\n", + "Statring iteration 96\n", + "{'ner': 579.6924187086555}\n", + "Statring iteration 97\n", + "{'ner': 599.7420633821346}\n", + "Statring iteration 98\n", + "{'ner': 626.397938695232}\n", + "Statring iteration 99\n", + "{'ner': 625.7287653744859}\n", + "Statring iteration 100\n", + "{'ner': 649.7325127651347}\n", + "Statring iteration 101\n", + "{'ner': 653.1703058982575}\n", + "Statring iteration 102\n", + "{'ner': 629.6442747596136}\n", + "Statring iteration 103\n", + "{'ner': 653.804745755265}\n", + "Statring iteration 104\n", + "{'ner': 576.2312764422908}\n", + "Statring iteration 105\n", + "{'ner': 589.9479672370101}\n", + "Statring iteration 106\n", + "{'ner': 530.7296469660316}\n", + "Statring iteration 107\n", + "{'ner': 560.3971356624077}\n", + "Statring iteration 108\n", + "{'ner': 570.7551146543868}\n", + "Statring iteration 109\n", + "{'ner': 530.9403918811915}\n", + "Statring iteration 110\n", + "{'ner': 490.4933103235473}\n", + "Statring iteration 111\n", + "{'ner': 529.6743489352489}\n", + "Statring iteration 112\n", + "{'ner': 498.858061666545}\n", + "Statring iteration 113\n", + "{'ner': 564.0019337748205}\n", + "Statring iteration 114\n", + "{'ner': 474.07985162542263}\n", + "Statring iteration 115\n", + "{'ner': 466.0157573027494}\n", + "Statring iteration 116\n", + "{'ner': 471.05503770083726}\n", + "Statring iteration 117\n", + "{'ner': 465.7905608155209}\n", + "Statring iteration 118\n", + "{'ner': 509.62904474321095}\n", + "Statring iteration 119\n", + "{'ner': 564.1760609157783}\n", + "Statring iteration 120\n", + "{'ner': 514.4198024739}\n", + "Statring iteration 121\n", + "{'ner': 521.329736790513}\n", + "Statring iteration 122\n", + "{'ner': 526.2898955801168}\n", + 
"Statring iteration 123\n", + "{'ner': 484.2427894830027}\n", + "Statring iteration 124\n", + "{'ner': 430.0654250592848}\n", + "Statring iteration 125\n", + "{'ner': 589.3226994138906}\n", + "Statring iteration 126\n", + "{'ner': 503.82606401856503}\n", + "Statring iteration 127\n", + "{'ner': 451.3896488143979}\n", + "Statring iteration 128\n", + "{'ner': 425.00475616467463}\n", + "Statring iteration 129\n", + "{'ner': 529.8201284770594}\n", + "Statring iteration 130\n", + "{'ner': 470.8070112635863}\n", + "Statring iteration 131\n", + "{'ner': 533.2260646403494}\n", + "Statring iteration 132\n", + "{'ner': 442.4869201743114}\n", + "Statring iteration 133\n", + "{'ner': 515.0579264728459}\n", + "Statring iteration 134\n", + "{'ner': 479.0685049603032}\n", + "Statring iteration 135\n", + "{'ner': 431.9961002286442}\n", + "Statring iteration 136\n", + "{'ner': 416.38669461595043}\n", + "Statring iteration 137\n", + "{'ner': 435.2640504282634}\n", + "Statring iteration 138\n", + "{'ner': 465.11917743110536}\n", + "Statring iteration 139\n", + "{'ner': 396.28058649166917}\n", + "Statring iteration 140\n", + "{'ner': 435.4093827636076}\n", + "Statring iteration 141\n", + "{'ner': 389.734462427303}\n", + "Statring iteration 142\n", + "{'ner': 455.3303486136667}\n", + "Statring iteration 143\n", + "{'ner': 416.5047243802114}\n", + "Statring iteration 144\n", + "{'ner': 395.8844712882857}\n", + "Statring iteration 145\n", + "{'ner': 483.8514336595549}\n", + "Statring iteration 146\n", + "{'ner': 342.1261681695794}\n", + "Statring iteration 147\n", + "{'ner': 348.1230903080657}\n", + "Statring iteration 148\n", + "{'ner': 391.66417708294716}\n", + "Statring iteration 149\n", + "{'ner': 417.23216064603747}\n", + "Statring iteration 150\n", + "{'ner': 515.1199292187869}\n", + "Statring iteration 151\n", + "{'ner': 386.60819755381533}\n", + "Statring iteration 152\n", + "{'ner': 336.56691523071254}\n", + "Statring iteration 153\n", + "{'ner': 365.3712360755889}\n", + 
"Statring iteration 154\n", + "{'ner': 382.3196571072746}\n", + "Statring iteration 155\n", + "{'ner': 346.1219977904216}\n", + "Statring iteration 156\n", + "{'ner': 366.61023544107286}\n", + "Statring iteration 157\n", + "{'ner': 476.90792942914254}\n", + "Statring iteration 158\n", + "{'ner': 355.200903085948}\n", + "Statring iteration 159\n", + "{'ner': 502.2747898320779}\n", + "Statring iteration 160\n", + "{'ner': 424.08611218576897}\n", + "Statring iteration 161\n", + "{'ner': 344.4564430763005}\n", + "Statring iteration 162\n", + "{'ner': 420.00246793701893}\n", + "Statring iteration 163\n", + "{'ner': 391.8816088003872}\n", + "Statring iteration 164\n", + "{'ner': 364.94152851227125}\n", + "Statring iteration 165\n", + "{'ner': 331.1595682493113}\n", + "Statring iteration 166\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "{'ner': 405.05396767102155}\n", + "Statring iteration 167\n", + "{'ner': 361.86459688912413}\n", + "Statring iteration 168\n", + "{'ner': 340.7971179377868}\n", + "Statring iteration 169\n", + "{'ner': 348.37238743079007}\n", + "Statring iteration 170\n", + "{'ner': 313.46543275349825}\n", + "Statring iteration 171\n", + "{'ner': 368.9364160980823}\n", + "Statring iteration 172\n", + "{'ner': 298.50807290967737}\n", + "Statring iteration 173\n", + "{'ner': 347.40076645291754}\n", + "Statring iteration 174\n", + "{'ner': 372.67491239407843}\n", + "Statring iteration 175\n", + "{'ner': 291.54123632431987}\n", + "Statring iteration 176\n", + "{'ner': 335.36426562878364}\n", + "Statring iteration 177\n", + "{'ner': 381.93293677266803}\n", + "Statring iteration 178\n", + "{'ner': 312.7087802256321}\n", + "Statring iteration 179\n", + "{'ner': 285.12354443455854}\n", + "Statring iteration 180\n", + "{'ner': 389.3947435995446}\n", + "Statring iteration 181\n", + "{'ner': 389.08302324264076}\n", + "Statring iteration 182\n", + "{'ner': 348.7697888022164}\n", + "Statring iteration 183\n", + "{'ner': 
344.5794787355446}\n", + "Statring iteration 184\n", + "{'ner': 354.331606467093}\n", + "Statring iteration 185\n", + "{'ner': 281.84807877029584}\n", + "Statring iteration 186\n", + "{'ner': 284.09417098880135}\n", + "Statring iteration 187\n", + "{'ner': 285.4785270111182}\n", + "Statring iteration 188\n", + "{'ner': 299.9627820081848}\n", + "Statring iteration 189\n", + "{'ner': 359.3744061673036}\n", + "Statring iteration 190\n", + "{'ner': 379.75123189961363}\n", + "Statring iteration 191\n", + "{'ner': 356.8926703230818}\n", + "Statring iteration 192\n", + "{'ner': 391.2505771032391}\n", + "Statring iteration 193\n", + "{'ner': 342.70690655363234}\n", + "Statring iteration 194\n", + "{'ner': 352.6055919371783}\n", + "Statring iteration 195\n", + "{'ner': 313.85778015288975}\n", + "Statring iteration 196\n", + "{'ner': 324.59503296857764}\n", + "Statring iteration 197\n", + "{'ner': 312.04852888196075}\n", + "Statring iteration 198\n", + "{'ner': 287.99412299457305}\n", + "Statring iteration 199\n", + "{'ner': 286.115672350375}\n" + ] + } + ], + "source": [ + "# Train with every pipe except NER switched off.\n", + "frozen_pipes = [name for name in model.pipe_names if name != 'ner']\n", + "with model.disable_pipes(*frozen_pipes):\n", + "    optimizer = model.begin_training()\n", + "    for epoch in range(200):\n", + "        print(\"Statring iteration {}\".format(epoch))\n", + "        random.shuffle(train_data)  # visit the resumes in a new order on every pass\n", + "        losses = {}\n", + "        for text, annotations in train_data:\n", + "            # One update step per resume.\n", + "            model.update([text], [annotations],\n", + "                         drop=0.3,\n", + "                         sgd=optimizer,\n", + "                         losses=losses)\n", + "        print(losses)  # the loss dict passed to update() during this pass" + ] + }, + { + "cell_type": "markdown", + "id": "4cdf4d67", + "metadata": {}, + "source": [ + "## Testing the Spacy Model\n", + "After training is completed, the \"model\" predicts the labels and their text for each resume in test_data and writes them into txt files.\n", + "In the case below, 14 txt files are written so that you can see what the model predicted."
+ ] + }, + { + "cell_type": "code", + "execution_count": 53, + "id": "2edc18af", + "metadata": {}, + "outputs": [], + "source": [ + "# Write resume0.txt, resume1.txt, ... : one file per test resume, predicted entity texts grouped by label.\n", + "for c, (text, annot) in enumerate(test_data):\n", + "    doc_to_test = model(text)\n", + "    d = {}\n", + "    for ent in doc_to_test.ents:\n", + "        d.setdefault(ent.label_, []).append(ent.text)\n", + "    # The with-block closes the file after each resume; the bare open() used to leak the handle.\n", + "    with open(\"resume\"+str(c)+\".txt\", \"w\", encoding=\"utf8\") as f:\n", + "        # Sorting makes the label and value order reproducible; set() drops repeated predictions.\n", + "        for i in sorted(d):\n", + "            f.write(\"\\n\\n\")\n", + "            f.write(i +\":\"+\"\\n\")\n", + "            for j in sorted(set(d[i])):\n", + "                f.write(j.replace('\\n','')+\"\\n\")\n", + "\n", + "# Save the trained pipeline under the path \"my_model\".\n", + "model.to_disk(\"my_model\")" + ] + }, + { + "cell_type": "markdown", + "id": "c42bbbeb", + "metadata": {}, + "source": [ + "## Accuracy of Model\n", + "The code below lets you see the accuracy on each resume and on each label, which in turn helps us see where improvement is needed." + ] + }, + { + "cell_type": "code", + "execution_count": 65, + "id": "b3fcec05", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[['problem solving', 'soft-skills'], ['leadership skill', 'soft-skills'], ['team management', 'soft-skills'], ['javascript', 'Front End'], ['angular', 'Front End'], ['html', 'Front End'], ['css', 'Front End'], ['bootstarp', 'Front End'], ['bachelor engineering', 'degree'], ['shri labhubhai trivedi\\ninstitute engineering technology', 'academic-institute'], ['sql', 'Back End'], ['marketing assistant', 'designation'], ['intuitive problem solving', 'soft-skills'], ['creative thinking', 'soft-skills'], ['communication', 'soft-skills'], ['excellent team player', 'soft-skills'], ['shreyash\\nzinzuvadia', 'name'], ['project management', 'core-skills'], ['google web desiner', 'core-skills'], ['google digital marketing', 'core-skills'], ['9408152104', 'phone_no'], ['8320361177', 'phone_no'], ['shreyashsoni95@gmail .com', 'email'], ['linkedin.com/in/shreyash-\\nzinzuvadia', 'linkedin']]\n", +
"[['red blue', 'name'], ['problem solving', 'soft-skills'], ['creative thinking', 'soft-skills'], ['team player', 'soft-skills'], ['leadership skill', 'soft-skills'], ['freelancing', 'designation'], ['team management', 'soft-skills'], ['javascript', 'Front End'], ['angular', 'Front End'], ['html', 'Front End'], ['css', 'Front End'], ['bootstarp', 'Front End'], ['bachelor engineering', 'degree'], ['shri labhubhai trivedi\\ninstitute engineering technology', 'academic-institute'], ['sql', 'Back End'], ['.com', 'email'], ['zinzuvadia a5547b148', 'company']]\n", + "name : 1, pred: 0\n", + "name : 0.0\n", + "\n", + "phone_no : 2, pred: 0\n", + "phone_no : 0.0\n", + "\n", + "email : 1, pred: 1\n", + "email : 100.0\n", + "\n", + "linkedin : 1, pred: 0\n", + "linkedin : 0.0\n", + "\n", + "designation : 1, pred: 0\n", + "designation : 0.0\n", + "\n", + "degree : 1, pred: 1\n", + "degree : 100.0\n", + "\n", + "academic-institute : 1, pred: 1\n", + "academic-institute : 100.0\n", + "\n", + "core-skills : 3, pred: 0\n", + "core-skills : 0.0\n", + "\n", + "soft-skills : 7, pred: 6\n", + "soft-skills : 85.71428571428571\n", + "\n", + "Front End : 5, pred: 5\n", + "Front End : 100.0\n", + "\n", + "Back End : 1, pred: 1\n", + "Back End : 100.0\n", + "\n", + "Accuracy per resume: 62.5\n", + "\n", + "[['9586168250', 'phone_no'], ['9725017940', 'phone_no'], ['vishwapatel312@gmail.com', 'email'], ['self motivated', 'soft-skills'], ['fertilizer nagar school gsfc', 'academic-institute'], ['internship', 'designation'], ['technobits digital vadodara', 'company'], ['end developer intern', 'designation'], ['july 2020', 'job-duration'], ['html5', 'Front End'], ['css3', 'Front End'], ['bootstrap', 'Front End'], ['java script', 'Front End'], ['html', 'Front End'], ['css', 'Front End'], ['php', 'Back End'], ['sql', 'Back End'], ['java', 'Back End'], ['vishwa patel', 'name'], ['work ethics', 'soft-skills'], ['working independently teams', 'soft-skills'], ['interpersonal skills', 'soft-skills'], 
['bachelor technology computer science engineering', 'degree'], ['parul university', 'academic-institute'], ['xii', 'degree'], ['x', 'degree'], ['atharva vidhyalaya', 'academic-institute'], ['b2b android app', 'Mobile App']]\n", + "[['9586168250', 'phone_no'], ['9725017940', 'phone_no'], ['vishwa patel', 'name'], ['vishwapatel312@gmail.com', 'email'], ['self motivated', 'soft-skills'], ['bachelor technology computer science engineering', 'degree'], ['internship', 'designation'], ['technobits digital vadodara', 'company'], ['intern', 'designation'], ['html5', 'Front End'], ['css3', 'Front End'], ['bootstrap', 'Front End'], ['java script', 'Front End'], ['html', 'Front End'], ['css', 'Front End'], ['parul university', 'academic-institute'], ['php', 'Back End'], ['sql', 'Back End'], ['java', 'Back End']]\n", + "name : 1, pred: 1\n", + "name : 100.0\n", + "\n", + "phone_no : 2, pred: 2\n", + "phone_no : 100.0\n", + "\n", + "email : 1, pred: 1\n", + "email : 100.0\n", + "\n", + "designation : 2, pred: 3\n", + "designation : 150.0\n", + "\n", + "company : 1, pred: 1\n", + "company : 100.0\n", + "\n", + "job-duration : 1, pred: 0\n", + "job-duration : 0.0\n", + "\n", + "degree : 3, pred: 1\n", + "degree : 33.33333333333333\n", + "\n", + "academic-institute : 3, pred: 1\n", + "academic-institute : 33.33333333333333\n", + "\n", + "soft-skills : 4, pred: 1\n", + "soft-skills : 25.0\n", + "\n", + "Front End : 6, pred: 6\n", + "Front End : 100.0\n", + "\n", + "Back End : 3, pred: 3\n", + "Back End : 100.0\n", + "\n", + "Mobile App : 1, pred: 0\n", + "Mobile App : 0.0\n", + "\n", + "Accuracy per resume: 71.42857142857143\n", + "\n", + "[['firebase', 'core-skills'], ['react native', 'Mobile App'], ['xcode', 'tools'], ['nitin\\njanyani', 'name'], ['software developer', 'core-skills'], ['highly interested developing', 'soft-skills'], ['team player attitude', 'soft-skills'], ['node js', 'Back End'], ['git', 'tools'], ['sql', 'Back End'], ['android studio', 'tools'], ['vs code', 
'tools'], ['balaji college computer application', 'academic-institute'], ['7046706306', 'phone_no'], ['nitinjaniyani7@gmail.com', 'email'], ['bloodman', 'tools'], ['flutter', 'Mobile App'], ['aws', 'cloud-platforms'], ['2.5 years\\nexperience', 'Experience'], ['dart', 'Mobile App'], ['java script', 'Front End'], ['robust applications', 'core-skills'], ['actonate solution', 'company'], ['eklavya public school junagadh', 'academic-institute'], ['s s c', 'degree'], ['app development', 'core-skills'], ['ui animation', 'core-skills'], ['h s c', 'degree'], ['sun shine school bantva', 'academic-institute'], ['6351948814', 'phone_no'], ['flutter animation', 'Mobile App'], ['google maps', 'Mobile App'], ['braintree payment', 'Mobile App'], ['ci cd system', 'tools'], ['sunflower lab', 'company']]\n", + "[['software developer', 'core-skills'], ['app development', 'core-skills'], ['team player attitude', 'soft-skills'], ['react native', 'Mobile App'], ['node js', 'Back End'], ['git', 'tools'], ['dart', 'Mobile App'], ['java script', 'cloud-platforms'], ['sql', 'Back End'], ['android studio', 'tools'], ['vs code', 'tools'], ['storecash flutter', 'core-skills'], ['firebase', 'cloud-platforms'], ['firebase analytics', 'cloud-platforms'], ['eklavya public', 'tools'], ['6351948814', 'phone_no'], ['7046706306', 'phone_no'], ['nitinjaniyani7@gmail.com', 'email'], ['flutter', 'Mobile App'], ['technicians', 'tools']]\n", + "name : 1, pred: 0\n", + "name : 0.0\n", + "\n", + "phone_no : 2, pred: 2\n", + "phone_no : 100.0\n", + "\n", + "email : 1, pred: 1\n", + "email : 100.0\n", + "\n", + "company : 2, pred: 0\n", + "company : 0.0\n", + "\n", + "Experience : 1, pred: 0\n", + "Experience : 0.0\n", + "\n", + "degree : 2, pred: 0\n", + "degree : 0.0\n", + "\n", + "academic-institute : 3, pred: 0\n", + "academic-institute : 0.0\n", + "\n", + "tools : 6, pred: 3\n", + "tools : 50.0\n", + "\n", + "core-skills : 5, pred: 2\n", + "core-skills : 40.0\n", + "\n", + "soft-skills : 2, pred: 1\n", + 
"soft-skills : 50.0\n", + "\n", + "cloud-platforms : 1, pred: 0\n", + "cloud-platforms : 0.0\n", + "\n", + "Front End : 1, pred: 1\n", + "Front End : 100.0\n", + "\n", + "Back End : 2, pred: 2\n", + "Back End : 100.0\n", + "\n", + "Mobile App : 6, pred: 4\n", + "Mobile App : 66.66666666666666\n", + "\n", + "Accuracy per resume: 45.714285714285715\n", + "\n", + "[['graduation information technology', 'degree'], ['dharmsinh desai university nadiad', 'academic-institute'], ['web application', 'core-skills'], ['data science', 'core-skills'], ['data analysis', 'core-skills'], ['machine learning', 'core-skills'], ['c', 'Back End'], ['c++', 'Back End'], ['web development', 'core-skills'], ['python', 'Back End'], ['html', 'Front End'], ['css', 'Front End'], ['bootstrap4', 'Front End'], ['javascript', 'Front End'], ['mysql', 'databases'], ['mongodb', 'databases'], ['netbeans', 'tools'], ['adobe illustrator', 'tools'], ['android studio', 'tools'], ['numpy', 'Libraries'], ['pandas', 'Libraries'], ['matplotlib', 'Libraries'], ['seaborn', 'Libraries'], ['scikit learn', 'Libraries'], ['flask', 'Libraries'], ['nltk', 'Libraries'], ['node.js', 'Back End'], ['rest api', 'Back End'], ['heroku', 'tools'], ['bootstrap', 'Front End'], ['ibm', 'cloud-platforms'], ['leadership', 'soft-skills'], ['positive attitude', 'soft-skills'], ['savanismit11@gmail.com', 'email'], ['xii', 'degree'], ['x', 'degree'], ['logical reasoning', 'soft-skills'], ['kotlin', 'Back End'], ['git', 'tools'], ['jupyter notebook', 'tools'], ['visual studio code', 'tools'], ['react', 'Back End'], ['nlp', 'core-skills'], ['natural language processing', 'core-skills'], ['robotics', 'core-skills'], ['crud', 'databases'], ['co ordinator', 'soft-skills'], ['creative team player', 'soft-skills'], ['https://www.linkedin.com/in/smit-savani/', 'linkedin'], ['https://github.com/savanismit', 'github']]\n", + "[['information technology', 'degree'], ['dharmsinh desai university nadiad', 'academic-institute'], ['xii(h.s.c', 
'degree'], ['x(s.s.c', 'degree'], ['web application', 'core-skills'], ['data science', 'core-skills'], ['data analysis', 'core-skills'], ['machine learning', 'core-skills'], ['c', 'Back End'], ['c++', 'Back End'], ['kotlin', 'Back End'], ['web development', 'core-skills'], ['python', 'Back End'], ['html', 'Front End'], ['css', 'Front End'], ['bootstrap4', 'Front End'], ['javascript', 'Front End'], ['mysql', 'databases'], ['mongodb', 'databases'], ['git', 'tools'], ['jupyter notebook', 'tools'], ['visual studio code', 'tools'], ['netbeans', 'tools'], ['adobe illustrator', 'tools'], ['android studio', 'tools'], ['numpy', 'Libraries'], ['pandas', 'Libraries'], ['scikit learn', 'Libraries'], ['flask', 'Libraries'], ['nltk', 'Libraries'], ['node.js', 'Back End'], ['react', 'Front End'], ['natural language processing', 'Libraries'], ['rest api', 'Back End'], ['bootstrap', 'Front End'], ['problem solving', 'soft-skills'], ['robotics', 'core-skills'], ['ibm data science course', 'company'], ['leadership', 'soft-skills'], ['positive attitude', 'soft-skills'], ['team player', 'soft-skills'], ['9067064044', 'phone_no'], ['savanismit11@gmail.com', 'email'], ['https://github.com/savanismit', 'github']]\n", + "email : 1, pred: 1\n", + "email : 100.0\n", + "\n", + "linkedin : 1, pred: 0\n", + "linkedin : 0.0\n", + "\n", + "github : 1, pred: 1\n", + "github : 100.0\n", + "\n", + "degree : 3, pred: 4\n", + "degree : 133.33333333333331\n", + "\n", + "academic-institute : 1, pred: 1\n", + "academic-institute : 100.0\n", + "\n", + "databases : 3, pred: 2\n", + "databases : 66.66666666666666\n", + "\n", + "tools : 7, pred: 6\n", + "tools : 85.71428571428571\n", + "\n", + "core-skills : 8, pred: 6\n", + "core-skills : 75.0\n", + "\n", + "soft-skills : 5, pred: 3\n", + "soft-skills : 60.0\n", + "\n", + "cloud-platforms : 1, pred: 0\n", + "cloud-platforms : 0.0\n", + "\n", + "Front End : 5, pred: 5\n", + "Front End : 100.0\n", + "\n", + "Back End : 7, pred: 7\n", + "Back End : 100.0\n", + 
"\n", + "Libraries : 7, pred: 5\n", + "Libraries : 71.42857142857143\n", + "\n", + "Accuracy per resume: 82.0\n", + "\n", + "[['microsoft word document', 'tools'], ['9636904727', 'phone_no'], ['varshamenghnani@gmail.com', 'email'], ['javascript', 'Front End'], ['html5', 'Front End'], ['css3', 'Front End'], ['bootstrap', 'Front End'], ['java', 'Back End'], ['c', 'Back End'], ['c++', 'Back End'], ['infosys pvt ltd', 'company'], ['html', 'Front End'], ['css', 'Front End'], ['oracle', 'databases'], ['distributors', 'core-skills'], ['rls', 'Front End'], ['angularjs', 'Front End'], ['jsp', 'Front End'], ['hsc', 'degree'], ['secondary', 'degree'], ['ssc', 'degree'], ['android application', 'core-skills'], ['mysql', 'databases'], ['ajax', 'Front End'], ['b. tech computer science engineering', 'degree'], ['1.5 years professional experience', 'Experience'], ['february 2017 august 2018', 'job-duration'], ['j2ee', 'tools'], ['express', 'Back End'], ['logistics', 'core-skills'], ['struts', 'Front End'], ['rpv', 'tools'], ['system engineer', 'core-skills'], ['bachelor technology computer science engineering', 'degree'], ['jodhpur \\ninstitute engineering technology', 'academic-institute'], ['wi‐fi', 'tools'], ['apache', 'cloud-platforms'], ['tomcat', 'cloud-platforms']]\n", + "[['microsoft word document', 'tools'], ['b. 
tech computer science engineering', 'degree'], ['9636904727', 'phone_no'], ['varshamenghnani@gmail.com', 'email'], ['angularjs', 'Front End'], ['javascript', 'Front End'], ['html5', 'Front End'], ['css3', 'Front End'], ['bootstrap', 'Front End'], ['java', 'Back End'], ['c++', 'Back End'], ['infosys pvt ltd', 'company'], ['html', 'Front End'], ['css', 'Front End'], ['oracle', 'databases'], ['bachelor technology computer science engineering', 'degree'], ['hsc', 'degree'], ['secondary', 'degree'], ['ssc', 'degree'], ['android application', 'core-skills'], ['mysql', 'databases'], ['ajax', 'Front End'], ['9636304727', 'phone_no']]\n", + "phone_no : 1, pred: 1\n", + "phone_no : 100.0\n", + "\n", + "email : 1, pred: 1\n", + "email : 100.0\n", + "\n", + "company : 1, pred: 1\n", + "company : 100.0\n", + "\n", + "job-duration : 1, pred: 0\n", + "job-duration : 0.0\n", + "\n", + "Experience : 1, pred: 0\n", + "Experience : 0.0\n", + "\n", + "degree : 5, pred: 5\n", + "degree : 100.0\n", + "\n", + "academic-institute : 1, pred: 0\n", + "academic-institute : 0.0\n", + "\n", + "databases : 2, pred: 2\n", + "databases : 100.0\n", + "\n", + "tools : 4, pred: 1\n", + "tools : 25.0\n", + "\n", + "core-skills : 4, pred: 1\n", + "core-skills : 25.0\n", + "\n", + "cloud-platforms : 2, pred: 0\n", + "cloud-platforms : 0.0\n", + "\n", + "Front End : 11, pred: 8\n", + "Front End : 72.72727272727273\n", + "\n", + "Back End : 4, pred: 2\n", + "Back End : 50.0\n", + "\n", + "Accuracy per resume: 57.89473684210527\n", + "\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[['keval padsumbiya', 'name'], ['kppadsumbiya@gmail.com', 'email'], ['9825497726', 'phone_no'], ['github.com/kevalpadsumbiya', 'github'], ['data structures\\n', 'core-skills'], ['python', 'Back End'], ['java', 'Back End'], ['c++', 'Back End'], ['c', 'Back End'], ['django', 'Libraries'], ['php', 'Back End'], ['learn new', 'soft-skills'], ['junior php developer(4 weeks', 'designation'], ['silverwing 
technologies pvt ltd', 'company'], ['web development', 'core-skills'], ['linkedin.com/in/keval-\\npadsumbiya', 'linkedin'], ['data\\nstructures\\nalgorithms\\n', 'core-skills'], ['btech information technology', 'degree'], ['image service', 'tools'], ['birla vishvakarma mahavidhyalaya engineering college', 'academic-institute'], ['software engineering', 'core-skills'], ['https://github.com/kevalpadsumbiya/problem-finder', 'github'], ['https://github.com/kevalpadsumbiya/share-images', 'github']]\n", + "[['keval padsumbiya', 'name'], ['kppadsumbiya@gmail.com', 'email'], ['9825497726', 'phone_no'], ['linkedin.com/in/keval-', 'linkedin'], ['github.com/kevalpadsumbiya', 'linkedin'], ['problem solving', 'soft-skills'], ['python', 'Back End'], ['java', 'Back End'], ['c++', 'Back End'], ['c', 'Back End'], ['django', 'Libraries'], ['php', 'Back End'], ['junior php developer(4', 'designation'], ['web development', 'core-skills'], ['btech information technology', 'degree'], ['vishvakarma mahavidhyalaya engineering college', 'academic-institute']]\n", + "name : 1, pred: 1\n", + "name : 100.0\n", + "\n", + "phone_no : 1, pred: 1\n", + "phone_no : 100.0\n", + "\n", + "email : 1, pred: 1\n", + "email : 100.0\n", + "\n", + "linkedin : 1, pred: 1\n", + "linkedin : 100.0\n", + "\n", + "github : 3, pred: 0\n", + "github : 0.0\n", + "\n", + "designation : 1, pred: 1\n", + "designation : 100.0\n", + "\n", + "company : 1, pred: 0\n", + "company : 0.0\n", + "\n", + "degree : 1, pred: 1\n", + "degree : 100.0\n", + "\n", + "academic-institute : 1, pred: 1\n", + "academic-institute : 100.0\n", + "\n", + "tools : 1, pred: 0\n", + "tools : 0.0\n", + "\n", + "core-skills : 4, pred: 1\n", + "core-skills : 25.0\n", + "\n", + "soft-skills : 1, pred: 0\n", + "soft-skills : 0.0\n", + "\n", + "Back End : 5, pred: 5\n", + "Back End : 100.0\n", + "\n", + "Libraries : 1, pred: 1\n", + "Libraries : 100.0\n", + "\n", + "Accuracy per resume: 60.86956521739131\n", + "\n", + "[['css', 'Front End'], 
['javascript', 'Front End'], ['jquery', 'Front End'], ['hibernate', 'Back End'], ['mysql', 'databases'], ['github', 'tools'], ['vs code', 'tools'], ['php', 'Back End'], ['c', 'Back End'], ['c++', 'Back End'], ['b. tech anand', 'degree'], ['information\\ntechnology', 'degree'], ['h.s.c', 'degree'], ['s.s.c', 'degree'], ['praviraj4168@gmail.com', 'email'], ['python', 'Back End'], ['spring boot', 'Back End'], ['angular-10', 'Front End'], ['training skills', 'soft-skills'], ['html 5', 'Front End'], ['bootstrap 4', 'Front End'], ['angular 10', 'Front End'], ['java', 'Back End'], ['jdbc rest api', 'Back End'], ['maven', 'tools'], ['spring tool suite 4', 'tools'], ['agricultural\\nuniversity\\ncollege', 'academic-institute'], ['shri h. l. patel\\nsaraswati vidyalay\\nmodasa', 'academic-institute'], ['gayatri vidyalay\\niploda', 'academic-institute'], ['pranami raviraj n.', 'name'], ['8849760507', 'phone_no'], ['bootstrap -4', 'Front End'], ['bootstrap-4', 'Front End'], ['remote sensing geographical information system', 'core-skills'], ['global navigation\\nsatellite system', 'tools'], ['remote sensing geospatial technologies', 'core-skills'], ['information technology', 'core-skills'], ['post graduate', 'degree'], ['vallabh vidyanagar', 'academic-institute']]\n", + "[['html', 'Front End'], ['css', 'Front End'], ['javascript', 'Front End'], ['bootstrap 4', 'Front End'], ['jquery', 'Front End'], ['angular', 'Front End'], ['rest api', 'Back End'], ['spring boot', 'Libraries'], ['hibernate', 'Back End'], ['mysql', 'databases'], ['git', 'tools'], ['github', 'tools'], ['php', 'Back End'], ['c', 'Back End'], ['c++', 'Back End'], ['b. tech anand', 'degree'], ['h.s.c', 'degree'], ['saraswati vidyalay\\nmodasa', 'academic-institute'], ['s.s.c', 'degree'], ['gayatri vidyalay', 'academic-institute'], ['n.', 'soft-skills'], ['praviraj4168@gmail.com', 'email'], ['bootstrap', 'Front End'], ['python', 'Back End'], ['information technology', 'degree'], ['g. 
h.\\npatel', 'academic-institute'], ['computer science', 'core-skills']]\n", + "name : 1, pred: 0\n", + "name : 0.0\n", + "\n", + "phone_no : 1, pred: 0\n", + "phone_no : 0.0\n", + "\n", + "email : 1, pred: 1\n", + "email : 100.0\n", + "\n", + "degree : 5, pred: 3\n", + "degree : 60.0\n", + "\n", + "academic-institute : 4, pred: 2\n", + "academic-institute : 50.0\n", + "\n", + "databases : 1, pred: 1\n", + "databases : 100.0\n", + "\n", + "tools : 5, pred: 2\n", + "tools : 40.0\n", + "\n", + "core-skills : 3, pred: 0\n", + "core-skills : 0.0\n", + "\n", + "soft-skills : 1, pred: 0\n", + "soft-skills : 0.0\n", + "\n", + "Front End : 9, pred: 4\n", + "Front End : 44.44444444444444\n", + "\n", + "Back End : 8, pred: 6\n", + "Back End : 75.0\n", + "\n", + "Accuracy per resume: 48.717948717948715\n", + "\n", + "[['html', 'Front End'], ['css', 'Front End'], ['javascript', 'Front End'], ['jquery', 'Front End'], ['bootstrap', 'Front End'], ['sanketparmar465@gmail.com', 'email'], ['sanket parmar', 'name'], ['web developer', 'core-skills'], ['b.tech cse', 'degree'], ['hsc', 'degree'], ['navrachna university', 'academic-institute'], ['atharva vidhyalaya', 'academic-institute'], ['web\\ndeveloper', 'designation'], ['internship', 'designation'], ['ssc', 'degree'], ['php', 'Back End'], ['mysql', 'databases'], ['wordpress', 'tools'], ['adobe xd', 'tools'], ['knowledge skills', 'soft-skills'], ['7043176045', 'phone_no'], ['nethuts solutions inc', 'company'], ['joined sep,2020 present', 'job-duration'], ['ankur vidhyalaya', 'academic-institute']]\n", + "[['html', 'Front End'], ['css', 'Front End'], ['javascript', 'Front End'], ['jquery', 'Front End'], ['bootstrap', 'Front End'], ['7043176045', 'phone_no'], ['sanketparmar465@gmail.com', 'email'], ['sanket parmar', 'name'], ['web developer', 'core-skills'], ['b.tech cse', 'degree'], ['hsc', 'degree'], ['navrachna university', 'academic-institute'], ['nethuts solutions inc', 'company'], ['internship', 'designation'], ['ssc', 
'degree'], ['php', 'Back End'], ['mysql', 'databases'], ['wordpress', 'tools'], ['adobe', 'tools']]\n", + "name : 1, pred: 1\n", + "name : 100.0\n", + "\n", + "phone_no : 1, pred: 1\n", + "phone_no : 100.0\n", + "\n", + "email : 1, pred: 1\n", + "email : 100.0\n", + "\n", + "designation : 2, pred: 1\n", + "designation : 50.0\n", + "\n", + "company : 1, pred: 1\n", + "company : 100.0\n", + "\n", + "job-duration : 1, pred: 0\n", + "job-duration : 0.0\n", + "\n", + "degree : 3, pred: 3\n", + "degree : 100.0\n", + "\n", + "academic-institute : 3, pred: 1\n", + "academic-institute : 33.33333333333333\n", + "\n", + "databases : 1, pred: 1\n", + "databases : 100.0\n", + "\n", + "tools : 2, pred: 2\n", + "tools : 100.0\n", + "\n", + "core-skills : 1, pred: 1\n", + "core-skills : 100.0\n", + "\n", + "soft-skills : 1, pred: 0\n", + "soft-skills : 0.0\n", + "\n", + "Front End : 5, pred: 5\n", + "Front End : 100.0\n", + "\n", + "Back End : 1, pred: 1\n", + "Back End : 100.0\n", + "\n", + "Accuracy per resume: 79.16666666666666\n", + "\n", + "[['microsoft word', 'tools'], ['microsoft powerpoint', 'tools'], ['php', 'Back End'], ['html', 'Front End'], ['css', 'Front End'], ['javascript', 'Front End'], ['s.s.c', 'degree'], ['high school', 'degree'], ['h.s.c', 'degree'], ['babaria institute', 'academic-institute'], ['rushabhrana51@gmail.com', 'email'], ['9824255134', 'phone_no'], ['microsoft excel', 'tools'], ['rushabh\\nrana', 'name'], ['work dedicated', 'soft-skills'], ['creative willing learn', 'soft-skills'], ['punctuality work', 'soft-skills'], ['friendly upbeat attitude', 'soft-skills'], ['bachelor engineering computer engineering', 'degree']]\n", + "[['rushabh\\nrana', 'name'], ['microsoft excel', 'tools'], ['microsoft word', 'tools'], ['microsoft powerpoint', 'tools'], ['php', 'Back End'], ['html', 'Front End'], ['css', 'Front End'], ['javascript', 'Front End'], ['s.s.c', 'degree'], ['high school', 'degree'], ['h.s.c', 'degree'], ['bachelor engineering computer 
engineering\\ncollege', 'degree'], ['achieved computer engineering course babaria institute', 'academic-institute'], ['rushabhrana51@gmail.com', 'email'], ['9824255134', 'phone_no']]\n", + "name : 1, pred: 1\n", + "name : 100.0\n", + "\n", + "phone_no : 1, pred: 1\n", + "phone_no : 100.0\n", + "\n", + "email : 1, pred: 1\n", + "email : 100.0\n", + "\n", + "degree : 4, pred: 4\n", + "degree : 100.0\n", + "\n", + "academic-institute : 1, pred: 1\n", + "academic-institute : 100.0\n", + "\n", + "tools : 3, pred: 3\n", + "tools : 100.0\n", + "\n", + "soft-skills : 4, pred: 0\n", + "soft-skills : 0.0\n", + "\n", + "Front End : 3, pred: 3\n", + "Front End : 100.0\n", + "\n", + "Back End : 1, pred: 1\n", + "Back End : 100.0\n", + "\n", + "Accuracy per resume: 78.94736842105263\n", + "\n", + "[['rohan dhamecha', 'name'], ['jr software engineer', 'designation'], ['rohandhamecha45@gmail.com', 'email'], ['9408771346', 'phone_no'], ['healthcare informatics pvt ltd', 'company'], ['06/2018 present', 'job-duration'], ['web api', 'core-skills'], ['web application', 'core-skills'], ['bachelor engineering', 'degree'], ['maharaja sayajirao university baorda', 'academic-institute'], ['computer science\\nengineering', 'degree'], ['csharp', 'Back End'], ['ms sql', 'databases'], ['mongodb', 'databases'], ['linq', 'databases'], ['udemy developer', 'cloud-platforms'], ['vb.net', 'Back End'], ['linkedin.com/in/rdhamecha', 'linkedin'], ['ehr', 'tools'], ['.net framework', 'Libraries'], ['angularjs', 'Front End'], ['.net webapi', 'Back End'], ['aws s3', 'cloud-platforms'], ['advances computing communication informatics', 'core-skills']]\n", + "[['rohan dhamecha', 'name'], ['jr software engineer', 'designation'], ['rohandhamecha45@gmail.com', 'email'], ['9408771346', 'phone_no'], ['healthcare informatics pvt ltd', 'company'], ['06/2018 present', 'job-duration'], ['angularjs', 'Front End'], ['aws s3', 'cloud-platforms'], ['web api', 'core-skills'], ['web application', 'core-skills'], ['bachelor 
engineering', 'degree'], ['maharaja sayajirao university baorda', 'academic-institute'], ['computer science\\nengineering', 'degree'], ['csharp', 'Back End'], ['vb.net', 'Back End'], ['ms sql', 'databases'], ['mongodb', 'databases'], ['linq', 'databases'], ['entity framework', 'tools'], ['udemy developer', 'core-skills']]\n", + "name : 1, pred: 1\n", + "name : 100.0\n", + "\n", + "phone_no : 1, pred: 1\n", + "phone_no : 100.0\n", + "\n", + "email : 1, pred: 1\n", + "email : 100.0\n", + "\n", + "linkedin : 1, pred: 0\n", + "linkedin : 0.0\n", + "\n", + "designation : 1, pred: 1\n", + "designation : 100.0\n", + "\n", + "company : 1, pred: 1\n", + "company : 100.0\n", + "\n", + "job-duration : 1, pred: 1\n", + "job-duration : 100.0\n", + "\n", + "degree : 2, pred: 2\n", + "degree : 100.0\n", + "\n", + "academic-institute : 1, pred: 1\n", + "academic-institute : 100.0\n", + "\n", + "databases : 3, pred: 3\n", + "databases : 100.0\n", + "\n", + "tools : 1, pred: 0\n", + "tools : 0.0\n", + "\n", + "core-skills : 3, pred: 2\n", + "core-skills : 66.66666666666666\n", + "\n", + "cloud-platforms : 2, pred: 1\n", + "cloud-platforms : 50.0\n", + "\n", + "Front End : 1, pred: 1\n", + "Front End : 100.0\n", + "\n", + "Back End : 3, pred: 2\n", + "Back End : 66.66666666666666\n", + "\n", + "Libraries : 1, pred: 0\n", + "Libraries : 0.0\n", + "\n", + "Accuracy per resume: 75.0\n", + "\n", + "[['tirth jain', 'name'], ['mca', 'degree'], ['php', 'Back End'], ['angularjs', 'Front End'], ['web development', 'core-skills'], ['codeigniter', 'Back End'], ['laravel', 'Back End'], ['android studio', 'tools'], ['wordpress', 'tools'], ['html', 'Front End'], ['css', 'Front End'], ['bootstrap', 'Front End'], ['mysql', 'databases'], ['nodejs', 'Back End'], ['expressjs', 'Back End'], ['socet.io', 'Back End'], ['javascript', 'Front End'], ['jquery', 'Front End'], ['python', 'Back End'], ['django', 'Libraries'], ['reactjs', 'Front End'], ['firebase', 'cloud-platforms'], ['bca', 'degree'], 
['internship', 'designation'], ['html5', 'Front End'], ['css3,jquery', 'Front End'], ['bootstrap4', 'Front End'], ['node server+expressjs+php+mysql', 'tools'], ['ajax', 'Front End'], ['nodjs +', 'core-skills'], ['java', 'Back End'], ['web application', 'core-skills'], ['ethical hacking', 'core-skills'], ['rapidly build', 'soft-skills'], ['cope different situation', 'soft-skills'], ['leadership skills', 'soft-skills'], ['positive attitude', 'soft-skills'], ['independently team', 'soft-skills'], ['software developer', 'designation'], ['hard working', 'soft-skills'], ['quick learner', 'soft-skills'], ['7984814283', 'phone_no'], ['tirth886jain@gmail.com', 'email'], ['finbyz tech pvt ltd', 'company'], ['fullstack', 'designation'], ['dev align connect pvt ltd', 'company'], ['grras pvt ltd', 'company'], ['niit ahmedabad', 'academic-institute'], ['routing switching', 'core-skills'], ['electron js', 'Back End'], ['routing switching configuration', 'core-skills'], ['10th', 'degree'], ['12th', 'degree'], ['nelson high\\nschool state', 'academic-institute'], ['teerthanker\\nmahaveer\\nuniversity', 'academic-institute'], ['chimanbhai\\ninstitute\\ncomputer\\napplication', 'academic-institute'], ['angular js', 'Front End'], ['grass pvt solution', 'company'], ['networking', 'core-skills'], ['a+/n+/ccna+server', 'Libraries'], ['3rd june\\n2019 15th feb 2020', 'job-duration'], ['fullstack developer', 'designation'], ['align connect pvt ltd', 'company'], ['1st june\\n2020 present', 'job-duration'], ['mysql ', 'databases'], ['node server', 'Back End'], ['crud', 'databases'], ['js', 'Front End'], ['web socket', 'Back End'], ['web socket node', 'Back End'], ['crm mobile', 'Mobile App'], ['teerthanker\\nmahaveer university', 'academic-institute'], ['teethanker\\nmahaver university', 'academic-institute'], ['confident', 'soft-skills'], ['high level professionalism', 'soft-skills'], ['active listener', 'soft-skills']]\n", + "[['mca', 'degree'], ['7984814283', 'phone_no'], 
['tirth886jain@gmail.com', 'email'], ['grras pvt ltd ahmedabad', 'academic-institute'], ['php', 'Back End'], ['angularjs', 'Front End'], ['web development', 'core-skills'], ['codeigniter', 'Back End'], ['laravel', 'Back End'], ['android studio', 'tools'], ['wordpress', 'tools'], ['html', 'Front End'], ['css', 'Front End'], ['bootstrap', 'Front End'], ['mysql', 'databases'], ['nodejs', 'Back End'], ['expressjs', 'Back End'], ['socet.io', 'Back End'], ['javascript', 'Front End'], ['jquery', 'Front End'], ['electron', 'Front End'], ['js', 'Front End'], ['python', 'Back End'], ['django', 'Libraries'], ['reactjs', 'Front End'], ['firebase', 'cloud-platforms'], ['10th', 'degree'], ['bca', 'degree'], ['js grass pvt solution', 'company'], ['internship', 'designation'], ['finbyz tech pvt ltd', 'company'], ['9 month 3rd june\\n2019 15th feb 2020', 'job-duration'], ['6 month 1st june\\n2020 present', 'job-duration'], ['html5', 'Front End'], ['bootstrap4', 'Front End'], ['php+mysql', 'databases'], ['node', 'Back End'], ['crud', 'databases'], ['ajax', 'Front End'], ['react', 'Front End'], ['java', 'Back End'], ['computer science', 'degree'], ['ethical hacking', 'core-skills'], ['leadership skills', 'soft-skills'], ['positive attitude', 'soft-skills'], ['operate independently team', 'soft-skills'], ['software developer', 'core-skills'], ['hard working', 'soft-skills'], ['quick learner', 'soft-skills']]\n", + "name : 1, pred: 0\n", + "name : 0.0\n", + "\n", + "phone_no : 1, pred: 1\n", + "phone_no : 100.0\n", + "\n", + "email : 1, pred: 1\n", + "email : 100.0\n", + "\n", + "designation : 4, pred: 1\n", + "designation : 25.0\n", + "\n", + "company : 5, pred: 2\n", + "company : 40.0\n", + "\n", + "job-duration : 2, pred: 2\n", + "job-duration : 100.0\n", + "\n", + "degree : 4, pred: 3\n", + "degree : 75.0\n", + "\n", + "academic-institute : 6, pred: 0\n", + "academic-institute : 0.0\n", + "\n", + "databases : 3, pred: 4\n", + "databases : 133.33333333333331\n", + "\n", + "tools : 
3, pred: 2\n", + "tools : 66.66666666666666\n", + "\n", + "core-skills : 7, pred: 2\n", + "core-skills : 28.57142857142857\n", + "\n", + "soft-skills : 10, pred: 5\n", + "soft-skills : 50.0\n", + "\n", + "cloud-platforms : 1, pred: 1\n", + "cloud-platforms : 100.0\n", + "\n", + "Front End : 13, pred: 11\n", + "Front End : 84.61538461538461\n", + "\n", + "Back End : 12, pred: 8\n", + "Back End : 66.66666666666666\n", + "\n", + "Mobile App : 1, pred: 0\n", + "Mobile App : 0.0\n", + "\n", + "Libraries : 2, pred: 1\n", + "Libraries : 50.0\n", + "\n", + "Accuracy per resume: 57.89473684210527\n", + "\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[['vishal kumar', 'name'], ['vishalyo990@gmail.com', 'email'], ['python', 'Back End'], ['angular developer', 'Front End'], ['good interpersonal skills', 'soft-skills'], ['python developer', 'designation'], ['angular', 'Front End'], ['rest api', 'Back End'], ['flask', 'Libraries'], ['microservices architecture', 'tools'], ['azure cloud', 'cloud-platforms'], ['dronacharya college engineering gurgoan', 'academic-institute'], ['bachelor technology computer science engineering', 'degree'], ['docker', 'tools'], ['leadership team player', 'soft-skills'], ['6352282184', 'phone_no'], ['2 + years experience', 'Experience'], ['communication skills', 'soft-skills'], ['core competencies', 'core-skills'], ['backend development', 'core-skills'], ['automation tasks', 'core-skills'], ['frontend development', 'core-skills'], ['cloud deployment', 'core-skills'], ['stemmons business services pvt ltd', 'company'], ['jun 2019 present', 'job-duration'], ['selenium web testing', 'tools'], ['transpipe integrity solution pvt ltd', 'company'], ['jun 2018 jun 2019', 'job-duration'], ['kendriya vidyalaya pathankot', 'academic-institute'], ['xth', 'degree'], ['xiith', 'degree'], ['git', 'tools'], ['bitbucket', 'tools'], ['adapt', 'soft-skills']]\n", + "[['vishal kumar', 'name'], ['6352282184', 'phone_no'], 
['vishalyo990@gmail.com', 'email'], ['communication skills', 'soft-skills'], ['good interpersonal skills', 'soft-skills'], ['stemmons business services pvt ltd', 'company'], ['python developer', 'designation'], ['rest api', 'Back End'], ['flask', 'Libraries'], ['metadata', 'core-skills'], ['azure cloud containers aci', 'cloud-platforms'], ['dronacharya college engineering gurgoan', 'academic-institute'], ['bachelor technology computer science engineering', 'degree'], ['kendriya vidyalaya pathankot', 'academic-institute'], ['git', 'tools'], ['bitbucket', 'tools'], ['docker', 'tools'], ['leadership', 'soft-skills'], ['team player', 'soft-skills']]\n", + "name : 1, pred: 1\n", + "name : 100.0\n", + "\n", + "phone_no : 1, pred: 1\n", + "phone_no : 100.0\n", + "\n", + "email : 1, pred: 1\n", + "email : 100.0\n", + "\n", + "designation : 1, pred: 1\n", + "designation : 100.0\n", + "\n", + "company : 2, pred: 1\n", + "company : 50.0\n", + "\n", + "job-duration : 2, pred: 0\n", + "job-duration : 0.0\n", + "\n", + "Experience : 1, pred: 0\n", + "Experience : 0.0\n", + "\n", + "degree : 3, pred: 1\n", + "degree : 33.33333333333333\n", + "\n", + "academic-institute : 2, pred: 2\n", + "academic-institute : 100.0\n", + "\n", + "tools : 5, pred: 3\n", + "tools : 60.0\n", + "\n", + "core-skills : 5, pred: 0\n", + "core-skills : 0.0\n", + "\n", + "soft-skills : 4, pred: 4\n", + "soft-skills : 100.0\n", + "\n", + "cloud-platforms : 1, pred: 1\n", + "cloud-platforms : 100.0\n", + "\n", + "Front End : 2, pred: 0\n", + "Front End : 0.0\n", + "\n", + "Back End : 2, pred: 1\n", + "Back End : 50.0\n", + "\n", + "Libraries : 1, pred: 1\n", + "Libraries : 100.0\n", + "\n", + "Accuracy per resume: 52.94117647058824\n", + "\n", + "[['computer engineer', 'degree'], ['c', 'Back End'], ['c++', 'Back End'], ['java', 'Back End'], ['oops concept', 'core-skills'], ['html', 'Front End'], ['css', 'Front End'], ['intership', 'designation'], ['programmer', 'Back End'], ['8490884564', 'phone_no'], 
['gujarat technological\\nuniversity', 'academic-institute'], ['neotech institute technology', 'academic-institute'], ['bachelor computer engineering', 'degree'], ['iot', 'core-skills'], ['python', 'Back End'], ['android', 'core-skills'], ['high school', 'degree'], ['shree narayan high school', 'academic-institute'], ['sardar vallabhbhai vidhyalay', 'academic-institute'], ['vrunda bhavsar', 'name'], ['professional software engineer', 'designation'], ['jvm encapsulation', 'tools'], ['static inheritance', 'core-skills'], ['cap world system', 'company'], ['opps', 'core-skills'], ['spring', 'Back End'], ['hibernate', 'Back End'], ['.netxpert', 'Back End'], ['vrundabhavsar1111@g\\nmail.com', 'email'], ['image processing', 'core-skills'], ['ability analyze complex', 'soft-skills'], ['complete work time', 'soft-skills']]\n", + "[['software engineer', 'designation'], ['computer engineer', 'designation'], ['c', 'Back End'], ['c++', 'Back End'], ['java', 'Back End'], ['html', 'Front End'], ['css', 'Front End'], ['intership\\nprogrammer', 'designation'], ['cap world system', 'company'], ['hibernate', 'Back End'], ['spring', 'tools'], ['8490884564', 'phone_no'], ['vrundabhavsar1111@g', 'linkedin'], ['gujarat technological\\nuniversity\\nneotech institute technology', 'academic-institute'], ['bachelor computer engineering', 'degree'], ['iot', 'degree'], ['python', 'Back End'], ['android', 'core-skills'], ['high school', 'degree'], ['shree narayan high school', 'academic-institute'], ['sardar vallabhbhai vidhyalay', 'academic-institute']]\n", + "name : 1, pred: 0\n", + "name : 0.0\n", + "\n", + "phone_no : 1, pred: 1\n", + "phone_no : 100.0\n", + "\n", + "email : 1, pred: 0\n", + "email : 0.0\n", + "\n", + "designation : 2, pred: 2\n", + "designation : 100.0\n", + "\n", + "company : 1, pred: 1\n", + "company : 100.0\n", + "\n", + "degree : 3, pred: 3\n", + "degree : 100.0\n", + "\n", + "academic-institute : 4, pred: 4\n", + "academic-institute : 100.0\n", + "\n", + "tools : 1, 
pred: 0\n", + "tools : 0.0\n", + "\n", + "core-skills : 6, pred: 1\n", + "core-skills : 16.666666666666664\n", + "\n", + "soft-skills : 2, pred: 0\n", + "soft-skills : 0.0\n", + "\n", + "Front End : 2, pred: 2\n", + "Front End : 100.0\n", + "\n", + "Back End : 8, pred: 6\n", + "Back End : 75.0\n", + "\n", + "Accuracy per resume: 62.5\n", + "\n", + "[['shubham.chauhan146@gmail.com', 'email'], ['8602316148', 'phone_no'], ['4 + years experience', 'Experience'], ['software development', 'core-skills'], ['bachelor engineering information technology', 'degree'], ['sabertooth technologies pvt ltd', 'company'], ['march 2019 june 2020', 'job-duration'], ['software developer', 'designation'], ['january 2017 february 2019', 'job-duration'], ['google api', 'Back End'], ['firebase', 'cloud-platforms'], ['java', 'Back End'], ['micro learning', 'core-skills'], ['c++', 'Back End'], ['xml', 'Front End'], ['json', 'Front End'], ['hibernate', 'Back End'], ['web programming', 'core-skills'], ['rest api', 'Mobile App'], ['web api', 'core-skills'], ['servlets jsp', 'Back End'], ['sql', 'Back End'], ['javascript', 'Front End'], ['jquery', 'Front End'], ['ajax', 'Front End'], ['html', 'Front End'], ['css', 'Front End'], ['bootstrap', 'Front End'], ['sublime', 'tools'], ['android studio', 'tools'], ['visual studio', 'tools'], ['team hard work', 'soft-skills'], ['july 2020 present', 'job-duration'], ['digi tech indore mp', 'company'], ['react native developer', 'designation'], ['trinity solutions indore mp', 'company'], ['august 2015 december 2016', 'job-duration'], ['realtimedatabase', 'databases'], ['paytm integration', 'Mobile App'], ['shubham chauhan', 'name'], ['laxmi narayan college technology', 'academic-institute'], ['qualwebs indore mp', 'company'], ['google analytics', 'tools'], ['paypal', 'Mobile App'], ['razor pay', 'Mobile App'], ['admob', 'Mobile App'], ['mopub', 'Mobile App'], ['ui design', 'core-skills'], ['gps', 'Mobile App'], ['google map', 'Mobile App'], ['ads 
integration', 'Mobile App'], ['organizational skills', 'soft-skills'], ['sr react native developer', 'designation'], ['payu money', 'Mobile App'], ['react native', 'Mobile App'], ['unit testing', 'tools'], ['android', 'core-skills'], ['oops', 'core-skills'], ['rdbms', 'databases'], ['spring', 'Back End'], ['mvc', 'core-skills'], ['apache', 'cloud-platforms'], ['tomcat', 'cloud-platforms']]\n", + "[['shubham chauhan cv', 'name'], ['shubham chauhan', 'name'], ['8602316148', 'phone_no'], ['4 + years experience', 'Experience'], ['software development', 'core-skills'], ['bachelor engineering information technology', 'degree'], ['narayan college technology', 'academic-institute'], ['sabertooth technologies pvt ltd', 'company'], ['sr react native developer', 'designation'], ['july 2020 present', 'job-duration'], ['react native developer', 'Mobile App'], ['march 2019 june 2020', 'job-duration'], ['august 2015 december 2016', 'job-duration'], ['google api', 'cloud-platforms'], ['google analytics', 'cloud-platforms'], ['mailto:shubham.chauhan146@gmail.com', 'email'], ['paytm integration', 'core-skills'], ['ads integration', 'tools'], ['react native', 'Mobile App'], ['rest api', 'Back End'], ['java', 'Back End'], ['c++', 'Back End'], ['xml', 'Front End'], ['json', 'Front End'], ['mvc', 'core-skills'], ['web api', 'core-skills'], ['servlets jsp', 'core-skills'], ['sql', 'Back End'], ['firebase', 'cloud-platforms'], ['javascript', 'Front End'], ['jquery', 'Front End'], ['ajax', 'Front End'], ['html', 'Front End'], ['css', 'Front End'], ['bootstrap', 'Front End'], ['sublime', 'tools'], ['android studio', 'tools'], ['visual studio', 'tools']]\n", + "name : 1, pred: 2\n", + "name : 200.0\n", + "\n", + "phone_no : 1, pred: 1\n", + "phone_no : 100.0\n", + "\n", + "email : 1, pred: 1\n", + "email : 100.0\n", + "\n", + "designation : 3, pred: 2\n", + "designation : 66.66666666666666\n", + "\n", + "company : 4, pred: 1\n", + "company : 25.0\n", + "\n", + "job-duration : 4, pred: 3\n", 
def check(ref: str, pred: str) -> bool:
    """Return True when the shorter of the two strings occurs inside the longer.

    Args:
        ref: Annotated (gold) entity text.
        pred: Predicted entity text.

    Returns:
        True if one string contains the other, False otherwise.  Empty
        strings never match: "" is a substring of every string, so an empty
        prediction would otherwise be credited against every annotation.
    """
    if not ref or not pred:
        return False
    if len(ref) > len(pred):
        return pred in ref
    return ref in pred


# Labels whose entities are short tokens ("c", "js", "java", ...).  Substring
# matching would let "c" match "c++" or "css", so these require equal text.
EXACT_MATCH_LABELS = ("Front End", "Back End")


def count_label_matches(label, true, pred):
    """Count annotated entities of *label* and how many of them were recovered.

    Args:
        label: Entity label being scored.
        true: List of ``[text, label]`` pairs taken from the annotations.
        pred: List of ``[text, label]`` pairs produced by the model.

    Returns:
        ``(n_true, n_matched)``.  ``n_matched`` never exceeds ``n_true``.
    """
    n_true = 0
    n_matched = 0
    for ref_text, ref_label in true:
        if ref_label != label:
            continue
        n_true += 1
        for pred_text, pred_label in pred:
            # A prediction only counts when it carries the same label; the
            # earlier version skipped this check for Front End / Back End.
            if pred_label != label:
                continue
            if label in EXACT_MATCH_LABELS:
                hit = pred_text == ref_text
            else:
                hit = check(ref_text, pred_text)
            if hit:
                n_matched += 1
                # Stop at the first hit: counting every matching prediction
                # produced scores above 100% (e.g. "name : 200.0").
                break
    return n_true, n_matched


def percentage(part, whole):
    """Return ``part / whole`` as a percentage, or 0.0 when *whole* is zero."""
    if whole == 0:
        return 0.0
    return (part / whole) * 100


def evaluate(nlp, test_data, labels):
    """Print per-label, per-resume and overall scores for *nlp* on *test_data*.

    The printed "accuracy" is the share of annotated entities that the model
    recovered (matched annotations / total annotations).

    Args:
        nlp: Callable pipeline; ``nlp(text).ents`` must yield spans exposing
            ``label_`` and a string form.
        test_data: Iterable of ``(text, {"entities": [(start, end, label), ...]})``.
        labels: Labels to score, in reporting order.

    Returns:
        ``(matched, total)`` annotation counts over the whole test set.
    """
    whole_true = 0
    whole_pred = 0
    for text, annot in test_data:
        doc_to_test = nlp(text)

        # dict() collapses repeated surface strings (the last label seen wins),
        # so an entity mentioned several times in one resume is scored once.
        gold = dict((text[ent[0]:ent[1]], ent[2]) for ent in annot["entities"])
        true = [[span, label] for span, label in gold.items()]
        print(true)

        found = dict((str(ent).strip("\n"), ent.label_) for ent in doc_to_test.ents)
        pred = [[span, label] for span, label in found.items()]
        print(pred)

        true_labels = 0
        pred_labels = 0
        for label in labels:
            c_t, c_p = count_label_matches(label, true, pred)
            if c_t == 0:
                # Label absent from this resume's annotations: nothing to report.
                continue
            true_labels += c_t
            pred_labels += c_p
            print(f"{label} : {c_t}, pred: {c_p}")
            print(f"{label} : {percentage(c_p, c_t)}\n")
        print(f"Accuracy per resume: {percentage(pred_labels, true_labels)}\n")

        whole_true += true_labels
        whole_pred += pred_labels

    print(f"\nAccuracy on test set: {percentage(whole_pred, whole_true)}\n")
    return whole_pred, whole_true


# Jupyter runs cells with __name__ == "__main__", so the notebook still
# evaluates the model here, while the helpers above stay importable.
# `spacy`, `test_data` and `LABELS` come from earlier notebook cells.
if __name__ == "__main__":
    nlp = spacy.load("E:\\Darsh\\DP\\Internship and Works\\AtliQ Technologies\\NER Resume Model\\my_model")
    evaluate(nlp, test_data, LABELS)
Install MLFlow ([Setup Instructions](https://mlflow.org/docs/latest/quickstart.html)) + +* Tools used are: + **Apache Tika** - For converting PDF files into plain-text files. + **Doccano** - For annotating the dataset. + +## Setup for Jupyter Notebook: + +1. Install Everything ([Setup Instructions](https://github.com/darsh8200/ner-resume-parser/blob/main/NER-Spacy.ipynb)) + +## Training the Model + +1. Model is spaCy: + Training Data Set = jsonl formatted file (80 resumes used for training) + Test Data Set = jsonl formatted file (14 resumes used for testing) + +2. A blank spaCy model is created with only the ner pipeline (all other pipelines are disabled), and the custom labels are added to the ner pipeline. + +3. -- Running .py files and flowchart for it.. + +4. During training, the data is shuffled and fed to the model in batches to improve training. + +5. In the testing phase, each jsonl line is taken as input and all predictions for its text are written to an individual text file. + +## Running MLFlow + +1. After training with **train.py**, run mlflow_spacy.py to see the model on MLFlow. 
"""FastAPI service that serves the trained spaCy resume-NER pipeline."""
import os
from pathlib import Path
from typing import Dict, List, Optional

import spacy
from fastapi import APIRouter, FastAPI
from pydantic import BaseModel

# The pipeline is loaded once, at import time.  NER_MODEL_DIR overrides the
# original developer-machine location so the service can run on other hosts;
# when it is unset the previous hard-coded path is used unchanged.
model_dir = Path(os.environ.get(
    "NER_MODEL_DIR",
    "E:\\Darsh\\DP\\Internship and Works\\AtliQ Technologies\\NER Resume Model\\my_model",
))
id_nlp = spacy.load(model_dir)


def get_entities(sentence) -> List[tuple]:
    """Run the NER pipeline on *sentence*.

    Args:
        sentence: Text to analyse.

    Returns:
        One ``(entity_text, entity_label)`` tuple per entity found, in
        document order.
    """
    doc = id_nlp(sentence)
    return [(ent.text, ent.label_) for ent in doc.ents]


class NerQuery(BaseModel):
    """Request body for ``POST /nlp/id/ner``."""

    # Text to run entity recognition on.
    sentence: str


router = APIRouter(
    prefix='/nlp/id',
    tags=['nlp'],
    responses={
        404: {'description': 'Not Found'}
    }
)


@router.post('/ner')
async def api_ner(query: NerQuery):
    """Return the entities recognised in ``query.sentence``."""
    result = get_entities(query.sentence)
    return result


app = FastAPI()
app.include_router(router)
"""FastAPI front-end that forwards NER requests to a separately served model."""
from pathlib import Path
from typing import Dict, List, Optional

import requests
import spacy
from fastapi import APIRouter, FastAPI
from fastapi.concurrency import run_in_threadpool
from pydantic import BaseModel

app = FastAPI()

# Base URL of the model server.  The previous value ("http:/127.0.0.1:1244/")
# had a single slash after the scheme and was not a valid URL.
endpoint = "http://127.0.0.1:1244/"

# Seconds to wait for the model server before giving up; without a timeout a
# stalled upstream would hang the request indefinitely.
REQUEST_TIMEOUT = 30


def _entities_from_payload(payload) -> List[tuple]:
    """Convert the model server's decoded JSON into ``(text, label)`` tuples.

    NOTE(review): the upstream response schema is not visible from this
    module.  This assumes a JSON array whose items are either
    ``[text, label]`` pairs or ``{"text": ..., "label": ...}`` objects --
    confirm against the model server.
    """
    entities = []
    for item in payload:
        if isinstance(item, dict):
            entities.append((item["text"], item["label"]))
        else:
            text, label = item
            entities.append((text, label))
    return entities


def get_entities(sentence) -> List[tuple]:
    """Ask the model server for the entities in *sentence*.

    Args:
        sentence: Text to analyse; sent as the JSON request body.

    Returns:
        One ``(entity_text, entity_label)`` tuple per entity reported.

    Raises:
        requests.HTTPError: The model server answered with an error status.
        requests.RequestException: The request could not be completed.
    """
    response = requests.post(endpoint, json=sentence, timeout=REQUEST_TIMEOUT)
    response.raise_for_status()
    # The HTTP response carries JSON, not a spaCy Doc, so it has no `.ents`.
    return _entities_from_payload(response.json())


class NerQuery(BaseModel):
    """Request body for ``POST /nlp/id/ner``."""

    # Text to run entity recognition on.
    sentence: str


router = APIRouter(
    prefix='/nlp/id',
    tags=['nlp'],
    responses={
        404: {'description': 'Not Found'}
    }
)


@router.post('/ner')
async def api_ner(query: NerQuery):
    """Return the entities recognised in ``query.sentence``."""
    # requests is blocking; run it in the thread pool so the event loop stays
    # free to serve other requests while waiting on the model server.
    result = await run_in_threadpool(get_entities, query.sentence)
    return result


app.include_router(router)