-
Notifications
You must be signed in to change notification settings - Fork 1
/
main.py
75 lines (61 loc) · 2.16 KB
/
main.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
# -*- coding: utf-8 -*-
"""
Created on Sun Apr 8 02:09:57 2018
@author: Chiranjev Koul
"""
import requests
from bs4 import BeautifulSoup
import re
BASE_URL = "https://www.interviewbit.com"
COURSE_URL = BASE_URL + "/courses/programming/"
OUTPUT_PATH = "all_interviewbit_questions.txt"
# requests waits indefinitely unless a timeout is given explicitly.
REQUEST_TIMEOUT = 30
PROBLEM_HREF = re.compile(r"^/problems")


def topic_slug(title):
    """Return the URL slug for a topic title shown on the course page.

    Surrounding whitespace is removed, the title is lower-cased and
    spaces become hyphens.
    """
    return title.strip().lower().replace(' ', '-')


def fetch_soup(url):
    """Download *url* and return its content parsed with ``html.parser``.

    Raises:
        requests.RequestException: on a connection error, a timeout or
            a non-success HTTP status.
    """
    response = requests.get(url, timeout=REQUEST_TIMEOUT)
    response.raise_for_status()
    return BeautifulSoup(response.content, 'html.parser')


def locked_problem_hrefs(topic_soup):
    """Return the ``href`` of every ``/problems`` link marked as locked.

    The test is made against the link's rendered markup (including
    anything nested inside it), so the attribute has to read exactly
    ``class="locked"``.
    """
    return [
        link.get('href')
        for link in topic_soup.find_all('a', attrs={'href': PROBLEM_HREF})
        if 'class="locked"' in str(link)
    ]


def question_text(problem_soup):
    """Return the formatted question text of a problem page.

    Returns ``None`` when the page has no element with the
    ``markdown-content`` class, so the caller can skip it instead of
    failing on a missing index.
    """
    content = problem_soup.find(class_='markdown-content')
    if content is None:
        return None
    # The extracted text is parsed a second time and pretty-printed; the
    # stray '</vector<int>' that can show up in that output is removed.
    reparsed = BeautifulSoup(content.get_text(), 'html.parser')
    return reparsed.prettify(formatter=None).replace('</vector<int>', '')


def main():
    """Scrape the locked questions of every topic into ``OUTPUT_PATH``.

    For each topic listed on the course page a header line is written,
    followed by the numbered text of each locked problem linked from the
    topic page. Topic or problem pages that cannot be fetched, and
    problem pages without question content, are reported and skipped.
    The number of questions actually written is printed at the end.

    Raises:
        requests.RequestException: if the course index page itself
            cannot be fetched (nothing is written in that case).
    """
    course_soup = fetch_soup(COURSE_URL)
    slugs = [
        topic_slug(title.get_text())
        for title in course_soup.find_all(class_='topic-title')
    ]

    count = 0
    # The context manager closes the file even if scraping fails midway.
    with open(OUTPUT_PATH, "w", encoding="utf-8") as out:
        for slug in slugs:
            out.write("\n\n\n-----------------------------" + slug
                      + "----------------------------------\n\n\n")
            try:
                topic_soup = fetch_soup(COURSE_URL + "topics/" + slug)
            except requests.RequestException as err:
                print('Skipping topic ' + slug + ': ' + str(err))
                continue

            # Iterate the links themselves: they are what gets fetched,
            # so numbering and the final count cannot drift out of step
            # with a separately collected list.
            hrefs = locked_problem_hrefs(topic_soup)
            for number, href in enumerate(hrefs, start=1):
                url = BASE_URL + href
                try:
                    problem_soup = fetch_soup(url)
                except requests.RequestException as err:
                    print('Skipping ' + url + ': ' + str(err))
                    continue

                question = question_text(problem_soup)
                if question is None:
                    print('Skipping ' + url + ': no question content found')
                    continue

                out.write(str(number) + ". ")
                out.write(question)
                out.write('\n')
                count += 1

    print('Total number of questions scraped is ' + str(count))


if __name__ == "__main__":
    main()
#
#file = open("question.txt","r")
#content = file.read()
#content