diff --git a/grades.db b/grades.db
index 8048327..3f35d9f 100755
Binary files a/grades.db and b/grades.db differ
diff --git a/initial.db.sqbpro b/initial.db.sqbpro
new file mode 100644
index 0000000..9a55f97
--- /dev/null
+++ b/initial.db.sqbpro
@@ -0,0 +1,9 @@
+INSERT INTO aggregate
+SELECT prof,
+dept,
+course_nbr,
+course_name,
+SUM(a), SUM(b), SUM(c), SUM(d), SUM(f), SUM(totalAF)
+FROM section_grades
+GROUP BY prof, dept, course_nbr, course_name
+
diff --git a/merge.py b/merge.py
new file mode 100644
index 0000000..c3dabe9
--- /dev/null
+++ b/merge.py
@@ -0,0 +1,18 @@
+# Merge every table in db2.db into the table of the same name in db1.db.
+import sqlite3
+
+con = sqlite3.connect("db1.db")
+con.execute("ATTACH 'db2.db' AS db2")
+
+# Read all table names up front so we are not inserting rows while the
+# sqlite_master cursor is still being iterated on the same connection.
+tables = [row[0] for row in
+          con.execute("SELECT name FROM db2.sqlite_master WHERE type='table'")]
+
+con.execute("BEGIN")
+for name in tables:
+    combine = "INSERT INTO " + name + " SELECT * FROM db2." + name
+    print(combine)
+    con.execute(combine)
+con.commit()
+con.execute("DETACH DATABASE db2")
diff --git a/rmp.sqlite b/rmp.sqlite
new file mode 100644
index 0000000..afb9b8e
Binary files /dev/null and b/rmp.sqlite differ
diff --git a/scrapy.py b/scrapy.py
index 5344b52..600cb4a 100644
--- a/scrapy.py
+++ b/scrapy.py
@@ -24,15 +24,23 @@
 "PLPA","POLS","POSC","PROS","PSAA","PSYC","RDNG","RELS","RENR","RPTS","RUSS","SABR","SCEN","SCMT","SCSC","SEFB","SENG","SOCI","SOMS","SOPH","SPAN","SPED","SPMT","SPSY","STAT",
 "TCMG","TCMT","TEED","TEFB","THAR","UGST","URPN","URSC","VIBS","VIST","VIZA","VLCS","VMID","VPAT","VSCS","VTMI","VTPB","VTPP","WFSC","WGST","WMHS"]
 
 if __name__ == '__main__':
-    db = sqlite3.connect("initial.db")
+    db = sqlite3.connect("db4.db")
     with db:
         cur = db.cursor()
-        cur.execute("CREATE TABLE section_grades (sem TEXT, prof TEXT, dept TEXT, course_nbr TEXT, course_name TEXT, a INT, b INT, c INT, d INT, f INT, totalAF INT)")
+        # Table was created on an earlier run; keep the schema here for reference.
+        #cur.execute("CREATE TABLE section_grades (sem TEXT, prof TEXT, dept TEXT, course_nbr TEXT, course_name TEXT, a INT, b INT, c INT, d INT, f INT, totalAF INT)")
+
+        for dept in subject_list[150:160]:
+            print(subject_list.index(dept))
+            # Reset per department so course numbers don't carry over between depts.
+            course_nbr_list = []
+            i = requests.get('http://www.aggiescheduler.com/api/search?search=' + dept + '&term=201911')
+            i_file = i.json()
+            for cn_dict_init in i_file:
+                course_nbr_list.append(cn_dict_init.get("course"))
 
-        for dept in subject_list:
-            for course_nbr_int in range(100, 1000):
-                course_nbr = str(course_nbr_int)
+            for course_nbr in course_nbr_list:
                 j = requests.get('http://www.aggiescheduler.com/api/grades?course=' + course_nbr + '&subject=' + dept)
                 j_file = j.json()
                 if j_file:
@@ -86,4 +94,4 @@
                     print('***************************')"""
                     cur.execute("INSERT INTO section_grades VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)", (sem, prof, dept, course_nbr, course_name, a, b, c, d, f, totalAF))
-                    #TODO:write to file here...
+ print("*****************************************************") db.close() diff --git a/tamu_grades.db b/tamu_grades.db new file mode 100644 index 0000000..56bcd7f Binary files /dev/null and b/tamu_grades.db differ diff --git a/testscrape.py b/testscrape.py new file mode 100644 index 0000000..8243fe8 --- /dev/null +++ b/testscrape.py @@ -0,0 +1,35 @@ +import scraperwiki +import sqlite3 +from bs4 import BeautifulSoup +import string +import unicodedata +import time +import requests +import json + +headers = ["Name","Department","Total Ratings","Overall Quality","Easiness","Hot"] +#Dictionary of school ids (keys) that map to tuple of school name and number of pages +colleges = {"1003":("Texas A&M",4)} + +for sid in colleges.keys(): + college,pages = colleges[sid] + print college + for i in xrange(1,pages+1): + response = scraperwiki.scrape("http://www.ratemyprofessors.com/SelectTeacher.jsp?sid=%s&pageNo=%s" % (sid,str(i))) + xxxx = requests.get("http://www.ratemyprofessors.com/SelectTeacher.jsp?sid=%s&pageNo=%s" % (sid,str(i))) + print(xxxx.text) + print("http://www.ratemyprofessors.com/SelectTeacher.jsp?sid=%s&pageNo=%s" % (sid,str(i))) + time.sleep(5) + soup = BeautifulSoup(response) + rows = soup.find_all("div",{"class":"entry odd vertical-center"}) + rows.extend(soup.find_all("div",{"class":"entry even vertical-center"})) + for row in rows: + columns = row.find_all('div') + columns = columns[3:] + variables = {} + for i,col in enumerate(columns): + value = unicodedata.normalize('NFKD', col.text).encode('ascii', 'ignore') + variables[headers[i]] = value + variables["College"] = college + scraperwiki.sqlite.save(unique_keys=['Name',"Department"], data = variables) + print(variables)