-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathintroScraper.py
70 lines (58 loc) · 1.93 KB
/
introScraper.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
__author__ = 'Kevin'
from bs4 import BeautifulSoup
try:
import urllib.request as urllib2
except ImportError:
import urllib2
import tkinter as tk
# Blog listing page that the GUI's "Run" action passes to get_all_articles().
BASE_URL = 'http://www.mrmoneymustache.com/blog/'
"""
Accepts: The blog page of MMM's blog (as defined by BASE_URL)
Returns: list of tuples where tuple[0] = title and tuple[1] = url
Possible errors: If the blog's HTML changes. This is the most likely error.
"""
def get_all_articles(section_url):
    """Scrape the article headlines from a blog listing page.

    Args:
        section_url: URL of the page to download and parse.

    Returns:
        A list of ``(title, url)`` tuples, one for every
        ``<h2 class="headline">`` element that contains a link with an
        ``href`` attribute. Headlines without such a link are skipped.

    Raises:
        Any exception raised by ``urllib2.urlopen`` or by reading the
        response is propagated unchanged.
    """
    response = urllib2.urlopen(section_url)
    try:
        html = response.read()
    finally:
        # Release the connection even when the read fails.
        response.close()

    soup = BeautifulSoup(html, 'lxml')
    articles = []
    for headline in soup.find_all('h2', 'headline'):
        link = headline.a
        if link is None or link.get('href') is None:
            # No link means no URL to report; skip rather than crash on
            # subscripting None or on a missing 'href' attribute.
            continue
        title = headline.string
        if title is None:
            # .string is None when the heading has several child nodes;
            # fall back to the heading's combined text.
            title = headline.get_text(strip=True)
        articles.append((title, link['href']))
    return articles
"""
Checks to see if there is a new article.
Accepts: list of articles as generated by get_all_articles method (list of tuples where tuple[0] = title and tuple[1] = url)
Returns: Boolean
"""
def is_new_article(articles):
    """Report whether the newest scraped article differs from the saved one.

    The title of the first entry in *articles* is compared with the first
    line of the ``recentArticle`` file in the current working directory.
    When they differ, or when the file does not exist yet, the file is
    rewritten (as UTF-8) so that it holds only the new title.

    Args:
        articles: List of ``(title, url)`` tuples as produced by
            ``get_all_articles``; only the first entry is examined.

    Returns:
        True if the first article's title differs from the saved title.
        False if it matches, if *articles* is empty, or if the first
        title is missing or blank.
    """
    if not articles:
        # Nothing was scraped, so there is nothing new to report.
        return False

    new_title = (articles[0][0] or '').strip()
    if not new_title:
        # A missing title cannot be compared or stored meaningfully.
        return False

    try:
        with open('recentArticle', 'r', encoding='utf-8') as saved_file:
            # Strip the line terminator so it cannot defeat the comparison.
            saved_title = saved_file.readline().strip()
    except FileNotFoundError:
        # First run: no title has been recorded yet.
        saved_title = ''

    if saved_title == new_title:
        return False

    # Reopen in 'w' mode to replace the old title; writing through an 'r+'
    # handle after readline() would append after the first line instead.
    with open('recentArticle', 'w', encoding='utf-8') as saved_file:
        saved_file.write(new_title + '\n')
    return True
class Application(tk.Frame):
    """Small Tk frame holding a "Quit" button and a "Run" button."""

    def __init__(self, master=None):
        """Initialise the frame under *master*, grid it, and add its buttons."""
        super().__init__(master)
        self.grid()
        self.createWidgets()

    def createWidgets(self):
        """Create both buttons and place each one with the grid manager."""
        quit_button = tk.Button(self, text="Quit", command=self.quit)
        quit_button.grid()
        self.quitButton = quit_button

        run_button = tk.Button(self, text="Run", command=self.run_program)
        run_button.grid()
        self.runButton = run_button

    def run_program(self):
        """Scrape BASE_URL and print whether its first article is new."""
        print(is_new_article(get_all_articles(BASE_URL)))
# Build the GUI and hand control to Tk's event loop.
app = Application()
# title is a method of the top-level window and must be called to set the
# caption; assigning to it only replaced the method on this instance and
# left the window title unchanged.
app.master.title("Sample App")
app.mainloop()