-
Notifications
You must be signed in to change notification settings - Fork 1
/
AutoCite_GUI.pyw
233 lines (166 loc) · 8.09 KB
/
AutoCite_GUI.pyw
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
# Batch Citation Machine GUI
import re, datetime, urllib.request
import tkinter.scrolledtext as scrolledtext
from tkinter import *
from bs4 import BeautifulSoup
from dateutil import parser
def _split_titles(title_text, web_address):
    """Derive ``(page_title, website_title)`` from the text of a <title> tag.

    A title shaped like ``"Headline - Site"`` is split on its last ``" - "``.
    Otherwise the site name is guessed from ``web_address``: the label in
    front of a known top-level domain, then the whole host, then (as a last
    resort) the address itself.
    """
    title_segments = title_text.split(" - ")
    if len(title_segments) > 1:
        # The website name is taken to be whatever follows the last " - ".
        website_title = title_segments[-1]
        page_title = " - ".join(title_segments[:-1])
    else:
        # Capture the label between ".DOMAIN" and the "." in front of it.
        match = re.search(r"([^.\/]+?)(?:\.(?:sg|net|com|org|gov|edu|int|eu|us))+", web_address)
        if match is None:
            match = re.search(r"(?:http[s]*:\/\/)([^\/]+)", web_address)
        website_title = match.group(1) if match is not None else web_address
        website_title = website_title.capitalize()
        page_title = title_text
    if page_title == "":
        # No usable <title>: use the site name as the page title.
        page_title = website_title
    return page_title.strip(), website_title


def _author_from_href(href):
    """Guess ``(first_name, last_name)`` from an author link, or return None.

    The last path segment is treated as a hyphenated slug such as
    ``john-smith``; its first and last parts become the two names.
    """
    match = re.search(r"\/([^\/]+)$", href)
    if match is None:
        # e.g. the href ends with "/" so there is no trailing segment.
        return None
    name_parts = match.group(1).split("-")
    first_name = name_parts[0].capitalize()
    last_name = name_parts[-1].capitalize()
    if not first_name or not last_name:
        return None
    # Segments containing query-string characters are not personal names.
    if any(char in first_name + last_name for char in "=?+"):
        return None
    return first_name, last_name


def _find_author(soup):
    """Return the names from the first usable "author" link, else ``("", "")``."""
    for link in soup.find_all('a', href=True):
        href = link['href']
        if "author" not in href:
            continue
        names = _author_from_href(href)
        if names is not None:
            return names
    return "", ""


def _find_published_date(soup):
    """Return the first <time> element's date as ``"Month DD, YYYY"``, or ``""``.

    The ``datetime`` attribute is preferred; when it is missing or empty the
    element's first child is parsed instead. Any failure to find or parse a
    date yields an empty string, because the publication date is optional.
    """
    time_element = soup.find('time')
    if time_element is None:
        return ""
    try:
        if time_element.has_attr('datetime') and time_element["datetime"] != "":
            raw_date = time_element["datetime"]
        else:
            raw_date = time_element.contents[0]
        return parser.parse(raw_date).strftime("%B %d, %Y")
    except (ValueError, OverflowError, TypeError, IndexError, AttributeError):
        # Unparseable text, an empty <time> element, or a non-text child.
        return ""


def citation_components(web_address):
    """Download ``web_address`` and extract the parts needed for a citation.

    Args:
        web_address: URL of the page to cite, including its scheme.

    Returns:
        A tuple ``(first_name, last_name, page_title, website_title,
        date_published, date_accessed)`` of strings. The author names and
        ``date_published`` are ``""`` when they cannot be determined;
        ``date_accessed`` is today's date. Dates are formatted as
        ``"%B %d, %Y"``.

    Raises:
        Whatever ``urllib.request.urlopen`` raises when the page cannot be
        fetched; download errors are not handled here.
    """
    req = urllib.request.Request(web_address, headers={"User-Agent": 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_3) AppleWebKit/537.75.14 (KHTML, like Gecko) Version/7.0.3 Safari/7046A194A'})
    # The context manager closes the connection once the body has been read.
    with urllib.request.urlopen(req) as response:
        html = response.read()
    soup = BeautifulSoup(html, 'html.parser')

    # Page title from the <title> tag; website title from the tag or the URL.
    try:
        title_text = str(soup.title.contents[0])
    except (AttributeError, IndexError):
        title_text = ""  # no <title> tag, or an empty one
    page_title, website_title = _split_titles(title_text, web_address)

    # Author guessed from the first link whose href mentions "author".
    first_name, last_name = _find_author(soup)

    date_accessed = datetime.date.today().strftime("%B %d, %Y")
    date_published = _find_published_date(soup)

    return (first_name, last_name, page_title, website_title, date_published, date_accessed)
def chicago_compile(web_address):
    """Return a Chicago-style citation string for the page at ``web_address``.

    The citation is laid out as
    ``Last, First. "Page Title." Website, Published Date, URL. Accessed Date.``
    where the author prefix is included only when both names are known and
    the published date only when one was found.
    """
    given, family, headline, site, published, accessed = citation_components(web_address)

    pieces = []
    if given and family:
        pieces.append(family + ", " + given + ". ")
    pieces.append('"' + headline + '." ' + site + ", ")
    if published:
        pieces.append(published + ", ")
    pieces.append(web_address + ". Accessed " + accessed + ".")
    return "".join(pieces)
def apa_compile(web_address):
    """Return an APA-style citation string for the page at ``web_address``.

    With a known author the citation is laid out as
    ``Last, F. (Date). Page Title. Retrieved from URL``; without one the page
    title takes the author position:
    ``Page Title (Date). Retrieved from URL``. A missing publication date is
    written as ``(n.d.)``.
    """
    # The website title and access date are not part of this citation layout.
    first_name, last_name, page_title, _, date_published, _ = citation_components(web_address)

    has_author = first_name != "" and last_name != ""
    if has_author:
        # Only the initial of the first name is used.
        citation = last_name + ", " + first_name[0] + ". "
    else:
        citation = page_title + ' '

    if date_published != "":
        citation += "(" + date_published + "). "
    else:
        citation += "(n.d.). "  # "no date"; both letters are abbreviated

    if has_author:
        citation += page_title + ". "
    citation += "Retrieved from " + web_address
    return citation
def generate_citations():
    """Cite every URL in the raw-URL box and write the results to the citation box.

    Each non-blank line of ``raw_box`` is treated as one URL. A missing
    ``http://``/``https://`` scheme is added, the scheme and host are
    lowercased (the path and query keep their case, since those may be
    case-sensitive), and the URL is compiled in the format selected by
    ``citation_format``. A failure on one URL is reported in the output and
    does not stop the batch.
    """
    from urllib.parse import urlsplit  # only needed for URL normalisation

    citation_box.delete('1.0', END)
    # Read the format once so a batch is never a mix of both styles.
    compile_citation = apa_compile if citation_format.get() == "APA" else chicago_compile

    # window.update() below processes pending clicks; disabling the button
    # keeps a second click from starting a nested run over the same box.
    cite_button.config(state=DISABLED)
    try:
        for line in raw_box.get("1.0", END).splitlines():
            url = line.strip()
            if not url:  # skip empty lines
                continue
            if not url.lower().startswith(("http://", "https://")):
                url = "http://" + url
            try:
                parts = urlsplit(url)
                # Keep any "user:password@" prefix untouched; lowercase the host.
                userinfo, at_sign, host = parts.netloc.rpartition("@")
                url = parts._replace(
                    scheme=parts.scheme.lower(),
                    netloc=userinfo + at_sign + host.lower(),
                ).geturl()
                citation_box.insert(END, compile_citation(url) + "\n")
            except Exception as e:
                # Per-URL boundary: report the problem and carry on.
                citation_box.insert(END, "Failed to cite "+ url + " Error: " + str(e) + " \n")
            citation_box.see(END)
            window.update()  # repaint so progress is visible between downloads
    finally:
        cite_button.config(state=NORMAL)

    citation_box.insert(END, "--- FINISHED ---")
    citation_box.see(END)
# ----- Application window ----------------------------------------------------
_HEADING_FONT = ("consolas", 15)  # captions, radio buttons and the cite button
_STRETCH = E + W + N + S          # make a widget fill its whole grid cell
_FRAME_MARGIN = dict(padx=10, pady=10)

window = Tk()
window.title("AutoCite: Batch Citation Programme - BrandonTang89")
window.geometry('1000x700')

# ----- Row 0: citation-format selector ---------------------------------------
button_frame = Frame(window)
button_frame.grid(row=0, column=0, sticky=W + N, **_FRAME_MARGIN)
button_frame.grid_rowconfigure(0, weight=1)
button_frame.grid_columnconfigure(0, weight=1)

radio_label = Label(button_frame, text="Citation Format: ", font=_HEADING_FONT)
radio_label.grid(row=0, column=0)

# Selected format, read by generate_citations(); Chicago is the default.
citation_format = StringVar(value="chicago")
_radio_options = dict(padx=20, variable=citation_format, font=_HEADING_FONT)

apa_button = Radiobutton(button_frame, text="APA", value="APA", **_radio_options)
apa_button.grid(row=0, column=2)

chicago_button = Radiobutton(button_frame, text="Chicago", value="chicago", **_radio_options)
chicago_button.grid(row=0, column=1)

# ----- Row 1, left: input box for the raw URLs (one per line) ----------------
raw_frame = LabelFrame(window, text="Raw URLs", padx=5, pady=5, font=_HEADING_FONT)
raw_frame.grid(row=1, column=0, sticky=_STRETCH, **_FRAME_MARGIN)
raw_frame.grid_rowconfigure(0, weight=1)
raw_frame.grid_columnconfigure(0, weight=1)

raw_box = scrolledtext.ScrolledText(raw_frame, width=80, height=15, font="consolas")
# Sample URLs for manual testing (paste them into the box):
#   https://www.straitstimes.com/singapore/condo-conflicts
#   https://www.channelnewsasia.com/news/asia/southeast-asian-leaders-meet-us-china-trade-war-asean-summit-12057538
#   https://blog.seedly.sg/telegram-channels-that-every-millennials-singapore-must-have/
#   https://stackoverflow.com/questions/5815747/beautifulsoup-getting-href
#   https://www.dissentmagazine.org/online_articles/can-democratic-socialism-rise-in-rural-america
raw_box.grid(row=0, column=0, sticky=_STRETCH)

# ----- Row 1, right: the button that starts the batch ------------------------
cite_frame = Frame(window)
cite_frame.grid(row=1, column=2, padx=10, pady=20, sticky=_STRETCH)
cite_frame.grid_rowconfigure(0, weight=1)
cite_frame.grid_columnconfigure(0, weight=1)

cite_button = Button(
    cite_frame,
    text="Generate \n Citations!",
    command=generate_citations,
    font=_HEADING_FONT,
)
cite_button.grid(row=0, column=0, sticky=N + E + W + S)

# ----- Row 2: output box for the generated citations -------------------------
citation_frame = LabelFrame(window, text="Citations", padx=5, pady=5, font=_HEADING_FONT)
citation_frame.grid(row=2, column=0, columnspan=3, sticky=_STRETCH, **_FRAME_MARGIN)

# Only column 0 and row 1 of the window absorb extra space on resize.
window.grid_columnconfigure(0, weight=1)
window.grid_rowconfigure(1, weight=1)
citation_frame.grid_rowconfigure(0, weight=1)
citation_frame.grid_columnconfigure(0, weight=1)

citation_box = scrolledtext.ScrolledText(citation_frame, width=80, height=17, font="consolas")
citation_box.grid(row=0, column=0, sticky=_STRETCH)

window.mainloop()