-
Notifications
You must be signed in to change notification settings - Fork 4
/
test_application.py
executable file
·61 lines (47 loc) · 2.01 KB
/
test_application.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
#!/usr/bin/env python
import application
import os
def test_titles_are_wikipedia_articles():
    """Ensure every topic in the people file is a Wikipedia article title.

    Wikipedia article titles are our controlled vocabulary. Every distinct
    topic found under a person's ``"interests"`` is looked up with
    ``application.canonical_title``. Topics contained in whatever
    ``application.get_file_contents`` returns for ``title_exceptions.json``
    (resolved next to this file) are skipped without a lookup.

    Progress is printed as the check runs, followed by a summary of the
    invalid titles (with the people who list them) and of the titles whose
    canonical form differs from what was written.

    Raises:
        AssertionError: if any topic raised ``TitleNotFoundException`` or
            is not already written in its canonical form.
    """
    exception_file = os.path.abspath(
        os.path.join(os.path.dirname(__file__), 'title_exceptions.json'))
    exception_list = application.get_file_contents(exception_file)
    people = application.get_people()

    # Distinct topics across everyone, sorted so the report order is stable.
    all_topics = sorted({
        topic
        for persondata in people.values()
        for topic in persondata["interests"]
    })

    invalid_titles = []
    canonicalised_titles = []
    # Only populated for invalid topics; reused by the summary below.
    people_with_this_topic = {}

    print("Checking titles are Wikipedia articles:")
    print()
    for topic in all_topics:
        print(f"Checking {topic}")
        if topic in exception_list:
            continue
        try:
            # TODO: we should probably do this in batches to reduce
            # the number of queries to wikipedia, and speed it up
            canonical_title = application.canonical_title(topic)
        except application.TitleNotFoundException:
            people_with_this_topic[topic] = [
                person for person, persondata in people.items()
                if topic in persondata["interests"]]
            print(f" ❌ Invalid ({', '.join(people_with_this_topic[topic])})")
            invalid_titles.append(topic)
        else:
            # The lookup succeeded; flag the topic only if it is not
            # already spelled the canonical way.
            if canonical_title != topic:
                print(f" ➡️ Should be {canonical_title}")
                canonicalised_titles.append((topic, canonical_title))

    print()
    print("Invalid titles:")
    for topic in invalid_titles:
        print(f" {topic} ({', '.join(people_with_this_topic[topic])})")
    print()
    print("Canonicalised titles:")
    for topic, canonicalised in canonicalised_titles:
        print(f" {topic} -> {canonicalised}")
    print()

    # Both lists must be empty; the message makes a bare failure readable
    # even when the printed report is not at hand.
    assert not invalid_titles and not canonicalised_titles, (
        f"{len(invalid_titles)} invalid and "
        f"{len(canonicalised_titles)} non-canonical topic title(s) found")