forked from opencivicdata/scrapers-us-municipal
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathmadison.py
63 lines (51 loc) · 2.34 KB
/
madison.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
from pupa.scrape import Jurisdiction, Organization
from legistar.people import LegistarPersonScraper
class MadisonPersonScraper(LegistarPersonScraper):
    """Legistar people scraper configured for the Madison Legistar site.

    Only class-level settings and the ``skip_item`` filter are defined
    here; everything else is inherited from ``LegistarPersonScraper``.
    """

    # Additional field name(s) declared for this site.
    # NOTE(review): presumably read by the base scraper -- confirm there.
    EXTRA_FIELDS = ('notes',)

    # Accepted date patterns; the second one carries a literal trailing '*'.
    DATE_FORMATS = ('%m/%d/%Y', '%m/%d/%Y*',)

    def skip_item(self, item):
        """Return True when ``item`` should be skipped.

        An item is skipped when its ``'url'`` value does not contain the
        substring ``'district'``.
        """
        # An earlier approach filtered by name instead:
        #   item['name'] in ('VACANCIES', 'Al Matano')
        # TODO: this skips all non-city councilors, check to make sure it
        # doesn't skip other interesting people?
        item_url = item['url']
        mentions_district = 'district' in item_url
        return not mentions_district
class Madison(Jurisdiction):
    """Jurisdiction definition for Madison (OCD division under state:wi)."""

    # Identity and descriptive metadata.
    name = 'Madison'
    classification = 'government'
    division_id = 'ocd-division/country:us/state:wi/place:madison'
    timezone = 'America/Chicago'
    url = 'http://www.cityofmadison.com/'

    # Scraper classes keyed by scraper type.
    scrapers = {'people': MadisonPersonScraper}

    # HTTPS is vital here, without it pagination doesn't work!
    LEGISTAR_ROOT_URL = 'https://madison.legistar.com/'

    def get_organizations(self):
        """Yield the Common Council organization with its Alder posts.

        A single ``Organization`` classified as ``'legislature'`` is
        created, given one ``'Alder'`` post for each label ``'1'`` through
        ``'20'``, and then yielded.
        """
        common_council = Organization(
            'City of Madison Common Council',
            classification='legislature',
        )
        # Post labels are the numbers 1..20 rendered as strings.
        for post_label in map(str, range(1, 21)):
            common_council.add_post(post_label, role='Alder')
        yield common_council
#ORG_CLASSIFICATIONS = {
# 'ALLIED AREA TASK FORCE': 'commission',
# 'TRANSPORT 2020 IMPLEMENTATION TASK FORCE': 'commission',
# 'COMMON COUNCIL': 'legislature',
# 'COMMON COUNCIL - DISCUSSION': 'commission',
# 'COMMUNITY ACTION COALITION FOR SOUTH CENTRAL WISCONSIN INC': 'commission',
# 'COMMUNITY DEVELOPMENT AUTHORITY': 'commission',
# 'MADISON COMMUNITY FOUNDATION': 'commission',
# 'MADISON FOOD POLICY COUNCIL': 'commission',
# 'MADISON HOUSING AUTHORITY': 'commission',
# 'PARKING COUNCIL FOR PEOPLE WITH DISABILITIES': 'commission',
#}
#def person_district(self, data):
# '''This corresponds to the label field on an organization's posts.
# '''
# # First try to get it from bio.
# dist = re.findall(r'District\s+\d+', data['notes'])
# if dist:
# return dist.pop()
# # Then try website.
# dist = re.findall(r'/district(\d+)/', data['website'])
# if dist:
# return dist.pop()
# # Then email.
# dist = re.findall(r'district(\d+)', data['email'])
# if dist:
# return dist.pop()