From 57c5c28e548955b2294767621df9c7b65b7aeb42 Mon Sep 17 00:00:00 2001 From: Zee Abrahams Date: Mon, 29 Nov 2021 14:44:36 -0500 Subject: [PATCH 01/12] Adds new test data and comment explaining plan for the new scraper --- city_scrapers/spiders/cle_design_review.py | 28 + tests/files/cle_design_review.html | 1019 ++++++++------------ tests/files/cle_design_review.html.old | 608 ++++++++++++ 3 files changed, 1056 insertions(+), 599 deletions(-) create mode 100644 tests/files/cle_design_review.html.old diff --git a/city_scrapers/spiders/cle_design_review.py b/city_scrapers/spiders/cle_design_review.py index cd419ff..b9d1f03 100644 --- a/city_scrapers/spiders/cle_design_review.py +++ b/city_scrapers/spiders/cle_design_review.py @@ -22,6 +22,34 @@ def parse(self, response): Change the `_parse_title`, `_parse_start`, etc methods to fit your scraping needs. """ + + """ + There's no element that wraps both the committee name/time and the dropdown containing the + agendas. As such we want to grab each committee name/times and then use the following dropdown + to get the agendas. Luckily all of the committee name/times are (and are the only thing in) divs with + the class '.mt-3' so we can grab all the divs with those classes and then look for the next sibling div with + the ".dropdown" class to get the links to all the agendas. + + Note that the city planning meeting is handled by a different scraper so we do look at it here. Luckily + the name/times for the city planning meeting are not currently wrapped in a div, so the list of nodes + described above won't include it. + + There are three other points to keep in mind for this scraper: + 1. The way the data is presented doesn't make it easy to know whether or not a meeting happened, but doesn't have an + agenda, or whether a meeting is going to happen on a normal meeting date. The strategy I'm using is to treat + the agenda links as authoritative for past (and if listed upcoming) meetings. So previous meetings are just read off of the + agenda links. For future meetings we take the greater of either: (a) the most recent agenda, or (b) the current day and then + calculate the remaining meetings this year from that info. As dates progress and agendas are added, those tentative meetings + will either continue to exist or disappear based on the ways the agendas are updated. + + 2. There is no mention of the year anywhere in the text of the site. We can extract it from the agenda link - at least + for now. But it will be important to keep an eye on how the site is changed in January. + + 3. Meetings are currently not being held in person but over webex. I've included this information in the time_notes section of the + meeting. Perhaps a more general notes section would make a bit more sense, but given the current fields on the + meeting object, time notes seemed like a reasonable place to put this. + """ + page_content = response.css("#content .field-items .field-item")[0] bold_text = " ".join(page_content.css("strong *::text").extract()) year_match = re.search(r"\d{4}(?= Agenda)", bold_text) diff --git a/tests/files/cle_design_review.html b/tests/files/cle_design_review.html index a48c7bd..d904fcf 100644 --- a/tests/files/cle_design_review.html +++ b/tests/files/cle_design_review.html @@ -1,608 +1,429 @@ - - - - - - - - - - Design Review Meeting Schedules | City of Cleveland - - - - - - - - - - - - - - - - - - -
- +
+
+ + +

CITY PLANNING COMMISSION

+

The City Planning Commission meets at 9am, every 1st & 3rd Friday of the month in Room 514, City Hall

+

Contact: Michael Bosak Phone: 216.664.3802 Email: mbosak@clevelandohio.gov +

+
+ +
+

DOWNTOWN/FLATS DESIGN REVIEW COMMITTEE

+ +

The Committee meets at 9:00 am, Thursday prior to the City Planning Commission meeting on Fridays in Room 514, City Hall

+ +

Contact: Anthony Santora Phone: 216.664.3815 Email: asantora@clevelandohio.gov

+ + +
+ +
+

EAST DESIGN REVIEW COMMITTEE +

+ +

The Committee meets on the 2nd & 4th Tuesdays @ 8:30 am in Cornucopia Place, 7201 Kinsman Road, Suite 103B

+ +

Contact: Nickol Calhoun Phone: 216.664.3817 Email: ncalhoun@clevelandohio.gov

+ + +
+ +
+

EUCLID CORRIDOR DESIGN REVIEW COMMITTEE

+

The Committee meets on the 1st & 3rd Thursdays @ 8:00 am in + The Agora Building- 5000 Euclid Ave

+

Contact: Kim Scott Phone: 216.664.3803 Email: kscott@clevelandohio.gov

+ + +
+ +
+

FAR WEST DESIGN REVIEW COMMITTEE

+

The Committee meets on the 1st & 3rd Wednesdays @ 8:00 am at + St. Mel's Catholic Church - 14436 Triskett

+

Contact: Adam Davenport Phone: 216.664.3800 Email: adavenport@clevelandohio.gov

+ + +
+ +
+

NEAR WEST DESIGN REVIEW COMMITTEE

+

The Committee meets on the 2nd & 4th Wednesdays @ 8:30 am at + South Branch Library, 3096 Scranton Rd.

+

Contact: Matt Moss Phone: 216.664.3807 Email: mmoss@clevelandohio.gov

+ + +
+ +
+

NORTHEAST DESIGN REVIEW COMMITTEE

+

The Committee meets on the 1st & 3rd Tuesdays @ 8:00 am at CPL Memorial-Nottingham Branch -17109 Lakeshore Blvd.

+

Sharonda Whatley Phone: 216.664.3806 Email: swhatley@clevelandohio.gov

+ + +
+ +
+

SOUTHEAST DESIGN REVIEW COMMITTEE

+

The Committee meets on the 2nd & 4th Wednesdays @ 5:00 pm at York-Rite Mason Temple -13512 Kinsman Road

+

Contact: Marka Fields Phone: 216.664.3465 Email: mfields@clevelandohio.gov

+ + +

 

+ +
+ + + + \ No newline at end of file diff --git a/tests/files/cle_design_review.html.old b/tests/files/cle_design_review.html.old new file mode 100644 index 0000000..a48c7bd --- /dev/null +++ b/tests/files/cle_design_review.html.old @@ -0,0 +1,608 @@ + + + + + + + + + + Design Review Meeting Schedules | City of Cleveland + + + + + + + + + + + + + + + + + + +
 

Design Review Meeting Schedules

+
+ + +
+
+ + + +
+

Design Review Committee Agendas
Downtown/Flats | East | Euclid Corridor | Far West | Near West | Northeast | Southeast | Mayor's Streetscape

+

+

CITY PLANNING

+

The City Planning Commission meets at 9am, every 1st & 3rd Friday of the month in Room 514, City Hall.

+ +
+

Under the conditions specified by law, the Cleveland Planning Commission will be conducting virtual meetings in a limited capacity using the WebEx Platform. This will include limited agenda items to initiate the process to ensure we can appropriately evaluate the process.

+

The Planning Commission will also be live streamed on YouTube and TV 20. The links for the live streams will be available before the meeting.

+

In order to keep the WebEx session to a manageable size we are asking individuals that wish to participate in the meeting to contact the City Planning office by phone or email. Those individuals not planning to comment on any agenda item during the WebEx session are encouraged to view one of the live streams.

+

To contact the City Planning office and request access to the WebEx City Planning Meeting please call 216.664.3826 or email us at cityplanning@clevelandohio.gov.

+

We ask that you include the name of the project you plan to comment on when requesting access to the WebEx meeting

+

WebEx Help | WebEx Website (App Download)

+

YouTube Live Stream | TV 20 Live Stream

+

+

2020 Agendas:

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
JAN.03 - Agenda17 - Agenda - Photo Gallery
FEB.07 - Agenda - Photo Gallery21 - Agenda
MAR.06 - Agenda - Photo Gallery20
APR.0317
MAY0115 - Agenda
JUN.0519
JUL.0317
AUG.0721
SEP.0418
OCT.0216
NOV.0620
DEC.0418

CITYWIDE DESIGN REVIEW ADVISORY COMMITTEES

+

***Please Note***
All Design Review Committee meetings are being conducted virtually using the WebEx Platform. Please contact the staff planner for meeting information.DOWNTOWN/FLATS

+

+

Downtown/Flats Design Review Committee

+

The Committee meets at 9:00 am, Thursday prior to the City Planning Commission meeting on Fridays in Room 514, City Hall

+

Contact: Anthony Santora Phone: 216.664.3815 Email: asantora@clevelandohio.gov

+

Click on the meeting date for agenda (PDF)

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
JAN.0216 - Agenda
FEB.06 - Agenda20 - Agenda
MAR.05 - Agenda19
APR.0216
MAYApril 3014*
JUN.0418
JUL.02 - No meeting16
AUG.0620
SEP.0317
OCT.0115
NOV.0519
DEC.0317

EAST DESIGN REVIEW COMMITTEE
The Committee meets on the 2nd & 4th Tuesdays @ 8:30 am in Cornucopia Place, 7201 Kinsman Road, Suite 103B

+

Contact: Nickol Calhoun Phone: 216.664.3817 Email: ncalhoun@clevelandohio.gov

+

Click on the meeting date for agenda (PDF)

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
JAN.14 - Agenda28 - Agenda
FEB.1125
MAR.1024
APR.1428
MAY1226
JUN.0923
JUL.1428
AUG.1125
SEP.0822
OCT.1327
NOV.1024
DEC.0822

EUCLID CORRIDOR DESIGN REVIEW COMMITTEE
The Committee meets on the 1st & 3rd Thursdays @ 8:00 am in The Agora Building- 5000 Euclid Ave

+

Contact: Kim Scott Phone: 216.664.3803 Email: kscott@clevelandohio.gov

+

Click on the meeting date for agenda (PDF)

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
JAN.0216 - Agenda
FEB.6 - Agenda20
MAR.5 - Agenda19
APR.0216
MAYApril 3014
JUN.0418
JUL.2 -No meeting16
AUG.0620
SEP.0317
OCT.0115
NOV.519
DEC.0317

FAR WEST DESIGN REVIEW COMMITTEE
The Committee meets on the 1st & 3rd Wednesdays @ 8:00 am at St. Mel's Catholic Church - 14436 Triskett

+

Contact: Adam Davenport Phone: 216.664.3800 Email: adavenport@clevelandohio.gov

+

Click on the meeting date for agenda (PDF)

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
JAN.1 -No meeting15
FEB.0519
MAR.0418
APR.0115
MAY0620
JUN.0317
JUL.0115
AUG.0519
SEP.0216
OCT.0721
NOV.0418
DEC.0216

NEAR WEST DESIGN REVIEW COMMITTEE
The Committee meets on the 2nd & 4th Wednesdays @ 8:30 am at South Branch Library, 3096 Scranton Rd.

+

Contact: Matt Moss Phone: 216.664.3807 Email: mmoss@clevelandohio.gov

+

Click on the meeting date for agenda (PDF)

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
JAN.08 - Agenda22
FEB.12 - Agenda26 - Agenda
MAR.11 - Agenda25
APR.0822
MAY13 - Agenda27
JUN.1024
JUL.0822
AUG.1226
SEP.0923
OCT.1428
NOV.1125
DEC.0923

NORTHEAST DESIGN REVIEW COMMITTEE
The Committee meets on the 1st & 3rd Tuesdays @ 8:00 am at CPL Memorial-Nottingham Branch - 17109 Lakeshore Blvd.

+

Contact: Sharonda Whatley Phone: 216.664.3806 Email: swhatley@clevelandohio.gov

+

Click on the meeting date for agenda (PDF)

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
JAN.0721
FEB.0418 - Agenda
MAR.0317 - Agenda
APR.0721
MAY05 - Agenda19 - Agenda
JUN.0216
JUL.0721
AUG.0418
SEP.0115
OCT.0620
NOV.0317
DEC.0115

SOUTHEAST DESIGN REVIEW COMMITTEE
The Committee meets on the 2nd & 4th Wednesdays @ 5:00 pm at York-Rite Mason Temple - 13512 Kinsman Road

+

Contact: Marka Fields Phone: 216.664.3465 Email: mfields@clevelandohio.gov

+

Click on the meeting date for agenda (PDF)

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
JAN.08 - Agenda22 - Agenda
FEB.1226
MAR.11 - Agenda25
APR.0822
MAY1327
JUN.1024
JUL.0822
AUG.1226
SEP.0923
OCT.1428
NOV.1125
DEC.0923

OTHER REVIEW COMMITTEES

+

MAYOR'S INFRASTRUCTURE & STREETSCAPE ADVISORY COMMITTEE
The Committee meets on the 1st & 3rd Tuesdays @ 2:00 pm in Room 514, City Hall

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
JAN.0721
FEB.0418
MAR.0317
APR.0721
MAY0519
JUN.0216
JUL.0721
AUG.0418
SEP.0115
OCT.0620
NOV.0317
DEC.0115

5170
164765
1613
5166
+ + + +
+
+
+
+
 
+ From 6a292c647ff3f2dee268d3f4d722e3415b80c321 Mon Sep 17 00:00:00 2001 From: Zee Abrahams Date: Wed, 1 Dec 2021 11:11:11 -0500 Subject: [PATCH 02/12] Gets tests running with new scraper --- city_scrapers/spiders/cle_design_review.py | 138 +++++++++------------ 1 file changed, 61 insertions(+), 77 deletions(-) diff --git a/city_scrapers/spiders/cle_design_review.py b/city_scrapers/spiders/cle_design_review.py index b9d1f03..6caff45 100644 --- a/city_scrapers/spiders/cle_design_review.py +++ b/city_scrapers/spiders/cle_design_review.py @@ -14,6 +14,7 @@ class CleDesignReviewSpider(CityScrapersSpider): start_urls = [ "http://clevelandohio.gov/CityofCleveland/Home/Government/CityAgencies/CityPlanningCommission/MeetingSchedules" # noqa ] + time_notes = "fill in more about webex" def parse(self, response): """ @@ -35,12 +36,12 @@ def parse(self, response): described above won't include it. There are three other points to keep in mind for this scraper: - 1. The way the data is presented doesn't make it easy to know whether or not a meeting happened, but doesn't have an + 1. The way the data is presented doesn't make it easy to know whether or not a meeting occurred but doesn't have an agenda, or whether a meeting is going to happen on a normal meeting date. The strategy I'm using is to treat the agenda links as authoritative for past (and if listed upcoming) meetings. So previous meetings are just read off of the - agenda links. For future meetings we take the greater of either: (a) the most recent agenda, or (b) the current day and then - calculate the remaining meetings this year from that info. As dates progress and agendas are added, those tentative meetings - will either continue to exist or disappear based on the ways the agendas are updated. + agenda links. For future meetings we take the date of the most recent agenda and then + calculate the remaining meetings this year from that date. As dates progress and agendas are added, those tentative meetings + will either be confirmed to exist or disappear based on the ways the agendas are updated. 2. There is no mention of the year anywhere in the text of the site. We can extract it from the agenda link - at least for now. But it will be important to keep an eye on how the site is changed in January. @@ -49,96 +50,73 @@ def parse(self, response): meeting. Perhaps a more general notes section would make a bit more sense, but given the current fields on the meeting object, time notes seemed like a reasonable place to put this. """ - - page_content = response.css("#content .field-items .field-item")[0] - bold_text = " ".join(page_content.css("strong *::text").extract()) - year_match = re.search(r"\d{4}(?= Agenda)", bold_text) - if year_match: - year_str = year_match.group() - else: - year_str = str(datetime.now().year) - design_review_committees = re.split(r"\", page_content.extract())[1:] - for committee in design_review_committees: - committee_item = Selector(text=committee) - title = self._parse_title(committee_item) + committee_metas = response.css("div.mt-3") # this skips city planning + committee_agendas = response.css("div.mt-3 + div.dropdown") + if len(committee_metas) != len(committee_agendas): + # we haven't sucessfully extracted matched metas and agendas so we can't safely iterate over them together. + raise ValueError("Cannot match committee agandas to committee metadata") + committee_items = zip(committee_metas, committee_agendas) + + for committee_meta, commitee_agenda_list in committee_items: + title = self._parse_title(committee_meta) if not title: continue - location = self._parse_location(committee_item) - time_str = self._parse_time_str(committee_item) - for row in committee_item.css(".report tr"): - month_str = ( - row.css("td:first-child::text").extract_first().replace(".", "") + location = self._parse_location(committee_meta) + time_str = self._parse_time_str(committee_meta) + for agenda in commitee_agenda_list.css("div.dropdown-menu a.dropdown-item"): + month_str, day_str = agenda.css("*::text").extract_first().strip().split(" ") + year_str = self._parse_year_from_agenda_link(agenda) + + start = self._parse_start(year_str, month_str, day_str, time_str) + if not start: + continue + meeting = Meeting( + title=title, + description="", + classification=ADVISORY_COMMITTEE, + start=start, + end=None, + all_day=False, + time_notes=self.time_notes, + location=location, + links=self._parse_links(agenda, response), + source=response.url, ) - for date_cell in row.css("td:not(:first-child)"): - start = self._parse_start(date_cell, year_str, month_str, time_str) - if not start: - continue - meeting = Meeting( - title=title, - description="", - classification=ADVISORY_COMMITTEE, - start=start, - end=None, - all_day=False, - time_notes="", - location=location, - links=self._parse_links(date_cell, response), - source=response.url, - ) - - meeting["status"] = self._get_status(meeting) - meeting["id"] = self._get_id(meeting) - - yield meeting + + meeting["status"] = self._get_status(meeting) + meeting["id"] = self._get_id(meeting) + + yield meeting def _parse_title(self, item): """Parse or generate meeting title.""" committee_strs = [ c.strip() - for c in item.css("p > strong::text").extract() + for c in item.css("h4::text").extract() if c.strip().upper().endswith("DESIGN REVIEW COMMITTEE") ] if len(committee_strs): return committee_strs[0].title() def _parse_time_str(self, item): - desc_text = " ".join(item.css("p *::text").extract()) + desc_text = " ".join(item.css("p.mb-1::text").extract()) time_match = re.search(r"\d{1,2}:\d{2}\s*[apm]{2}", desc_text) if time_match: return time_match.group().replace(" ", "") return "12:00am" - def _parse_start(self, item, year_str, month_str, time_str): + def _parse_start(self, year_str, month_str, day_str, time_str): """Parse start datetime as a naive datetime object.""" - cell_text = " ".join(item.css("* ::text").extract()) - date_text = re.sub(r"\D", "", cell_text) - if not date_text or "No meeting" in cell_text: - return - date_str = " ".join([year_str, month_str, date_text, time_str]) - return datetime.strptime(date_str, "%Y %b %d %I:%M%p") + date_str = " ".join([year_str, month_str, day_str, time_str]) + return datetime.strptime(date_str, "%Y %B %d %I:%M%p") def _parse_location(self, item): """Parse or generate location.""" - desc_str = " ".join(item.css("p[id] *::text").extract()) - # Override for first committee - if "CITYWIDE" in desc_str: - desc_str = " ".join( - [l for l in item.css("p *::text").extract() if "days" in l] - ) + desc_str = " ".join(item.css("p.mb-1::text").extract()) loc_str = re.sub(r"\s+", " ", re.split(r"(\sin\s|\sat\s)", desc_str)[-1]) - if "City Hall" in loc_str: - loc_name = "City Hall" - room_match = re.search(r"(?<=Room )\d+", loc_str) - if room_match: - loc_addr = "601 Lakeside Ave, Room {}, Cleveland OH 44114".format( - room_match.group() - ) - else: - loc_addr = "601 Lakeside Ave, Cleveland OH 44114" - else: - split_loc = loc_str.split("-") - loc_name = "-".join(split_loc[:-1]) - loc_addr = split_loc[-1] + split_loc = loc_str.split("-") + loc_name = "-".join(split_loc[:-1]) + loc_addr = split_loc[-1] if "Cleveland" not in loc_addr: loc_addr = loc_addr.strip() + " Cleveland, OH" return { @@ -148,11 +126,17 @@ def _parse_location(self, item): def _parse_links(self, item, response): links = [] - for link in item.css("a"): - links.append( - { - "title": " ".join(link.css("*::text").extract()).strip(), - "href": response.urljoin(link.attrib["href"]), - } - ) + links.append( + { + "title": " ".join(item.css("*::text").extract()).strip(), + "href": response.urljoin(item.attrib["href"]), + } + ) return links + + def _parse_year_from_agenda_link(self, item): + link = item.attrib["href"] + year_match = re.search(r"\/(20\d{2})\/", link) + if year_match: + return year_match.group(1) + return "2021" \ No newline at end of file From 28898df3885313f2293db4f024bf7e1d8da1af99 Mon Sep 17 00:00:00 2001 From: Zee Abrahams Date: Wed, 1 Dec 2021 11:59:17 -0500 Subject: [PATCH 03/12] Gets tests passing --- city_scrapers/spiders/cle_design_review.py | 39 ++++++++++++++++------ tests/files/cle_design_review.html | 2 +- tests/test_cle_design_review.py | 19 +++++------ 3 files changed, 39 insertions(+), 21 deletions(-) diff --git a/city_scrapers/spiders/cle_design_review.py b/city_scrapers/spiders/cle_design_review.py index 6caff45..bc403a7 100644 --- a/city_scrapers/spiders/cle_design_review.py +++ b/city_scrapers/spiders/cle_design_review.py @@ -12,9 +12,9 @@ class CleDesignReviewSpider(CityScrapersSpider): agency = "Cleveland Design Review Advisory Committees" timezone = "America/Detroit" start_urls = [ - "http://clevelandohio.gov/CityofCleveland/Home/Government/CityAgencies/CityPlanningCommission/MeetingSchedules" # noqa + "https://planning.clevelandohio.gov/designreview/schedule.php" # noqa ] - time_notes = "fill in more about webex" + time_notes = "Due to Covid meetings are generally being held on WebEx rather than in person. For more information contact " def parse(self, response): """ @@ -46,11 +46,11 @@ def parse(self, response): 2. There is no mention of the year anywhere in the text of the site. We can extract it from the agenda link - at least for now. But it will be important to keep an eye on how the site is changed in January. - 3. Meetings are currently not being held in person but over webex. I've included this information in the time_notes section of the + 3. Meetings are currently not being held in person but over webex. We've included this information in the time_notes section of the meeting. Perhaps a more general notes section would make a bit more sense, but given the current fields on the meeting object, time notes seemed like a reasonable place to put this. """ - committee_metas = response.css("div.mt-3") # this skips city planning + committee_metas = response.css("div.mt-3") # this skips city planning since it is handled by a separate scraper committee_agendas = response.css("div.mt-3 + div.dropdown") if len(committee_metas) != len(committee_agendas): # we haven't sucessfully extracted matched metas and agendas so we can't safely iterate over them together. @@ -63,6 +63,7 @@ def parse(self, response): continue location = self._parse_location(committee_meta) time_str = self._parse_time_str(committee_meta) + email_contact = self._parse_email_contact(committee_meta) for agenda in commitee_agenda_list.css("div.dropdown-menu a.dropdown-item"): month_str, day_str = agenda.css("*::text").extract_first().strip().split(" ") year_str = self._parse_year_from_agenda_link(agenda) @@ -77,7 +78,7 @@ def parse(self, response): start=start, end=None, all_day=False, - time_notes=self.time_notes, + time_notes=self.time_notes + email_contact, location=location, links=self._parse_links(agenda, response), source=response.url, @@ -114,9 +115,22 @@ def _parse_location(self, item): """Parse or generate location.""" desc_str = " ".join(item.css("p.mb-1::text").extract()) loc_str = re.sub(r"\s+", " ", re.split(r"(\sin\s|\sat\s)", desc_str)[-1]) - split_loc = loc_str.split("-") - loc_name = "-".join(split_loc[:-1]) - loc_addr = split_loc[-1] + # The downtown/flats commission doesn't give the full address - it just says + # city hall so we need a special case to add the street address + if "City Hall" in loc_str: + loc_name = "City Hall" + room_match = re.search(r"(?<=Room )\d+", loc_str) + if room_match: + loc_addr = "601 Lakeside Ave, Room {}, Cleveland OH 44114".format( + room_match.group() + ) + else: + loc_addr = "601 Lakeside Ave, Cleveland OH 44114" + else: + split_loc = loc_str.split("-") + loc_name = "-".join(split_loc[:-1]) + loc_addr = split_loc[-1] + # We need to make sure that the address ends with the city and state if "Cleveland" not in loc_addr: loc_addr = loc_addr.strip() + " Cleveland, OH" return { @@ -128,7 +142,7 @@ def _parse_links(self, item, response): links = [] links.append( { - "title": " ".join(item.css("*::text").extract()).strip(), + "title": "Agenda", "href": response.urljoin(item.attrib["href"]), } ) @@ -139,4 +153,9 @@ def _parse_year_from_agenda_link(self, item): year_match = re.search(r"\/(20\d{2})\/", link) if year_match: return year_match.group(1) - return "2021" \ No newline at end of file + return "2021" + + def _parse_email_contact(self, item): + email_str = item.css("p.mt-1::text").extract()[2] + return email_str.replace(": ", "") + diff --git a/tests/files/cle_design_review.html b/tests/files/cle_design_review.html index d904fcf..be133f3 100644 --- a/tests/files/cle_design_review.html +++ b/tests/files/cle_design_review.html @@ -33,7 +33,7 @@ --> - + diff --git a/tests/test_cle_design_review.py b/tests/test_cle_design_review.py index b87197b..38dc8f9 100644 --- a/tests/test_cle_design_review.py +++ b/tests/test_cle_design_review.py @@ -11,12 +11,12 @@ test_response = file_response( join(dirname(__file__), "files", "cle_design_review.html"), url=( - "http://clevelandohio.gov/CityofCleveland/Home/Government/CityAgencies/CityPlanningCommission/MeetingSchedules" # noqa + "https://planning.clevelandohio.gov/designreview/schedule.php" # noqa ), ) spider = CleDesignReviewSpider() -freezer = freeze_time("2020-05-19") +freezer = freeze_time("2021-12-01") freezer.start() parsed_items = [item for item in spider.parse(test_response)] @@ -25,7 +25,7 @@ def test_count(): - assert len(parsed_items) == 165 + assert len(parsed_items) == 96 def test_title(): @@ -37,7 +37,7 @@ def test_description(): def test_start(): - assert parsed_items[0]["start"] == datetime(2020, 1, 2, 9, 0) + assert parsed_items[0]["start"] == datetime(2021, 1, 14, 9, 0) def test_end(): @@ -45,13 +45,13 @@ def test_end(): def test_time_notes(): - assert parsed_items[0]["time_notes"] == "" + assert parsed_items[0]["time_notes"] == "Due to Covid meetings are generally being held on WebEx rather than in person. For more information contact asantora@clevelandohio.gov" def test_id(): assert ( parsed_items[0]["id"] - == "cle_design_review/202001020900/x/downtown_flats_design_review_committee" + == "cle_design_review/202101140900/x/downtown_flats_design_review_committee" ) @@ -73,15 +73,14 @@ def test_location(): def test_source(): assert ( parsed_items[0]["source"] - == "http://clevelandohio.gov/CityofCleveland/Home/Government/CityAgencies/CityPlanningCommission/MeetingSchedules" # noqa + == "https://planning.clevelandohio.gov/designreview/schedule.php" # noqa ) def test_links(): - assert parsed_items[0]["links"] == [] - assert parsed_items[1]["links"] == [ + assert parsed_items[0]["links"] == [ { - "href": "http://clevelandohio.gov/sites/default/files/planning/drc/agenda/2020/DF-DRAC-agenda-1-16-20.pdf", # noqa + "href": "https://planning.clevelandohio.gov/designreview/drcagenda/2021/PDF/CPC-Agenda-WebEx-meeting-011421.pdf", # noqa "title": "Agenda", } ] From f10eb5930669fbc18c6cbc24c4ad8b5906e29ee5 Mon Sep 17 00:00:00 2001 From: Zee Abrahams Date: Thu, 2 Dec 2021 13:29:49 -0500 Subject: [PATCH 04/12] Gets the calculator working --- city_scrapers/spiders/cle_design_review.py | 93 ++++++++++++++++++++-- 1 file changed, 85 insertions(+), 8 deletions(-) diff --git a/city_scrapers/spiders/cle_design_review.py b/city_scrapers/spiders/cle_design_review.py index bc403a7..7c3cf0d 100644 --- a/city_scrapers/spiders/cle_design_review.py +++ b/city_scrapers/spiders/cle_design_review.py @@ -1,5 +1,7 @@ import re -from datetime import datetime +import calendar +import time +from datetime import datetime, date from city_scrapers_core.constants import ADVISORY_COMMITTEE from city_scrapers_core.items import Meeting @@ -17,13 +19,6 @@ class CleDesignReviewSpider(CityScrapersSpider): time_notes = "Due to Covid meetings are generally being held on WebEx rather than in person. For more information contact " def parse(self, response): - """ - `parse` should always `yield` Meeting items. - - Change the `_parse_title`, `_parse_start`, etc methods to fit your scraping - needs. - """ - """ There's no element that wraps both the committee name/time and the dropdown containing the agendas. As such we want to grab each committee name/times and then use the following dropdown @@ -159,3 +154,85 @@ def _parse_email_contact(self, item): email_str = item.css("p.mt-1::text").extract()[2] return email_str.replace(": ", "") + def _parse_weekday(self, weekday): + return time.strptime(weekday, "%A").tm_wday + + + def _calculate_upcoming_meeting_days(self, chosen_weekday, chosen_weeks, start, end): + current_month = start.month + current_year = start.year + + current_month_days = self._calculate_meeting_days_per_month(chosen_weekday, chosen_weeks, current_year, current_month) + raw_days = [(day, current_month, current_year) for day in current_month_days] + + while not (current_month == end.month and current_year == end.year): + current_month_days = self._calculate_meeting_days_per_month(chosen_weekday, chosen_weeks, current_year, current_month) + raw_days.append([(day, current_month, current_year) for day in current_month_days]) + + current_month = current_month+1 if current_month != 12 else 1 + if current_month == 1: + current_year = current_year + 1 + + # we now have all the relevant dates for the given months but we need to filter out days before and after start and end + return [day, month, year in raw_days if + (not too_early(day, month, year, start)) and (not too_late(day, month, year, end))] + + +def calculate_upcoming_meeting_days(chosen_weekday, chosen_weeks, start, end): + """ + This function is used to calculate meeting dates described as the 1 and 3rd Tuesday of a month for + any given time frame between start and end dates. + + Parameters: + chosen_weekday (int): the weekday that you're looking for. Monday is 0, so in the examples above this would be 2 + chosen_weeks (int[]): the particular days you're looking for - like 1st and 3rd. These days should be passed though starting the count from 0, i.e [0, 2] for first and third + start (date): the first day to begin calculating meetings from + end (date): the final day to be considered as a potential meeting date + + Returns: + []date: an array of dates that match the given conditions + """ + current_month = start.month + current_year = start.year + + # current_month_days = calculate_meeting_days_per_month(chosen_weekday, chosen_weeks, current_year, current_month) + # raw_days = [(day, current_month, current_year) for day in current_month_days] + + raw_dates = [] + while not (current_month == end.month and current_year == end.year): + current_month_days = _calculate_meeting_days_per_month(chosen_weekday, chosen_weeks, current_year, current_month) + raw_dates = raw_dates + [date(current_year, current_month, day) for day in current_month_days] + + # we can't easily use % arithmetic here since we're starting at 1, so it's a bit easier to read this way + current_month = current_month+1 if current_month != 12 else 1 + if current_month == 1: + current_year = current_year + 1 + + # add the days for the final month since they're missed by the loop + current_month_days = _calculate_meeting_days_per_month(chosen_weekday, chosen_weeks, current_year, current_month) + raw_dates = raw_dates + [date(current_year, current_month, day) for day in current_month_days] + # we now have all the relevant dates for the given months but we need to filter out days before and after start and end + return [current_date for current_date in raw_dates if (start <= current_date <= end)] + +def _calculate_meeting_days_per_month(chosen_weekday, chosen_weeks, year, month): + """ + This function is used to calculate meeting dates described as the 1 and 3rd Tuesday of a month. + + Parameters: + chosen_weekday (int): the weekday that you're looking for. Monday is 0, so in the examples above this would be 2 + chosen_weeks (int[]): the particular days you're looking for - like 1st and 3rd. These days should be passed though starting the count from 0, i.e [0, 2] for first and third + year (int): the year as an integer + month (int): the month as an integer + + Returns: + []int: an array of the days of the month that matched the given conditions. + """ + + days_of_the_month = calendar.Calendar().itermonthdays2(year, month) + # we create a list of all days in the month that are the proper weekday - day is 0 if it is outside the month + # but present to make complete first or last weeks + potential_days = [day for day, weekday in days_of_the_month if day != 0 and weekday == chosen_weekday] + # we then add one to the index and see if the resulting number is in the chosen_weeks array + chosen_days = [day for i, day in enumerate(potential_days) if (i) in chosen_weeks ] + + return chosen_days From 24cc6c55e27e8114aa8fb3f6a4473e17d9733fe3 Mon Sep 17 00:00:00 2001 From: Zee Abrahams Date: Fri, 3 Dec 2021 10:15:37 -0500 Subject: [PATCH 05/12] Gets everything working and tested --- city_scrapers/spiders/cle_design_review.py | 186 ++++--- tests/files/cle_design_review.html.old | 608 --------------------- tests/test_cle_design_review.py | 67 ++- 3 files changed, 178 insertions(+), 683 deletions(-) delete mode 100644 tests/files/cle_design_review.html.old diff --git a/city_scrapers/spiders/cle_design_review.py b/city_scrapers/spiders/cle_design_review.py index 7c3cf0d..1789ecc 100644 --- a/city_scrapers/spiders/cle_design_review.py +++ b/city_scrapers/spiders/cle_design_review.py @@ -1,7 +1,7 @@ import re import calendar import time -from datetime import datetime, date +from datetime import datetime, date, timedelta from city_scrapers_core.constants import ADVISORY_COMMITTEE from city_scrapers_core.items import Meeting @@ -59,11 +59,16 @@ def parse(self, response): location = self._parse_location(committee_meta) time_str = self._parse_time_str(committee_meta) email_contact = self._parse_email_contact(committee_meta) + weekday, chosen_weeks, is_downtown = self._parse_meeting_schedule_info(committee_meta) + most_recent_start = datetime.today() + + # Start by looking through the agendas for existing meetings for agenda in commitee_agenda_list.css("div.dropdown-menu a.dropdown-item"): month_str, day_str = agenda.css("*::text").extract_first().strip().split(" ") year_str = self._parse_year_from_agenda_link(agenda) start = self._parse_start(year_str, month_str, day_str, time_str) + most_recent_start = start # most_recent_start will be used to calculate upcoming meetings with no agenda if not start: continue meeting = Meeting( @@ -82,6 +87,33 @@ def parse(self, response): meeting["status"] = self._get_status(meeting) meeting["id"] = self._get_id(meeting) + yield meeting + + # next we calculate upcoming meeting dates for 60 days after the last agenda date + calc_start = most_recent_start + timedelta(days=1) + calc_end = calc_start + timedelta(days=60) + upcoming_meetings = self._calculate_upcoming_meeting_days(weekday, chosen_weeks, calc_start, calc_end) + if is_downtown: # downtown meetings are a day before the one calculated + tmp = [day+timedelta(days=-1) for day in upcoming_meetings] + upcoming_meetings = tmp # this was just to make sure we weren't mutating the list as we used in a comprehension + for day in upcoming_meetings: + start = self._parse_calculated_start(day, time_str) + meeting = Meeting( + title=title, + description="", + classification=ADVISORY_COMMITTEE, + start=start, + end=None, + all_day=False, + time_notes=self.time_notes + email_contact, + location=location, + links=[], + source=response.url, + ) + + meeting["status"] = self._get_status(meeting) + meeting["id"] = self._get_id(meeting) + yield meeting def _parse_title(self, item): @@ -95,6 +127,7 @@ def _parse_title(self, item): return committee_strs[0].title() def _parse_time_str(self, item): + """Parse out the time as a string in the format hh:mm:am/pm""" desc_text = " ".join(item.css("p.mb-1::text").extract()) time_match = re.search(r"\d{1,2}:\d{2}\s*[apm]{2}", desc_text) if time_match: @@ -106,6 +139,11 @@ def _parse_start(self, year_str, month_str, day_str, time_str): date_str = " ".join([year_str, month_str, day_str, time_str]) return datetime.strptime(date_str, "%Y %B %d %I:%M%p") + def _parse_calculated_start(self, day, time_str): + """Parse start datetime from python date and a string with the time.""" + date_str = " ".join([day.strftime("%Y %B %d"), time_str]) + return datetime.strptime(date_str, "%Y %B %d %I:%M%p") + def _parse_location(self, item): """Parse or generate location.""" desc_str = " ".join(item.css("p.mb-1::text").extract()) @@ -134,6 +172,7 @@ def _parse_location(self, item): } def _parse_links(self, item, response): + """Parse out the links for the meeting""" links = [] links.append( { @@ -144,6 +183,7 @@ def _parse_links(self, item, response): return links def _parse_year_from_agenda_link(self, item): + """Parse the year as a string from a link containing the agenda""" link = item.attrib["href"] year_match = re.search(r"\/(20\d{2})\/", link) if year_match: @@ -151,88 +191,98 @@ def _parse_year_from_agenda_link(self, item): return "2021" def _parse_email_contact(self, item): + """Parses the email for a committee's contact""" email_str = item.css("p.mt-1::text").extract()[2] return email_str.replace(": ", "") - - def _parse_weekday(self, weekday): - return time.strptime(weekday, "%A").tm_wday + + def _parse_meeting_schedule_info(self, committee_meta): + """Parses out the weekday, and frequency of the meeting for calculating future dates""" + # Add special case for downtown downtown meetings are the day before city planning, + # so we calculate using the city planning schedule (1, and 3rd Friday) and set a flag + # so we can subtract a day from the results + committee_str = " ".join(committee_meta.css("p.mb-1::text").extract()) + is_downtown = "prior to the City Planning Commission" in committee_str + + if is_downtown: + weekday = 4 + chosen_weeks = [0, 2] + else: + weekday_str = committee_meta.css("p.mb-1 strong::text").extract_first() + weekday = self._parse_weekday(weekday_str) + raw_weeks = re.findall(r"1st|2nd|3rd|4th", committee_str) + chosen_weeks = [self._parse_ordinal(ordinal) for ordinal in raw_weeks] + return weekday, chosen_weeks, is_downtown + def _parse_weekday(self, weekday): + """Parses weekday strings as their integer equivalent""" + # we cut off the last char of weekday, because it comes through with an 's' i.e. 'Tuesdays' + return time.strptime(weekday[:-1], "%A").tm_wday + + def _parse_ordinal(self, ordinal_str): + """Parses ordinals as their integer equivalent beginning from 0""" + ordinal_lookup = { + "1st": 0, + "2nd": 1, + "3rd": 2, + "4th": 3 + } + return ordinal_lookup[ordinal_str.lower()] def _calculate_upcoming_meeting_days(self, chosen_weekday, chosen_weeks, start, end): + """ + This function is used to calculate meeting dates described as the 1 and 3rd Tuesday of a month for + any given time frame between start and end dates. + + Parameters: + chosen_weekday (int): the weekday that you're looking for. Monday is 0, so in the examples above this would be 2 + chosen_weeks (int[]): the particular days you're looking for - like 1st and 3rd. These days should be passed though starting the count from 0, i.e [0, 2] for first and third + start (date): the first day to begin calculating meetings from + end (date): the final day to be considered as a potential meeting date + + Returns: + []date: an array of dates that match the given conditions + """ current_month = start.month current_year = start.year - current_month_days = self._calculate_meeting_days_per_month(chosen_weekday, chosen_weeks, current_year, current_month) - raw_days = [(day, current_month, current_year) for day in current_month_days] + # current_month_days = calculate_meeting_days_per_month(chosen_weekday, chosen_weeks, current_year, current_month) + # raw_days = [(day, current_month, current_year) for day in current_month_days] + raw_dates = [] while not (current_month == end.month and current_year == end.year): current_month_days = self._calculate_meeting_days_per_month(chosen_weekday, chosen_weeks, current_year, current_month) - raw_days.append([(day, current_month, current_year) for day in current_month_days]) + raw_dates = raw_dates + [date(current_year, current_month, day) for day in current_month_days] + # we can't easily use % arithmetic here since we're starting at 1, so it's a bit easier to read this way current_month = current_month+1 if current_month != 12 else 1 if current_month == 1: current_year = current_year + 1 - # we now have all the relevant dates for the given months but we need to filter out days before and after start and end - return [day, month, year in raw_days if - (not too_early(day, month, year, start)) and (not too_late(day, month, year, end))] - - -def calculate_upcoming_meeting_days(chosen_weekday, chosen_weeks, start, end): - """ - This function is used to calculate meeting dates described as the 1 and 3rd Tuesday of a month for - any given time frame between start and end dates. - - Parameters: - chosen_weekday (int): the weekday that you're looking for. Monday is 0, so in the examples above this would be 2 - chosen_weeks (int[]): the particular days you're looking for - like 1st and 3rd. These days should be passed though starting the count from 0, i.e [0, 2] for first and third - start (date): the first day to begin calculating meetings from - end (date): the final day to be considered as a potential meeting date - - Returns: - []date: an array of dates that match the given conditions - """ - current_month = start.month - current_year = start.year - - # current_month_days = calculate_meeting_days_per_month(chosen_weekday, chosen_weeks, current_year, current_month) - # raw_days = [(day, current_month, current_year) for day in current_month_days] - - raw_dates = [] - while not (current_month == end.month and current_year == end.year): - current_month_days = _calculate_meeting_days_per_month(chosen_weekday, chosen_weeks, current_year, current_month) + # add the days for the final month since they're missed by the loop + current_month_days = self._calculate_meeting_days_per_month(chosen_weekday, chosen_weeks, current_year, current_month) raw_dates = raw_dates + [date(current_year, current_month, day) for day in current_month_days] + # we now have all the relevant dates for the given months but we need to filter out days before and after start and end + return [current_date for current_date in raw_dates if (start.date() <= current_date <= end.date())] - # we can't easily use % arithmetic here since we're starting at 1, so it's a bit easier to read this way - current_month = current_month+1 if current_month != 12 else 1 - if current_month == 1: - current_year = current_year + 1 - - # add the days for the final month since they're missed by the loop - current_month_days = _calculate_meeting_days_per_month(chosen_weekday, chosen_weeks, current_year, current_month) - raw_dates = raw_dates + [date(current_year, current_month, day) for day in current_month_days] - # we now have all the relevant dates for the given months but we need to filter out days before and after start and end - return [current_date for current_date in raw_dates if (start <= current_date <= end)] - -def _calculate_meeting_days_per_month(chosen_weekday, chosen_weeks, year, month): - """ - This function is used to calculate meeting dates described as the 1 and 3rd Tuesday of a month. - - Parameters: - chosen_weekday (int): the weekday that you're looking for. Monday is 0, so in the examples above this would be 2 - chosen_weeks (int[]): the particular days you're looking for - like 1st and 3rd. These days should be passed though starting the count from 0, i.e [0, 2] for first and third - year (int): the year as an integer - month (int): the month as an integer - - Returns: - []int: an array of the days of the month that matched the given conditions. - """ - - days_of_the_month = calendar.Calendar().itermonthdays2(year, month) - # we create a list of all days in the month that are the proper weekday - day is 0 if it is outside the month - # but present to make complete first or last weeks - potential_days = [day for day, weekday in days_of_the_month if day != 0 and weekday == chosen_weekday] - # we then add one to the index and see if the resulting number is in the chosen_weeks array - chosen_days = [day for i, day in enumerate(potential_days) if (i) in chosen_weeks ] - - return chosen_days + def _calculate_meeting_days_per_month(self, chosen_weekday, chosen_weeks, year, month): + """ + This function is used to calculate meeting dates described as the 1 and 3rd Tuesday of a month. + + Parameters: + chosen_weekday (int): the weekday that you're looking for. Monday is 0, so in the examples above this would be 2 + chosen_weeks (int[]): the particular days you're looking for - like 1st and 3rd. These days should be passed though starting the count from 0, i.e [0, 2] for first and third + year (int): the year as an integer + month (int): the month as an integer + + Returns: + []int: an array of the days of the month that matched the given conditions. + """ + + days_of_the_month = calendar.Calendar().itermonthdays2(year, month) + # we create a list of all days in the month that are the proper weekday - day is 0 if it is outside the month + # but present to make complete first or last weeks + potential_days = [day for day, weekday in days_of_the_month if day != 0 and weekday == chosen_weekday] + # we then add one to the index and see if the resulting number is in the chosen_weeks array + chosen_days = [day for i, day in enumerate(potential_days) if (i) in chosen_weeks ] + + return chosen_days diff --git a/tests/files/cle_design_review.html.old b/tests/files/cle_design_review.html.old deleted file mode 100644 index a48c7bd..0000000 --- a/tests/files/cle_design_review.html.old +++ /dev/null @@ -1,608 +0,0 @@ - - - - - - - - - - Design Review Meeting Schedules | City of Cleveland - - - - - - - - - - - - - - - - - - -
 

Design Review Meeting Schedules

-
- - -
-
- - - -
-

Design Review Committee Agendas
Downtown/Flats | East | Euclid Corridor | Far West | Near West | Northeast | Southeast | Mayor's Streetscape

-

-

CITY PLANNING

-

The City Planning Commission meets at 9am, every 1st & 3rd Friday of the month in Room 514, City Hall.

- -
-

Under the conditions specified by law, the Cleveland Planning Commission will be conducting virtual meetings in a limited capacity using the WebEx Platform. This will include limited agenda items to initiate the process to ensure we can appropriately evaluate the process.

-

The Planning Commission will also be live streamed on YouTube and TV 20. The links for the live streams will be available before the meeting.

-

In order to keep the WebEx session to a manageable size we are asking individuals that wish to participate in the meeting to contact the City Planning office by phone or email. Those individuals not planning to comment on any agenda item during the WebEx session are encouraged to view one of the live streams.

-

To contact the City Planning office and request access to the WebEx City Planning Meeting please call 216.664.3826 or email us at cityplanning@clevelandohio.gov.

-

We ask that you include the name of the project you plan to comment on when requesting access to the WebEx meeting

-

WebEx Help | WebEx Website (App Download)

-

YouTube Live Stream | TV 20 Live Stream

-

-

2020 Agendas:

- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
JAN.03 - Agenda17 - Agenda - Photo Gallery
FEB.07 - Agenda - Photo Gallery21 - Agenda
MAR.06 - Agenda - Photo Gallery20
APR.0317
MAY0115 - Agenda
JUN.0519
JUL.0317
AUG.0721
SEP.0418
OCT.0216
NOV.0620
DEC.0418

CITYWIDE DESIGN REVIEW ADVISORY COMMITTEES

-

***Please Note***
All Design Review Committee meetings are being conducted virtually using the WebEx Platform. Please contact the staff planner for meeting information.DOWNTOWN/FLATS

-

-

Downtown/Flats Design Review Committee

-

The Committee meets at 9:00 am, Thursday prior to the City Planning Commission meeting on Fridays in Room 514, City Hall

-

Contact: Anthony Santora Phone: 216.664.3815 Email: asantora@clevelandohio.gov

-

Click on the meeting date for agenda (PDF)

- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
JAN.0216 - Agenda
FEB.06 - Agenda20 - Agenda
MAR.05 - Agenda19
APR.0216
MAYApril 3014*
JUN.0418
JUL.02 - No meeting16
AUG.0620
SEP.0317
OCT.0115
NOV.0519
DEC.0317

EAST DESIGN REVIEW COMMITTEE
The Committee meets on the 2nd & 4th Tuesdays @ 8:30 am in Cornucopia Place, 7201 Kinsman Road, Suite 103B

-

Contact: Nickol Calhoun Phone: 216.664.3817 Email: ncalhoun@clevelandohio.gov

-

Click on the meeting date for agenda (PDF)

- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
JAN.14 - Agenda28 - Agenda
FEB.1125
MAR.1024
APR.1428
MAY1226
JUN.0923
JUL.1428
AUG.1125
SEP.0822
OCT.1327
NOV.1024
DEC.0822

EUCLID CORRIDOR DESIGN REVIEW COMMITTEE
The Committee meets on the 1st & 3rd Thursdays @ 8:00 am in The Agora Building- 5000 Euclid Ave

-

Contact: Kim Scott Phone: 216.664.3803 Email: kscott@clevelandohio.gov

-

Click on the meeting date for agenda (PDF)

- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
JAN.0216 - Agenda
FEB.6 - Agenda20
MAR.5 - Agenda19
APR.0216
MAYApril 3014
JUN.0418
JUL.2 -No meeting16
AUG.0620
SEP.0317
OCT.0115
NOV.519
DEC.0317

FAR WEST DESIGN REVIEW COMMITTEE
The Committee meets on the 1st & 3rd Wednesdays @ 8:00 am at St. Mel's Catholic Church - 14436 Triskett

-

Contact: Adam Davenport Phone: 216.664.3800 Email: adavenport@clevelandohio.gov

-

Click on the meeting date for agenda (PDF)

- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
JAN.1 -No meeting15
FEB.0519
MAR.0418
APR.0115
MAY0620
JUN.0317
JUL.0115
AUG.0519
SEP.0216
OCT.0721
NOV.0418
DEC.0216

NEAR WEST DESIGN REVIEW COMMITTEE
The Committee meets on the 2nd & 4th Wednesdays @ 8:30 am at South Branch Library, 3096 Scranton Rd.

-

Contact: Matt Moss Phone: 216.664.3807 Email: mmoss@clevelandohio.gov

-

Click on the meeting date for agenda (PDF)

- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
JAN.08 - Agenda22
FEB.12 - Agenda26 - Agenda
MAR.11 - Agenda25
APR.0822
MAY13 - Agenda27
JUN.1024
JUL.0822
AUG.1226
SEP.0923
OCT.1428
NOV.1125
DEC.0923

NORTHEAST DESIGN REVIEW COMMITTEE
The Committee meets on the 1st & 3rd Tuesdays @ 8:00 am at CPL Memorial-Nottingham Branch - 17109 Lakeshore Blvd.

-

Contact: Sharonda Whatley Phone: 216.664.3806 Email: swhatley@clevelandohio.gov

-

Click on the meeting date for agenda (PDF)

- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
JAN.0721
FEB.0418 - Agenda
MAR.0317 - Agenda
APR.0721
MAY05 - Agenda19 - Agenda
JUN.0216
JUL.0721
AUG.0418
SEP.0115
OCT.0620
NOV.0317
DEC.0115

SOUTHEAST DESIGN REVIEW COMMITTEE
The Committee meets on the 2nd & 4th Wednesdays @ 5:00 pm at York-Rite Mason Temple - 13512 Kinsman Road

-

Contact: Marka Fields Phone: 216.664.3465 Email: mfields@clevelandohio.gov

-

Click on the meeting date for agenda (PDF)

- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
JAN.08 - Agenda22 - Agenda
FEB.1226
MAR.11 - Agenda25
APR.0822
MAY1327
JUN.1024
JUL.0822
AUG.1226
SEP.0923
OCT.1428
NOV.1125
DEC.0923

OTHER REVIEW COMMITTEES

-

MAYOR'S INFRASTRUCTURE & STREETSCAPE ADVISORY COMMITTEE
The Committee meets on the 1st & 3rd Tuesdays @ 2:00 pm in Room 514, City Hall

- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
JAN.0721
FEB.0418
MAR.0317
APR.0721
MAY0519
JUN.0216
JUL.0721
AUG.0418
SEP.0115
OCT.0620
NOV.0317
DEC.0115

5170
164765
1613
5166
- - - -
-
-
-
-
 
- diff --git a/tests/test_cle_design_review.py b/tests/test_cle_design_review.py index 38dc8f9..24b041f 100644 --- a/tests/test_cle_design_review.py +++ b/tests/test_cle_design_review.py @@ -2,7 +2,7 @@ from os.path import dirname, join import pytest # noqa -from city_scrapers_core.constants import ADVISORY_COMMITTEE, PASSED +from city_scrapers_core.constants import ADVISORY_COMMITTEE, PASSED, TENTATIVE from city_scrapers_core.utils import file_response from freezegun import freeze_time @@ -25,7 +25,7 @@ def test_count(): - assert len(parsed_items) == 96 + assert len(parsed_items) == 118 def test_title(): @@ -64,11 +64,7 @@ def test_location(): "name": "City Hall", "address": "601 Lakeside Ave, Room 514, Cleveland OH 44114", } - assert parsed_items[-1]["location"] == { - "address": "13512 Kinsman Road Cleveland, OH", - "name": "York-Rite Mason Temple", - } - + def test_source(): assert ( @@ -92,3 +88,60 @@ def test_classification(): def test_all_day(): assert parsed_items[0]["all_day"] is False + +# There's a second set of tests to make sure that we're correclty parsing out details for meetings based on calculated times + +def test_future_meeting_title(): + assert parsed_items[-1]["title"] == "Southeast Design Review Committee" + + +def test_future_meeting_description(): + assert parsed_items[-1]["description"] == "" + + +def test_future_meeting_start(): + assert parsed_items[-1]["start"] == datetime(2021, 12, 22, 17, 0) + + +def test_future_meeting_end(): + assert parsed_items[-1]["end"] is None + + +def test_future_meeting_time_notes(): + assert parsed_items[-1]["time_notes"] == "Due to Covid meetings are generally being held on WebEx rather than in person. For more information contact mfields@clevelandohio.gov" + + +def test_future_meeting_id(): + assert ( + parsed_items[-1]["id"] + == "cle_design_review/202112221700/x/southeast_design_review_committee" + ) + + +def test_future_meeting_status(): + assert parsed_items[-1]["status"] == TENTATIVE + + +def test_future_meeting_location(): + assert parsed_items[-1]["location"] == { + "name": "York-Rite Mason Temple", + "address": "13512 Kinsman Road Cleveland, OH", + } + +def test_future_meeting_source(): + assert ( + parsed_items[-1]["source"] + == "https://planning.clevelandohio.gov/designreview/schedule.php" # noqa + ) + + +def test_future_meeting_links(): + assert len(parsed_items[-1]["links"]) == 0 + + +def test_future_meeting_classification(): + assert parsed_items[-1]["classification"] == ADVISORY_COMMITTEE + + +def test_future_meeting_all_day(): + assert parsed_items[-1]["all_day"] is False From b97c5ec8d47cc7e149747f0937c9b825b76959ad Mon Sep 17 00:00:00 2001 From: Zee Abrahams Date: Fri, 3 Dec 2021 10:32:55 -0500 Subject: [PATCH 06/12] Fixes all the linting issues --- city_scrapers/spiders/cle_design_review.py | 220 +++++++++++++-------- tests/test_cle_design_review.py | 22 ++- 2 files changed, 149 insertions(+), 93 deletions(-) diff --git a/city_scrapers/spiders/cle_design_review.py b/city_scrapers/spiders/cle_design_review.py index 1789ecc..4e9455b 100644 --- a/city_scrapers/spiders/cle_design_review.py +++ b/city_scrapers/spiders/cle_design_review.py @@ -6,7 +6,6 @@ from city_scrapers_core.constants import ADVISORY_COMMITTEE from city_scrapers_core.items import Meeting from city_scrapers_core.spiders import CityScrapersSpider -from scrapy import Selector class CleDesignReviewSpider(CityScrapersSpider): @@ -16,39 +15,53 @@ class CleDesignReviewSpider(CityScrapersSpider): start_urls = [ "https://planning.clevelandohio.gov/designreview/schedule.php" # noqa ] - time_notes = "Due to Covid meetings are generally being held on WebEx rather than in person. For more information contact " + time_notes = "Due to Covid meetings are generally being held on WebEx rather than in person. For more information contact " # noqa def parse(self, response): """ - There's no element that wraps both the committee name/time and the dropdown containing the - agendas. As such we want to grab each committee name/times and then use the following dropdown - to get the agendas. Luckily all of the committee name/times are (and are the only thing in) divs with - the class '.mt-3' so we can grab all the divs with those classes and then look for the next sibling div with - the ".dropdown" class to get the links to all the agendas. - - Note that the city planning meeting is handled by a different scraper so we do look at it here. Luckily - the name/times for the city planning meeting are not currently wrapped in a div, so the list of nodes - described above won't include it. + There's no element that wraps both the committee name/time and + the dropdown containing the agendas. As such we want to grab + each committee name/times and then use the following dropdown + to get the agendas. Luckily all of the committee name/times are + (and are the only thing in) divs with the class '.mt-3' so we can + grab all the divs with those classes and then look for the next sibling + div with the ".dropdown" class to get the links to all the agendas. + + Note that the city planning meeting is handled by a different scraper so + we do look at it here. Luckily the name/times for the city planning + meeting are not currently wrapped in a div, so the list of nodes described + above won't include it. There are three other points to keep in mind for this scraper: - 1. The way the data is presented doesn't make it easy to know whether or not a meeting occurred but doesn't have an - agenda, or whether a meeting is going to happen on a normal meeting date. The strategy I'm using is to treat - the agenda links as authoritative for past (and if listed upcoming) meetings. So previous meetings are just read off of the - agenda links. For future meetings we take the date of the most recent agenda and then - calculate the remaining meetings this year from that date. As dates progress and agendas are added, those tentative meetings - will either be confirmed to exist or disappear based on the ways the agendas are updated. - - 2. There is no mention of the year anywhere in the text of the site. We can extract it from the agenda link - at least - for now. But it will be important to keep an eye on how the site is changed in January. - - 3. Meetings are currently not being held in person but over webex. We've included this information in the time_notes section of the - meeting. Perhaps a more general notes section would make a bit more sense, but given the current fields on the - meeting object, time notes seemed like a reasonable place to put this. + + 1. The way the data is presented doesn't make it easy to know whether or + not a meeting occurred but doesn't have an agenda, or whether a meeting + is going to happen on a normal meeting date. The strategy I'm using is + to treat the agenda links as authoritative for past (and if listed + upcoming) meetings. So previous meetings are just read off of the agenda + links. For future meetings we take the date of the most recent agenda + and then calculate the remaining meetings this year from that date. As + dates progress and agendas are added, those tentative meetings will + either be confirmed to exist or disappear based on the ways the agendas + are updated. + + 2. There is no mention of the year anywhere in the text of the site. We + can extract it from the agenda link - at least for now. But it will + be important to keep an eye on how the site is changed in January. + + 3. Meetings are currently not being held in person but over webex. We've + included this information in the time_notes section of the meeting. + Perhaps a more general notes section would make a bit more sense, but + given the current fields on the meeting object, time notes seemed like + a reasonable place to put this. """ - committee_metas = response.css("div.mt-3") # this skips city planning since it is handled by a separate scraper + committee_metas = response.css( + "div.mt-3" + ) # this skips city planning since it is handled by a separate scraper committee_agendas = response.css("div.mt-3 + div.dropdown") - if len(committee_metas) != len(committee_agendas): - # we haven't sucessfully extracted matched metas and agendas so we can't safely iterate over them together. + if len(committee_metas) != len(committee_agendas): + # we haven't sucessfully extracted matched metas and agendas so we + # can't safely iterate over them together. raise ValueError("Cannot match committee agandas to committee metadata") committee_items = zip(committee_metas, committee_agendas) @@ -59,16 +72,22 @@ def parse(self, response): location = self._parse_location(committee_meta) time_str = self._parse_time_str(committee_meta) email_contact = self._parse_email_contact(committee_meta) - weekday, chosen_weeks, is_downtown = self._parse_meeting_schedule_info(committee_meta) + weekday, chosen_weeks, is_downtown = self._parse_meeting_schedule_info( + committee_meta + ) most_recent_start = datetime.today() - + # Start by looking through the agendas for existing meetings for agenda in commitee_agenda_list.css("div.dropdown-menu a.dropdown-item"): - month_str, day_str = agenda.css("*::text").extract_first().strip().split(" ") + month_str, day_str = ( + agenda.css("*::text").extract_first().strip().split(" ") + ) year_str = self._parse_year_from_agenda_link(agenda) - + start = self._parse_start(year_str, month_str, day_str, time_str) - most_recent_start = start # most_recent_start will be used to calculate upcoming meetings with no agenda + # most_recent_start will be used to calculate upcoming meetings + # with no agenda + most_recent_start = start if not start: continue meeting = Meeting( @@ -87,15 +106,21 @@ def parse(self, response): meeting["status"] = self._get_status(meeting) meeting["id"] = self._get_id(meeting) - yield meeting + yield meeting - # next we calculate upcoming meeting dates for 60 days after the last agenda date + # next we calculate upcoming meeting dates for 60 days after the + # last agenda date calc_start = most_recent_start + timedelta(days=1) calc_end = calc_start + timedelta(days=60) - upcoming_meetings = self._calculate_upcoming_meeting_days(weekday, chosen_weeks, calc_start, calc_end) - if is_downtown: # downtown meetings are a day before the one calculated - tmp = [day+timedelta(days=-1) for day in upcoming_meetings] - upcoming_meetings = tmp # this was just to make sure we weren't mutating the list as we used in a comprehension + upcoming_meetings = self._calculate_upcoming_meeting_days( + weekday, chosen_weeks, calc_start, calc_end + ) + if is_downtown: # downtown meetings are a day before the one calculated + # this tmp is just to make sure we weren't mutating the list as + # we used it in a comprehension + tmp = [day + timedelta(days=-1) for day in upcoming_meetings] + upcoming_meetings = tmp + for day in upcoming_meetings: start = self._parse_calculated_start(day, time_str) meeting = Meeting( @@ -163,7 +188,7 @@ def _parse_location(self, item): split_loc = loc_str.split("-") loc_name = "-".join(split_loc[:-1]) loc_addr = split_loc[-1] - # We need to make sure that the address ends with the city and state + # We need to make sure that the address ends with the city and state if "Cleveland" not in loc_addr: loc_addr = loc_addr.strip() + " Cleveland, OH" return { @@ -174,12 +199,7 @@ def _parse_location(self, item): def _parse_links(self, item, response): """Parse out the links for the meeting""" links = [] - links.append( - { - "title": "Agenda", - "href": response.urljoin(item.attrib["href"]), - } - ) + links.append({"title": "Agenda", "href": response.urljoin(item.attrib["href"])}) return links def _parse_year_from_agenda_link(self, item): @@ -194,19 +214,20 @@ def _parse_email_contact(self, item): """Parses the email for a committee's contact""" email_str = item.css("p.mt-1::text").extract()[2] return email_str.replace(": ", "") - + def _parse_meeting_schedule_info(self, committee_meta): - """Parses out the weekday, and frequency of the meeting for calculating future dates""" - # Add special case for downtown downtown meetings are the day before city planning, - # so we calculate using the city planning schedule (1, and 3rd Friday) and set a flag - # so we can subtract a day from the results + """Parses out the weekday, and frequency of the meeting for calculating + future dates""" + # Add special case for downtown downtown meetings are the day before city + # planning, so we calculate using the city planning schedule (1, and 3rd + # Friday) and set a flag so we can subtract a day from the results committee_str = " ".join(committee_meta.css("p.mb-1::text").extract()) is_downtown = "prior to the City Planning Commission" in committee_str if is_downtown: weekday = 4 chosen_weeks = [0, 2] - else: + else: weekday_str = committee_meta.css("p.mb-1 strong::text").extract_first() weekday = self._parse_weekday(weekday_str) raw_weeks = re.findall(r"1st|2nd|3rd|4th", committee_str) @@ -215,28 +236,30 @@ def _parse_meeting_schedule_info(self, committee_meta): def _parse_weekday(self, weekday): """Parses weekday strings as their integer equivalent""" - # we cut off the last char of weekday, because it comes through with an 's' i.e. 'Tuesdays' + # we cut off the last char of weekday, because it comes through with + # an 's' i.e. 'Tuesdays' return time.strptime(weekday[:-1], "%A").tm_wday def _parse_ordinal(self, ordinal_str): """Parses ordinals as their integer equivalent beginning from 0""" - ordinal_lookup = { - "1st": 0, - "2nd": 1, - "3rd": 2, - "4th": 3 - } + ordinal_lookup = {"1st": 0, "2nd": 1, "3rd": 2, "4th": 3} return ordinal_lookup[ordinal_str.lower()] - def _calculate_upcoming_meeting_days(self, chosen_weekday, chosen_weeks, start, end): + def _calculate_upcoming_meeting_days( + self, chosen_weekday, chosen_weeks, start, end + ): """ - This function is used to calculate meeting dates described as the 1 and 3rd Tuesday of a month for - any given time frame between start and end dates. + This function is used to calculate meeting dates described as the 1st + and 3rd Tuesday of a month for any given time frame between start and + end dates. Parameters: - chosen_weekday (int): the weekday that you're looking for. Monday is 0, so in the examples above this would be 2 - chosen_weeks (int[]): the particular days you're looking for - like 1st and 3rd. These days should be passed though starting the count from 0, i.e [0, 2] for first and third - start (date): the first day to begin calculating meetings from + chosen_weekday (int): the weekday that you're looking for. Monday is 0, + so in the examples above this would be 2 + chosen_weeks (int[]): the particular days you're looking for - like 1st + and 3rd. These days should be passed though starting the count from 0, + i.e [0, 2] for first and third + start (date): the first day to begin calculating meetings from end (date): the final day to be considered as a potential meeting date Returns: @@ -245,32 +268,49 @@ def _calculate_upcoming_meeting_days(self, chosen_weekday, chosen_weeks, start, current_month = start.month current_year = start.year - # current_month_days = calculate_meeting_days_per_month(chosen_weekday, chosen_weeks, current_year, current_month) - # raw_days = [(day, current_month, current_year) for day in current_month_days] - raw_dates = [] while not (current_month == end.month and current_year == end.year): - current_month_days = self._calculate_meeting_days_per_month(chosen_weekday, chosen_weeks, current_year, current_month) - raw_dates = raw_dates + [date(current_year, current_month, day) for day in current_month_days] - - # we can't easily use % arithmetic here since we're starting at 1, so it's a bit easier to read this way - current_month = current_month+1 if current_month != 12 else 1 + current_month_days = self._calculate_meeting_days_per_month( + chosen_weekday, chosen_weeks, current_year, current_month + ) + raw_dates = raw_dates + [ + date(current_year, current_month, day) for day in current_month_days + ] + + # we can't easily use % arithmetic here since we're starting at 1, so + # it's a bit easier to read this way + current_month = current_month + 1 if current_month != 12 else 1 if current_month == 1: current_year = current_year + 1 - + # add the days for the final month since they're missed by the loop - current_month_days = self._calculate_meeting_days_per_month(chosen_weekday, chosen_weeks, current_year, current_month) - raw_dates = raw_dates + [date(current_year, current_month, day) for day in current_month_days] - # we now have all the relevant dates for the given months but we need to filter out days before and after start and end - return [current_date for current_date in raw_dates if (start.date() <= current_date <= end.date())] + current_month_days = self._calculate_meeting_days_per_month( + chosen_weekday, chosen_weeks, current_year, current_month + ) + raw_dates = raw_dates + [ + date(current_year, current_month, day) for day in current_month_days + ] + # we now have all the relevant dates for the given months but we need to + # filter out days before and after start and end + return [ + current_date + for current_date in raw_dates + if (start.date() <= current_date <= end.date()) + ] - def _calculate_meeting_days_per_month(self, chosen_weekday, chosen_weeks, year, month): + def _calculate_meeting_days_per_month( + self, chosen_weekday, chosen_weeks, year, month + ): """ - This function is used to calculate meeting dates described as the 1 and 3rd Tuesday of a month. + This function is used to calculate meeting dates described as the 1 and 3rd + Tuesday of a month. Parameters: - chosen_weekday (int): the weekday that you're looking for. Monday is 0, so in the examples above this would be 2 - chosen_weeks (int[]): the particular days you're looking for - like 1st and 3rd. These days should be passed though starting the count from 0, i.e [0, 2] for first and third + chosen_weekday (int): the weekday that you're looking for. Monday is 0, so + in the examples above this would be 2 + chosen_weeks (int[]): the particular days you're looking for - like 1st and + 3rd. These days should be passed though starting the count from 0, + i.e [0, 2] for first and third year (int): the year as an integer month (int): the month as an integer @@ -279,10 +319,18 @@ def _calculate_meeting_days_per_month(self, chosen_weekday, chosen_weeks, year, """ days_of_the_month = calendar.Calendar().itermonthdays2(year, month) - # we create a list of all days in the month that are the proper weekday - day is 0 if it is outside the month - # but present to make complete first or last weeks - potential_days = [day for day, weekday in days_of_the_month if day != 0 and weekday == chosen_weekday] - # we then add one to the index and see if the resulting number is in the chosen_weeks array - chosen_days = [day for i, day in enumerate(potential_days) if (i) in chosen_weeks ] + # we create a list of all days in the month that are the proper weekday - + # day is 0 if it is outside the month but present to make complete first or + # last weeks + potential_days = [ + day + for day, weekday in days_of_the_month + if day != 0 and weekday == chosen_weekday + ] + # we then add one to the index and see if the resulting number is in the + # chosen_weeks array + chosen_days = [ + day for i, day in enumerate(potential_days) if (i) in chosen_weeks + ] return chosen_days diff --git a/tests/test_cle_design_review.py b/tests/test_cle_design_review.py index 24b041f..1dfa826 100644 --- a/tests/test_cle_design_review.py +++ b/tests/test_cle_design_review.py @@ -10,9 +10,7 @@ test_response = file_response( join(dirname(__file__), "files", "cle_design_review.html"), - url=( - "https://planning.clevelandohio.gov/designreview/schedule.php" # noqa - ), + url=("https://planning.clevelandohio.gov/designreview/schedule.php"), # noqa ) spider = CleDesignReviewSpider() @@ -45,7 +43,10 @@ def test_end(): def test_time_notes(): - assert parsed_items[0]["time_notes"] == "Due to Covid meetings are generally being held on WebEx rather than in person. For more information contact asantora@clevelandohio.gov" + assert ( + parsed_items[0]["time_notes"] + == "Due to Covid meetings are generally being held on WebEx rather than in person. For more information contact asantora@clevelandohio.gov" # noqa + ) def test_id(): @@ -64,7 +65,7 @@ def test_location(): "name": "City Hall", "address": "601 Lakeside Ave, Room 514, Cleveland OH 44114", } - + def test_source(): assert ( @@ -89,7 +90,10 @@ def test_classification(): def test_all_day(): assert parsed_items[0]["all_day"] is False -# There's a second set of tests to make sure that we're correclty parsing out details for meetings based on calculated times + +""" There's a second set of tests to make sure that we're correctly parsing +out details for meetings based on calculated times""" + def test_future_meeting_title(): assert parsed_items[-1]["title"] == "Southeast Design Review Committee" @@ -108,7 +112,10 @@ def test_future_meeting_end(): def test_future_meeting_time_notes(): - assert parsed_items[-1]["time_notes"] == "Due to Covid meetings are generally being held on WebEx rather than in person. For more information contact mfields@clevelandohio.gov" + assert ( + parsed_items[-1]["time_notes"] + == "Due to Covid meetings are generally being held on WebEx rather than in person. For more information contact mfields@clevelandohio.gov" # noqa + ) def test_future_meeting_id(): @@ -128,6 +135,7 @@ def test_future_meeting_location(): "address": "13512 Kinsman Road Cleveland, OH", } + def test_future_meeting_source(): assert ( parsed_items[-1]["source"] From 8d9b5d85cff60223bb0d15e0e3f369853155a4fe Mon Sep 17 00:00:00 2001 From: Zee Abrahams Date: Fri, 3 Dec 2021 10:41:17 -0500 Subject: [PATCH 07/12] Fix small comments issues --- city_scrapers/spiders/cle_design_review.py | 21 ++++++++++----------- 1 file changed, 10 insertions(+), 11 deletions(-) diff --git a/city_scrapers/spiders/cle_design_review.py b/city_scrapers/spiders/cle_design_review.py index 4e9455b..eb605d1 100644 --- a/city_scrapers/spiders/cle_design_review.py +++ b/city_scrapers/spiders/cle_design_review.py @@ -38,16 +38,16 @@ def parse(self, response): not a meeting occurred but doesn't have an agenda, or whether a meeting is going to happen on a normal meeting date. The strategy I'm using is to treat the agenda links as authoritative for past (and if listed - upcoming) meetings. So previous meetings are just read off of the agenda - links. For future meetings we take the date of the most recent agenda - and then calculate the remaining meetings this year from that date. As - dates progress and agendas are added, those tentative meetings will - either be confirmed to exist or disappear based on the ways the agendas - are updated. + upcoming) meetings. So previous meetings are just read off of the agenda + links. For future meetings we take the date of the most recent agenda + and then calculate meetings for 60 days from that date. As dates + progress and agendas are added, those tentative meetings will either be + confirmed to exist or disappear based on the ways the agendas are + updated. - 2. There is no mention of the year anywhere in the text of the site. We - can extract it from the agenda link - at least for now. But it will - be important to keep an eye on how the site is changed in January. + 2. There is no mention of the year anywhere in the text of the site. We + can extract it from the agenda link - at least for now. But it will + be important to keep an eye on how the site is changed in January. 3. Meetings are currently not being held in person but over webex. We've included this information in the time_notes section of the meeting. @@ -327,8 +327,7 @@ def _calculate_meeting_days_per_month( for day, weekday in days_of_the_month if day != 0 and weekday == chosen_weekday ] - # we then add one to the index and see if the resulting number is in the - # chosen_weeks array + # we then see if the resulting number is in the chosen_weeks array chosen_days = [ day for i, day in enumerate(potential_days) if (i) in chosen_weeks ] From 59322d554ff5445072123ccc8f1b25840daf1fb5 Mon Sep 17 00:00:00 2001 From: Zee Abrahams Date: Fri, 3 Dec 2021 10:54:15 -0500 Subject: [PATCH 08/12] Fix import order --- city_scrapers/spiders/cle_design_review.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/city_scrapers/spiders/cle_design_review.py b/city_scrapers/spiders/cle_design_review.py index eb605d1..4860f05 100644 --- a/city_scrapers/spiders/cle_design_review.py +++ b/city_scrapers/spiders/cle_design_review.py @@ -1,7 +1,7 @@ -import re import calendar +import re import time -from datetime import datetime, date, timedelta +from datetime import date, datetime, timedelta from city_scrapers_core.constants import ADVISORY_COMMITTEE from city_scrapers_core.items import Meeting From ef7f84f832bfdba203621cc5f4a3cadc9133efb8 Mon Sep 17 00:00:00 2001 From: Zee Abrahams Date: Tue, 7 Dec 2021 12:23:03 -0500 Subject: [PATCH 09/12] Moves webex info to meeting, removes unnecessary tmp and fixes downtown timing bug --- 1 | 0 city_scrapers/spiders/cle_design_review.py | 33 +++++++++++++--------- tests/test_cle_design_review.py | 22 +++++++-------- 3 files changed, 30 insertions(+), 25 deletions(-) create mode 100644 1 diff --git a/1 b/1 new file mode 100644 index 0000000..e69de29 diff --git a/city_scrapers/spiders/cle_design_review.py b/city_scrapers/spiders/cle_design_review.py index 4860f05..8b09565 100644 --- a/city_scrapers/spiders/cle_design_review.py +++ b/city_scrapers/spiders/cle_design_review.py @@ -15,7 +15,8 @@ class CleDesignReviewSpider(CityScrapersSpider): start_urls = [ "https://planning.clevelandohio.gov/designreview/schedule.php" # noqa ] - time_notes = "Due to Covid meetings are generally being held on WebEx rather than in person. For more information contact " # noqa + description = "Due to Covid meetings are being held on WebEx rather than in person. For more information contact " # noqa + calculated_description = "This is an upcoming meeting - please verify it with staff if you want attend. Due to Covid meetings are being held on WebEx rather than in person. For more information contact " # noqa def parse(self, response): """ @@ -43,17 +44,16 @@ def parse(self, response): and then calculate meetings for 60 days from that date. As dates progress and agendas are added, those tentative meetings will either be confirmed to exist or disappear based on the ways the agendas are - updated. + updated. For calculated meetings we add a line to the description + encouraging users to verify the meeting with staff before attempting to + attend. 2. There is no mention of the year anywhere in the text of the site. We can extract it from the agenda link - at least for now. But it will be important to keep an eye on how the site is changed in January. 3. Meetings are currently not being held in person but over webex. We've - included this information in the time_notes section of the meeting. - Perhaps a more general notes section would make a bit more sense, but - given the current fields on the meeting object, time notes seemed like - a reasonable place to put this. + included this information in the meeting description. """ committee_metas = response.css( "div.mt-3" @@ -92,12 +92,12 @@ def parse(self, response): continue meeting = Meeting( title=title, - description="", + description=self.description + email_contact, classification=ADVISORY_COMMITTEE, start=start, end=None, all_day=False, - time_notes=self.time_notes + email_contact, + time_notes="", location=location, links=self._parse_links(agenda, response), source=response.url, @@ -111,26 +111,31 @@ def parse(self, response): # next we calculate upcoming meeting dates for 60 days after the # last agenda date calc_start = most_recent_start + timedelta(days=1) + # since downtown meetings are calculated based on the city planning + # meeting one day ahead, we need to add an extra day to avoid + if is_downtown: + calc_start = calc_start + timedelta(days=1) + calc_end = calc_start + timedelta(days=60) + upcoming_meetings = self._calculate_upcoming_meeting_days( weekday, chosen_weeks, calc_start, calc_end ) if is_downtown: # downtown meetings are a day before the one calculated - # this tmp is just to make sure we weren't mutating the list as - # we used it in a comprehension - tmp = [day + timedelta(days=-1) for day in upcoming_meetings] - upcoming_meetings = tmp + upcoming_meetings = [ + day + timedelta(days=-1) for day in upcoming_meetings + ] for day in upcoming_meetings: start = self._parse_calculated_start(day, time_str) meeting = Meeting( title=title, - description="", + description=self.calculated_description + email_contact, classification=ADVISORY_COMMITTEE, start=start, end=None, all_day=False, - time_notes=self.time_notes + email_contact, + time_notes="", location=location, links=[], source=response.url, diff --git a/tests/test_cle_design_review.py b/tests/test_cle_design_review.py index 1dfa826..49b2501 100644 --- a/tests/test_cle_design_review.py +++ b/tests/test_cle_design_review.py @@ -23,7 +23,7 @@ def test_count(): - assert len(parsed_items) == 118 + assert len(parsed_items) == 117 def test_title(): @@ -31,7 +31,10 @@ def test_title(): def test_description(): - assert parsed_items[0]["description"] == "" + assert ( + parsed_items[0]["description"] + == "Due to Covid meetings are being held on WebEx rather than in person. For more information contact asantora@clevelandohio.gov" # noqa + ) def test_start(): @@ -43,10 +46,7 @@ def test_end(): def test_time_notes(): - assert ( - parsed_items[0]["time_notes"] - == "Due to Covid meetings are generally being held on WebEx rather than in person. For more information contact asantora@clevelandohio.gov" # noqa - ) + assert parsed_items[0]["time_notes"] == "" def test_id(): @@ -100,7 +100,10 @@ def test_future_meeting_title(): def test_future_meeting_description(): - assert parsed_items[-1]["description"] == "" + assert ( + parsed_items[-1]["description"] + == "This is an upcoming meeting - please verify it with staff if you want attend. Due to Covid meetings are being held on WebEx rather than in person. For more information contact mfields@clevelandohio.gov" # noqa + ) def test_future_meeting_start(): @@ -112,10 +115,7 @@ def test_future_meeting_end(): def test_future_meeting_time_notes(): - assert ( - parsed_items[-1]["time_notes"] - == "Due to Covid meetings are generally being held on WebEx rather than in person. For more information contact mfields@clevelandohio.gov" # noqa - ) + assert parsed_items[-1]["time_notes"] == "" def test_future_meeting_id(): From 63de0535887b4fb63653ae77c5f594653e0e8479 Mon Sep 17 00:00:00 2001 From: Zee Abrahams Date: Thu, 16 Dec 2021 18:02:10 -0500 Subject: [PATCH 10/12] moves meeting date calculator --- 1 | 0 city_scrapers/spiders/cle_design_review.py | 106 ++---------------- city_scrapers/utils/__init__.py | 1 + .../utils/meeting_date_calculator.py | 104 +++++++++++++++++ 4 files changed, 115 insertions(+), 96 deletions(-) delete mode 100644 1 create mode 100644 city_scrapers/utils/__init__.py create mode 100644 city_scrapers/utils/meeting_date_calculator.py diff --git a/1 b/1 deleted file mode 100644 index e69de29..0000000 diff --git a/city_scrapers/spiders/cle_design_review.py b/city_scrapers/spiders/cle_design_review.py index 8b09565..2832ba3 100644 --- a/city_scrapers/spiders/cle_design_review.py +++ b/city_scrapers/spiders/cle_design_review.py @@ -1,11 +1,11 @@ -import calendar import re import time -from datetime import date, datetime, timedelta +from datetime import datetime, timedelta from city_scrapers_core.constants import ADVISORY_COMMITTEE from city_scrapers_core.items import Meeting from city_scrapers_core.spiders import CityScrapersSpider +from city_scrapers.utils import MeetingDateCalculator class CleDesignReviewSpider(CityScrapersSpider): @@ -72,7 +72,7 @@ def parse(self, response): location = self._parse_location(committee_meta) time_str = self._parse_time_str(committee_meta) email_contact = self._parse_email_contact(committee_meta) - weekday, chosen_weeks, is_downtown = self._parse_meeting_schedule_info( + weekday, chosen_ordinals, is_downtown = self._parse_meeting_schedule_info( committee_meta ) most_recent_start = datetime.today() @@ -118,8 +118,8 @@ def parse(self, response): calc_end = calc_start + timedelta(days=60) - upcoming_meetings = self._calculate_upcoming_meeting_days( - weekday, chosen_weeks, calc_start, calc_end + upcoming_meetings = MeetingDateCalculator.calculate_upcoming_meeting_days( + weekday, chosen_ordinals, calc_start, calc_end ) if is_downtown: # downtown meetings are a day before the one calculated upcoming_meetings = [ @@ -231,13 +231,14 @@ def _parse_meeting_schedule_info(self, committee_meta): if is_downtown: weekday = 4 - chosen_weeks = [0, 2] + chosen_ordinals = [0, 2] else: weekday_str = committee_meta.css("p.mb-1 strong::text").extract_first() weekday = self._parse_weekday(weekday_str) raw_weeks = re.findall(r"1st|2nd|3rd|4th", committee_str) - chosen_weeks = [self._parse_ordinal(ordinal) for ordinal in raw_weeks] - return weekday, chosen_weeks, is_downtown + # ordinals here just refer to the 1st, 2nd etc... + chosen_ordinals = [self._parse_ordinal(ordinal) for ordinal in raw_weeks] + return weekday, chosen_ordinals, is_downtown def _parse_weekday(self, weekday): """Parses weekday strings as their integer equivalent""" @@ -250,91 +251,4 @@ def _parse_ordinal(self, ordinal_str): ordinal_lookup = {"1st": 0, "2nd": 1, "3rd": 2, "4th": 3} return ordinal_lookup[ordinal_str.lower()] - def _calculate_upcoming_meeting_days( - self, chosen_weekday, chosen_weeks, start, end - ): - """ - This function is used to calculate meeting dates described as the 1st - and 3rd Tuesday of a month for any given time frame between start and - end dates. - - Parameters: - chosen_weekday (int): the weekday that you're looking for. Monday is 0, - so in the examples above this would be 2 - chosen_weeks (int[]): the particular days you're looking for - like 1st - and 3rd. These days should be passed though starting the count from 0, - i.e [0, 2] for first and third - start (date): the first day to begin calculating meetings from - end (date): the final day to be considered as a potential meeting date - - Returns: - []date: an array of dates that match the given conditions - """ - current_month = start.month - current_year = start.year - - raw_dates = [] - while not (current_month == end.month and current_year == end.year): - current_month_days = self._calculate_meeting_days_per_month( - chosen_weekday, chosen_weeks, current_year, current_month - ) - raw_dates = raw_dates + [ - date(current_year, current_month, day) for day in current_month_days - ] - - # we can't easily use % arithmetic here since we're starting at 1, so - # it's a bit easier to read this way - current_month = current_month + 1 if current_month != 12 else 1 - if current_month == 1: - current_year = current_year + 1 - - # add the days for the final month since they're missed by the loop - current_month_days = self._calculate_meeting_days_per_month( - chosen_weekday, chosen_weeks, current_year, current_month - ) - raw_dates = raw_dates + [ - date(current_year, current_month, day) for day in current_month_days - ] - # we now have all the relevant dates for the given months but we need to - # filter out days before and after start and end - return [ - current_date - for current_date in raw_dates - if (start.date() <= current_date <= end.date()) - ] - - def _calculate_meeting_days_per_month( - self, chosen_weekday, chosen_weeks, year, month - ): - """ - This function is used to calculate meeting dates described as the 1 and 3rd - Tuesday of a month. - - Parameters: - chosen_weekday (int): the weekday that you're looking for. Monday is 0, so - in the examples above this would be 2 - chosen_weeks (int[]): the particular days you're looking for - like 1st and - 3rd. These days should be passed though starting the count from 0, - i.e [0, 2] for first and third - year (int): the year as an integer - month (int): the month as an integer - - Returns: - []int: an array of the days of the month that matched the given conditions. - """ - - days_of_the_month = calendar.Calendar().itermonthdays2(year, month) - # we create a list of all days in the month that are the proper weekday - - # day is 0 if it is outside the month but present to make complete first or - # last weeks - potential_days = [ - day - for day, weekday in days_of_the_month - if day != 0 and weekday == chosen_weekday - ] - # we then see if the resulting number is in the chosen_weeks array - chosen_days = [ - day for i, day in enumerate(potential_days) if (i) in chosen_weeks - ] - - return chosen_days + \ No newline at end of file diff --git a/city_scrapers/utils/__init__.py b/city_scrapers/utils/__init__.py new file mode 100644 index 0000000..5a2a653 --- /dev/null +++ b/city_scrapers/utils/__init__.py @@ -0,0 +1 @@ +from .meeting_date_calculator import MeetingDateCalculator \ No newline at end of file diff --git a/city_scrapers/utils/meeting_date_calculator.py b/city_scrapers/utils/meeting_date_calculator.py new file mode 100644 index 0000000..d72e641 --- /dev/null +++ b/city_scrapers/utils/meeting_date_calculator.py @@ -0,0 +1,104 @@ +import calendar +from datetime import date + +class MeetingDateCalculator: + + @staticmethod + def calculate_upcoming_meeting_days( + chosen_weekday, chosen_ordinals, start, end + ): + """ + Lots of city meeting websites describe their upcoming meetings by saying + things like: "this committee meets the 1st and 3rd Tuesday of every month". + This calculator is intended to help parse dates from such a description. It + doesn't handle parsing the actual language, since that might differ from page + to page, but given a weekday, and a list of the oridnals you care about (like + 1st, 3rd), a start date and an end date, it will return all the meeting dates + that match the weekday and ordinals. + + Parameters: + chosen_weekday (int): the weekday that you're looking for. Monday is 0, + so in the examples above this would be 2 + chosen_ordinals (int[]): the particular days you're looking for - like 1st + and 3rd. These days should be passed though starting the count from 0, + i.e [0, 2] for first and third + start (date): the first day to begin calculating meetings from + end (date): the final day to be considered as a potential meeting date + + Returns: + []date: an array of dates that match the given conditions + """ + current_month = start.month + current_year = start.year + + raw_dates = [] + while not (current_month == end.month and current_year == end.year): + current_month_days = MeetingDateCalculator._calculate_meeting_days_per_month( + chosen_weekday, chosen_ordinals, current_year, current_month + ) + raw_dates = raw_dates + [ + date(current_year, current_month, day) for day in current_month_days + ] + + # we can't easily use % arithmetic here since we're starting at 1, so + # it's a bit easier to read this way + current_month = current_month + 1 if current_month != 12 else 1 + if current_month == 1: + current_year = current_year + 1 + + # add the days for the final month since they're missed by the loop + current_month_days = MeetingDateCalculator._calculate_meeting_days_per_month( + chosen_weekday, chosen_ordinals, current_year, current_month + ) + raw_dates = raw_dates + [ + date(current_year, current_month, day) for day in current_month_days + ] + # we now have all the relevant dates for the given months but we need to + # filter out days before and after start and end + return [ + current_date + for current_date in raw_dates + if (start.date() <= current_date <= end.date()) + ] + + @staticmethod + def _calculate_meeting_days_per_month( + chosen_weekday, chosen_ordinals, year, month + ): + """ + Lots of city meeting websites describe their upcoming meetings by saying + things like: "this committee meets the 1st and 3rd Tuesday of every month". + This calculator is intended to help parse dates from such a description. It + doesn't handle parsing the actual language, since that might differ from page + to page, but given a weekday, and a list of the oridnals you care about (like + 1st, 3rd) and a month it will return all the days in the month that match the + given conditions. + + Parameters: + chosen_weekday (int): the weekday that you're looking for. Monday is 0, so + in the examples above this would be 2 + chosen_ordinals (int[]): the particular days you're looking for - like 1st and + 3rd. These days should be passed though starting the count from 0, + i.e [0, 2] for first and third + year (int): the year as an integer + month (int): the month as an integer + + Returns: + []int: an array of the days of the month that matched the given conditions. + """ + + days_of_the_month = calendar.Calendar().itermonthdays2(year, month) + # we create a list of all days in the month that are the proper weekday - + # day is 0 if it is outside the month but present to make complete first or + # last weeks + potential_days = [ + day + for day, weekday in days_of_the_month + if day != 0 and weekday == chosen_weekday + ] + # we then see if the resulting number is in the chosen_weeks array + chosen_days = [ + day for i, day in enumerate(potential_days) if (i) in chosen_ordinals + ] + + return chosen_days From 80dbbc3fd6c41f29bbd3b2ba617a3a4e796c5b47 Mon Sep 17 00:00:00 2001 From: Zee Abrahams Date: Fri, 17 Dec 2021 11:11:40 -0500 Subject: [PATCH 11/12] Gets tests written and everything linted --- city_scrapers/spiders/cle_design_review.py | 7 +- city_scrapers/utils/__init__.py | 2 +- .../utils/meeting_date_calculator.py | 168 +++++++++--------- tests/test_meeting_date_calculator.py | 88 +++++++++ 4 files changed, 173 insertions(+), 92 deletions(-) create mode 100644 tests/test_meeting_date_calculator.py diff --git a/city_scrapers/spiders/cle_design_review.py b/city_scrapers/spiders/cle_design_review.py index 2832ba3..573f010 100644 --- a/city_scrapers/spiders/cle_design_review.py +++ b/city_scrapers/spiders/cle_design_review.py @@ -5,7 +5,8 @@ from city_scrapers_core.constants import ADVISORY_COMMITTEE from city_scrapers_core.items import Meeting from city_scrapers_core.spiders import CityScrapersSpider -from city_scrapers.utils import MeetingDateCalculator + +from city_scrapers.utils import calculate_upcoming_meeting_days class CleDesignReviewSpider(CityScrapersSpider): @@ -118,7 +119,7 @@ def parse(self, response): calc_end = calc_start + timedelta(days=60) - upcoming_meetings = MeetingDateCalculator.calculate_upcoming_meeting_days( + upcoming_meetings = calculate_upcoming_meeting_days( weekday, chosen_ordinals, calc_start, calc_end ) if is_downtown: # downtown meetings are a day before the one calculated @@ -250,5 +251,3 @@ def _parse_ordinal(self, ordinal_str): """Parses ordinals as their integer equivalent beginning from 0""" ordinal_lookup = {"1st": 0, "2nd": 1, "3rd": 2, "4th": 3} return ordinal_lookup[ordinal_str.lower()] - - \ No newline at end of file diff --git a/city_scrapers/utils/__init__.py b/city_scrapers/utils/__init__.py index 5a2a653..67e146f 100644 --- a/city_scrapers/utils/__init__.py +++ b/city_scrapers/utils/__init__.py @@ -1 +1 @@ -from .meeting_date_calculator import MeetingDateCalculator \ No newline at end of file +from .meeting_date_calculator import calculate_upcoming_meeting_days # noqa diff --git a/city_scrapers/utils/meeting_date_calculator.py b/city_scrapers/utils/meeting_date_calculator.py index d72e641..0cc821d 100644 --- a/city_scrapers/utils/meeting_date_calculator.py +++ b/city_scrapers/utils/meeting_date_calculator.py @@ -1,104 +1,98 @@ import calendar from datetime import date -class MeetingDateCalculator: - @staticmethod - def calculate_upcoming_meeting_days( - chosen_weekday, chosen_ordinals, start, end - ): - """ - Lots of city meeting websites describe their upcoming meetings by saying - things like: "this committee meets the 1st and 3rd Tuesday of every month". - This calculator is intended to help parse dates from such a description. It - doesn't handle parsing the actual language, since that might differ from page - to page, but given a weekday, and a list of the oridnals you care about (like - 1st, 3rd), a start date and an end date, it will return all the meeting dates - that match the weekday and ordinals. +def calculate_upcoming_meeting_days(chosen_weekday, chosen_ordinals, start, end): + """ + Lots of city meeting websites describe their upcoming meetings by saying + things like: "this committee meets the 1st and 3rd Tuesday of every month ". + This calculator is intended to help parse dates from such a description. It + doesn't handle parsing the actual language, since that might differ from page + to page, but given a weekday, and a list of the oridnals you care about (like + 1st, 3rd), a start date and an end date, it will return all the meeting dates + that match the weekday and ordinals. - Parameters: - chosen_weekday (int): the weekday that you're looking for. Monday is 0, - so in the examples above this would be 2 - chosen_ordinals (int[]): the particular days you're looking for - like 1st - and 3rd. These days should be passed though starting the count from 0, - i.e [0, 2] for first and third - start (date): the first day to begin calculating meetings from - end (date): the final day to be considered as a potential meeting date + Parameters: + chosen_weekday (int): the weekday that you're looking for. Monday is 0, + so in the examples above this would be 2 + chosen_ordinals (int[]): the particular days you're looking for - like 1st + and 3rd. These days should be passed though starting the count from 0, + i.e [0, 2] for first and third + start (date): the first day to begin calculating meetings from + end (date): the final day to be considered as a potential meeting date - Returns: - []date: an array of dates that match the given conditions - """ - current_month = start.month - current_year = start.year + Returns: + []date: an array of dates that match the given conditions + """ + current_month = start.month + current_year = start.year - raw_dates = [] - while not (current_month == end.month and current_year == end.year): - current_month_days = MeetingDateCalculator._calculate_meeting_days_per_month( - chosen_weekday, chosen_ordinals, current_year, current_month - ) - raw_dates = raw_dates + [ - date(current_year, current_month, day) for day in current_month_days - ] - - # we can't easily use % arithmetic here since we're starting at 1, so - # it's a bit easier to read this way - current_month = current_month + 1 if current_month != 12 else 1 - if current_month == 1: - current_year = current_year + 1 - - # add the days for the final month since they're missed by the loop - current_month_days = MeetingDateCalculator._calculate_meeting_days_per_month( + raw_dates = [] + while not (current_month == end.month and current_year == end.year): + current_month_days = _calculate_meeting_days_per_month( chosen_weekday, chosen_ordinals, current_year, current_month ) raw_dates = raw_dates + [ date(current_year, current_month, day) for day in current_month_days ] - # we now have all the relevant dates for the given months but we need to - # filter out days before and after start and end - return [ - current_date - for current_date in raw_dates - if (start.date() <= current_date <= end.date()) - ] - @staticmethod - def _calculate_meeting_days_per_month( - chosen_weekday, chosen_ordinals, year, month - ): - """ - Lots of city meeting websites describe their upcoming meetings by saying - things like: "this committee meets the 1st and 3rd Tuesday of every month". - This calculator is intended to help parse dates from such a description. It - doesn't handle parsing the actual language, since that might differ from page - to page, but given a weekday, and a list of the oridnals you care about (like - 1st, 3rd) and a month it will return all the days in the month that match the - given conditions. + # we can't easily use % arithmetic here since we're starting at 1, so + # it's a bit easier to read this way + current_month = current_month + 1 if current_month != 12 else 1 + if current_month == 1: + current_year = current_year + 1 - Parameters: - chosen_weekday (int): the weekday that you're looking for. Monday is 0, so - in the examples above this would be 2 - chosen_ordinals (int[]): the particular days you're looking for - like 1st and - 3rd. These days should be passed though starting the count from 0, - i.e [0, 2] for first and third - year (int): the year as an integer - month (int): the month as an integer + # add the days for the final month since they're missed by the loop + current_month_days = _calculate_meeting_days_per_month( + chosen_weekday, chosen_ordinals, current_year, current_month + ) + raw_dates = raw_dates + [ + date(current_year, current_month, day) for day in current_month_days + ] + # we now have all the relevant dates for the given months but we need to + # filter out days before and after start and end + return [ + current_date + for current_date in raw_dates + if (start.date() <= current_date <= end.date()) + ] - Returns: - []int: an array of the days of the month that matched the given conditions. - """ - days_of_the_month = calendar.Calendar().itermonthdays2(year, month) - # we create a list of all days in the month that are the proper weekday - - # day is 0 if it is outside the month but present to make complete first or - # last weeks - potential_days = [ - day - for day, weekday in days_of_the_month - if day != 0 and weekday == chosen_weekday - ] - # we then see if the resulting number is in the chosen_weeks array - chosen_days = [ - day for i, day in enumerate(potential_days) if (i) in chosen_ordinals - ] +def _calculate_meeting_days_per_month(chosen_weekday, chosen_ordinals, year, month): + """ + Lots of city meeting websites describe their upcoming meetings by saying + things like: "this committee meets the 1st and 3rd Tuesday of every month". + This calculator is intended to help parse dates from such a description. It + doesn't handle parsing the actual language, since that might differ from page + to page, but given a weekday, and a list of the oridnals you care about (like + 1st, 3rd) and a month it will return all the days in the month that match the + given conditions. + + Parameters: + chosen_weekday (int): the weekday that you're looking for. Monday is 0, so + in the examples above this would be 2 + chosen_ordinals (int[]): the particular days you're looking for - like 1st and + 3rd. These days should be passed though starting the count from 0, + i.e [0, 2] for first and third + year (int): the year as an integer + month (int): the month as an integer + + Returns: + []int: an array of the days of the month that matched the given conditions. + """ + + days_of_the_month = calendar.Calendar().itermonthdays2(year, month) + # we create a list of all days in the month that are the proper weekday - + # day is 0 if it is outside the month but present to make complete first or + # last weeks + potential_days = [ + day + for day, weekday in days_of_the_month + if day != 0 and weekday == chosen_weekday + ] + # we then see if the resulting number is in the chosen_weeks array + chosen_days = [ + day for i, day in enumerate(potential_days) if (i) in chosen_ordinals + ] - return chosen_days + return chosen_days diff --git a/tests/test_meeting_date_calculator.py b/tests/test_meeting_date_calculator.py new file mode 100644 index 0000000..2534e26 --- /dev/null +++ b/tests/test_meeting_date_calculator.py @@ -0,0 +1,88 @@ +from datetime import date, datetime + +import pytest # noqa + +from city_scrapers.utils import calculate_upcoming_meeting_days + +start = datetime(2021, 12, 1) +end = datetime(2022, 1, 1) + + +# test a random input - 1 and 3rd tuesday (1) +def test_happy_path(): + expected = [date(2021, 12, 7), date(2021, 12, 21)] + + out = calculate_upcoming_meeting_days(1, [0, 2], start, end) + assert out == expected + + +def test_single_day(): + expected = [date(2021, 12, 14)] + + out = calculate_upcoming_meeting_days(1, [1], start, end) + assert out == expected + + +def test_multiple_months(): + end = datetime(2022, 2, 1) + expected = [ + date(2021, 12, 14), + date(2021, 12, 28), + date(2022, 1, 11), + date(2022, 1, 25), + ] + + out = calculate_upcoming_meeting_days(1, [1, 3], start, end) + assert out == expected + + +def test_start(): + start = datetime(2021, 12, 8) + expected = [date(2021, 12, 21)] + + out = calculate_upcoming_meeting_days(1, [0, 2], start, end) + assert out == expected + + +def test_start_is_inclusive(): + start = datetime(2021, 12, 7) + expected = [date(2021, 12, 7), date(2021, 12, 21)] + + out = calculate_upcoming_meeting_days(1, [0, 2], start, end) + assert out == expected + + +def test_end(): + end = datetime(2021, 12, 20) + expected = [date(2021, 12, 7)] + + out = calculate_upcoming_meeting_days(1, [0, 2], start, end) + assert out == expected + + +def test_end_is_inclusive(): + end = datetime(2021, 12, 21) + expected = [date(2021, 12, 7), date(2021, 12, 21)] + + out = calculate_upcoming_meeting_days(1, [0, 2], start, end) + assert out == expected + + +def test_all_5(): + expected = [ + date(2021, 12, 1), + date(2021, 12, 8), + date(2021, 12, 15), + date(2021, 12, 22), + date(2021, 12, 29), + ] + + out = calculate_upcoming_meeting_days(2, [0, 1, 2, 3, 4], start, end) + assert out == expected + + +def test_ordinals_over_4_are_ignored(): + expected = [] + + out = calculate_upcoming_meeting_days(1, [5, 6, 7, 8], start, end) + assert out == expected From ee4570b2cd1a3857f9b5e71231ce1e92c5580287 Mon Sep 17 00:00:00 2001 From: Zee Abrahams Date: Fri, 17 Dec 2021 11:23:13 -0500 Subject: [PATCH 12/12] cleans up date vs datetime in the calculator --- city_scrapers/spiders/cle_design_review.py | 2 +- city_scrapers/utils/meeting_date_calculator.py | 4 +--- tests/test_meeting_date_calculator.py | 16 ++++++++-------- 3 files changed, 10 insertions(+), 12 deletions(-) diff --git a/city_scrapers/spiders/cle_design_review.py b/city_scrapers/spiders/cle_design_review.py index 573f010..e03da92 100644 --- a/city_scrapers/spiders/cle_design_review.py +++ b/city_scrapers/spiders/cle_design_review.py @@ -120,7 +120,7 @@ def parse(self, response): calc_end = calc_start + timedelta(days=60) upcoming_meetings = calculate_upcoming_meeting_days( - weekday, chosen_ordinals, calc_start, calc_end + weekday, chosen_ordinals, calc_start.date(), calc_end.date() ) if is_downtown: # downtown meetings are a day before the one calculated upcoming_meetings = [ diff --git a/city_scrapers/utils/meeting_date_calculator.py b/city_scrapers/utils/meeting_date_calculator.py index 0cc821d..463358a 100644 --- a/city_scrapers/utils/meeting_date_calculator.py +++ b/city_scrapers/utils/meeting_date_calculator.py @@ -52,9 +52,7 @@ def calculate_upcoming_meeting_days(chosen_weekday, chosen_ordinals, start, end) # we now have all the relevant dates for the given months but we need to # filter out days before and after start and end return [ - current_date - for current_date in raw_dates - if (start.date() <= current_date <= end.date()) + current_date for current_date in raw_dates if (start <= current_date <= end) ] diff --git a/tests/test_meeting_date_calculator.py b/tests/test_meeting_date_calculator.py index 2534e26..14d3c9f 100644 --- a/tests/test_meeting_date_calculator.py +++ b/tests/test_meeting_date_calculator.py @@ -1,11 +1,11 @@ -from datetime import date, datetime +from datetime import date import pytest # noqa from city_scrapers.utils import calculate_upcoming_meeting_days -start = datetime(2021, 12, 1) -end = datetime(2022, 1, 1) +start = date(2021, 12, 1) +end = date(2022, 1, 1) # test a random input - 1 and 3rd tuesday (1) @@ -24,7 +24,7 @@ def test_single_day(): def test_multiple_months(): - end = datetime(2022, 2, 1) + end = date(2022, 2, 1) expected = [ date(2021, 12, 14), date(2021, 12, 28), @@ -37,7 +37,7 @@ def test_multiple_months(): def test_start(): - start = datetime(2021, 12, 8) + start = date(2021, 12, 8) expected = [date(2021, 12, 21)] out = calculate_upcoming_meeting_days(1, [0, 2], start, end) @@ -45,7 +45,7 @@ def test_start(): def test_start_is_inclusive(): - start = datetime(2021, 12, 7) + start = date(2021, 12, 7) expected = [date(2021, 12, 7), date(2021, 12, 21)] out = calculate_upcoming_meeting_days(1, [0, 2], start, end) @@ -53,7 +53,7 @@ def test_start_is_inclusive(): def test_end(): - end = datetime(2021, 12, 20) + end = date(2021, 12, 20) expected = [date(2021, 12, 7)] out = calculate_upcoming_meeting_days(1, [0, 2], start, end) @@ -61,7 +61,7 @@ def test_end(): def test_end_is_inclusive(): - end = datetime(2021, 12, 21) + end = date(2021, 12, 21) expected = [date(2021, 12, 7), date(2021, 12, 21)] out = calculate_upcoming_meeting_days(1, [0, 2], start, end)