Fixing aggregate query performance (#950)
devinmatte authored Feb 11, 2024
1 parent 8e8036d commit 22d43fa
Showing 2 changed files with 4 additions and 3 deletions.
6 changes: 3 additions & 3 deletions server/chalicelib/s3.py
@@ -81,9 +81,9 @@ def parallel_download_events(datestop):
 
 
 def download_events(sdate, edate, stops: list):
-    # This used to be month_range but updated to date_range to support live ranges
-    # If something breaks, this may be why
-    datestops = itertools.product(parallel.date_range(sdate, edate), stops)
+    # This needs to be month_range for performance and memory,
+    # however, for data from gobble we'll need specific dates, not just first of the month
+    datestops = itertools.product(parallel.month_range(sdate, edate), stops)
     result = parallel_download_events(datestops)
     result = filter(lambda row: sdate.strftime("%Y-%m-%d") <= row["service_date"] <= edate.strftime("%Y-%m-%d"), result)
     return sorted(result, key=lambda row: row["event_time"])
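Why this helps: itertools.product pairs every key returned by the range helper with every stop, and each (date, stop) pair becomes one S3 download in parallel_download_events. Iterating by month instead of by day therefore cuts the number of objects fetched roughly 30x, and the service_date filter afterwards trims the month-sized result back to the requested window, so the wider fetch does not change what the endpoint returns. A minimal sketch of the idea, assuming month_range yields one first-of-month date per month and date_range yields every calendar day (the helper bodies below are illustrative stand-ins, not the actual implementations in parallel.py):

import itertools
from datetime import date, timedelta

# Illustrative stand-ins for parallel.date_range / parallel.month_range;
# the real helpers may differ -- assumed behavior only.
def date_range(sdate: date, edate: date):
    """Yield every calendar day from sdate through edate."""
    d = sdate
    while d <= edate:
        yield d
        d += timedelta(days=1)

def month_range(sdate: date, edate: date):
    """Yield the first of each month touched by [sdate, edate]."""
    d = sdate.replace(day=1)
    while d <= edate:
        yield d
        d = (d.replace(day=28) + timedelta(days=4)).replace(day=1)  # first of next month

stops = ["70061", "70063"]  # hypothetical stop ids
sdate, edate = date(2024, 1, 1), date(2024, 2, 10)

# Each (date, stop) pair becomes one S3 download, so fewer keys means fewer requests.
daily = list(itertools.product(date_range(sdate, edate), stops))    # 41 days * 2 stops = 82 keys
monthly = list(itertools.product(month_range(sdate, edate), stops)) # 2 months * 2 stops = 4 keys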
1 change: 1 addition & 0 deletions server/chalicelib/s3_historical.py
@@ -77,6 +77,7 @@ def headways(stop_ids: list, sdate, edate):
         headway_time_sec = delta.total_seconds()
 
         # Throw out any headways > 120 min
+        # TODO: We can't do this anymore for CR data
         if headway_time_sec > 120 * 60:
             continue
 
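The added TODO flags that the 120-minute cap, while reasonable for subway headways, would silently discard legitimate Commuter Rail headways, since off-peak CR trains can be more than two hours apart. A hypothetical sketch of one way the cap could become mode-dependent (the route_type parameter and the commuter-rail threshold are illustrative assumptions, not part of this commit):

# Hypothetical per-mode headway caps; only the 120-minute value comes from the existing code.
MAX_HEADWAY_SEC = {
    "subway": 120 * 60,            # existing cutoff in s3_historical.headways
    "commuter_rail": 6 * 60 * 60,  # illustrative: allow long off-peak CR gaps
}

def keep_headway(headway_time_sec: float, route_type: str = "subway") -> bool:
    """Return True if the headway should be kept rather than thrown out."""
    return headway_time_sec <= MAX_HEADWAY_SEC.get(route_type, 120 * 60)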
