http caching for metadata (#1222)
allow http caching of metadata endpoint (plus a small margin), and removed staleness logging
melange396 authored Jul 10, 2023
1 parent f7a279b commit 59385b9
Showing 2 changed files with 71 additions and 53 deletions.
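
The commit message's "small margin" corresponds to the `age_margin` constant introduced in covidcast_meta.py below. As a worked example of the arithmetic (a sketch only; the constants are copied from the diff, the 30-minute cache age is made up):

    # constants introduced in src/server/endpoints/covidcast_meta.py (see diff below)
    standard_age = 2 * 60 * 60  # expected metadata regeneration interval: 7200 s
    age_margin = 10 * 60        # the "small margin" / minimum cacheable time: 600 s

    age = 30 * 60               # suppose the cached metadata row is 30 minutes old
    reported_age = max(0, min(age, standard_age) - age_margin)

    print(f"Cache-Control: max-age={standard_age}, public")  # Cache-Control: max-age=7200, public
    print(f"Age: {reported_age}")                            # Age: 1200

A downstream cache honoring these headers would treat the response as fresh for max-age minus Age seconds (6000 s here), i.e. roughly until the next metadata regeneration run plus the margin.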
18 changes: 11 additions & 7 deletions src/server/_printer.py
@@ -46,13 +46,14 @@ def __init__(self):
         self.result: int = -1
         self._max_results: int = MAX_COMPATIBILITY_RESULTS if is_compatibility_mode() else MAX_RESULTS
 
-    def make_response(self, gen):
+    def make_response(self, gen, headers=None):
         return Response(
             gen,
             mimetype="application/json",
+            headers=headers,
         )
 
-    def __call__(self, generator: Iterable[Dict[str, Any]]) -> Response:
+    def __call__(self, generator: Iterable[Dict[str, Any]], headers=None) -> Response:
         def gen():
             self.result = -2  # no result, default response
             began = False
@@ -84,7 +85,7 @@ def gen():
             if r is not None:
                 yield r
 
-        return self.make_response(stream_with_context(gen()))
+        return self.make_response(stream_with_context(gen()), headers=headers)
 
     @property
     def remaining_rows(self) -> int:
@@ -223,8 +224,11 @@ def __init__(self, filename: Optional[str] = "epidata"):
         super(CSVPrinter, self).__init__()
         self._filename = filename
 
-    def make_response(self, gen):
-        headers = {"Content-Disposition": f"attachment; filename={self._filename}.csv"} if self._filename else {}
+    def make_response(self, gen, headers=None):
+        if headers is None:
+            headers = {}
+        if self._filename:
+            headers["Content-Disposition"] = f"attachment; filename={self._filename}.csv"
         return Response(gen, mimetype="text/csv; charset=utf8", headers=headers)
 
     def _begin(self):
@@ -296,8 +300,8 @@ class JSONLPrinter(APrinter):
     a printer class writing in JSONLines format
     """
 
-    def make_response(self, gen):
-        return Response(gen, mimetype=" text/plain; charset=utf8")
+    def make_response(self, gen, headers=None):
+        return Response(gen, mimetype=" text/plain; charset=utf8", headers=headers)
 
     def _begin(self):
         if show_hard_api_key_warning():
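
The new `headers` parameter in these printers is passed straight through to the Flask `Response`. A minimal standalone sketch of that pass-through (assumes Flask is installed; the app, route, and literal body are made up for illustration and only mirror `APrinter.make_response`, they do not reuse it):

    from flask import Flask, Response

    app = Flask(__name__)

    def make_response(gen, headers=None):
        # mirrors APrinter.make_response: optional headers go directly onto the Response
        return Response(gen, mimetype="application/json", headers=headers)

    @app.route("/demo")
    def demo():
        # a caller that wants caching simply supplies the extra headers
        return make_response('{"ok": true}', headers={"Cache-Control": "max-age=600, public"})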
106 changes: 60 additions & 46 deletions src/server/endpoints/covidcast_meta.py
@@ -26,51 +26,9 @@ def __str__(self):
         return f"{self.source}:{self.signal}"
 
 
-def fetch_data(
-    time_types: Optional[List[str]],
-    geo_types: Optional[List[str]],
-    signals: Optional[List[SourceSignal]],
-):
-    # complain if the cache is more than 75 minutes old
-    max_age = 75 * 60
-
-    row = db.execute(
-        text(
-            "SELECT UNIX_TIMESTAMP(NOW()) - timestamp AS age, epidata FROM covidcast_meta_cache LIMIT 1"
-        )
-    ).fetchone()
-
-    if not row or not row["epidata"]:
-        get_structured_logger('server_api').warning("no data in covidcast_meta cache")
-        return
-
-    age = row["age"]
-    if age > max_age and row["epidata"]:
-        get_structured_logger('server_api').warning("covidcast_meta cache is stale", cache_age=age)
-
-    epidata = loads(row["epidata"])
-
-    if not epidata:
-        return
-
-    def filter_row(row: Dict):
-        if time_types and row.get("time_type") not in time_types:
-            return False
-        if geo_types and row.get("geo_type") not in geo_types:
-            return False
-        if not signals:
-            return True
-        for signal in signals:
-            # match source and (signal or no signal or signal = *)
-            if row.get("data_source") == signal.source and (
-                signal.signal == "*" or signal.signal == row.get("signal")
-            ):
-                return True
-        return False
-
-    for row in epidata:
-        if filter_row(row):
-            yield row
+# empty generator that never yields
+def _nonerator():
+    return (x for x in [])
 
 
 @bp.route("/", methods=("GET", "POST"))
@@ -79,4 +37,60 @@ def handle():
     signals = [SourceSignal(v) for v in (extract_strings("signals") or [])]
     geo_types = extract_strings("geo_types")
 
-    return create_printer(request.values.get("format"))(filter_fields(fetch_data(time_types, geo_types, signals)))
+    printer = create_printer(request.values.get("format"))
+
+    metadata = db.execute(
+        text(
+            "SELECT UNIX_TIMESTAMP(NOW()) - timestamp AS age, epidata FROM covidcast_meta_cache LIMIT 1"
+        )
+    ).fetchone()
+
+    if not metadata or "epidata" not in metadata:
+        # the db table `covidcast_meta_cache` has no rows
+        get_structured_logger('server_api').warning("no data in covidcast_meta cache")
+        return printer(_nonerator())
+
+    metadata_list = loads(metadata["epidata"])
+
+    if not metadata_list:
+        # the db table has a row, but there is no metadata about any signals in it
+        get_structured_logger('server_api').warning("empty entry in covidcast_meta cache")
+        return printer(_nonerator())
+
+    # the expected metadata regeneration interval in seconds, aka time between runs of
+    # src/acquisition/covidcast/covidcast_meta_cache_updater.py (currently 2h)
+    standard_age = 2 * 60 * 60
+    # a short period when a client can continue to use this metadata even if it's slightly stale,
+    # which also gives some padding if the metadata generation is running slow,
+    # and which also acts as a minimum cacheable time (currently 10 mins)
+    age_margin = 10 * 60
+    # these should be updated if a stale cache will have undue impact on user activities, such as
+    # if we start updating the metadata table much more frequently and having up-to-the-minute
+    # metadata accuracy becomes important to users once more.
+    # TODO: get the above two values ^ from config vars?
+    age = metadata["age"]
+    reported_age = max(0, min(age, standard_age) - age_margin)
+
+    def cache_entry_gen():
+        for entry in metadata_list:
+            if time_types and entry.get("time_type") not in time_types:
+                continue
+            if geo_types and entry.get("geo_type") not in geo_types:
+                continue
+            if not signals:
+                yield entry
+            for signal in signals:
+                # match source and (signal or no signal or signal = *)
+                if entry.get("data_source") == signal.source and (
+                    signal.signal == "*" or signal.signal == entry.get("signal")
+                ):
+                    yield entry
+
+    return printer(
+        filter_fields(cache_entry_gen()),
+        headers={
+            "Cache-Control": f"max-age={standard_age}, public",
+            "Age": f"{reported_age}",
+            # TODO?: "Expires": f"{}",  # superseded by Cache-Control: https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/Expires
+        }
+    )
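
For the signal filter in `cache_entry_gen`, a requested signal of `*` matches every signal of that source. A small self-contained sketch of that matching rule (made-up metadata entries; plain `(source, signal)` tuples stand in for the endpoint's `SourceSignal` objects):

    entries = [
        {"data_source": "src1", "signal": "sig_a"},
        {"data_source": "src1", "signal": "sig_b"},
        {"data_source": "src2", "signal": "sig_a"},
    ]

    def matches(entry, signals):
        # an empty filter keeps everything; "*" matches any signal of the given source
        if not signals:
            return True
        return any(
            entry["data_source"] == source and signal in ("*", entry["signal"])
            for source, signal in signals
        )

    print([e for e in entries if matches(e, [("src1", "*")])])
    # keeps both src1 entries and drops the src2 entry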
