diff --git a/src/server/_printer.py b/src/server/_printer.py index 6e32d7d43..336b650cc 100644 --- a/src/server/_printer.py +++ b/src/server/_printer.py @@ -46,13 +46,14 @@ def __init__(self): self.result: int = -1 self._max_results: int = MAX_COMPATIBILITY_RESULTS if is_compatibility_mode() else MAX_RESULTS - def make_response(self, gen): + def make_response(self, gen, headers=None): return Response( gen, mimetype="application/json", + headers=headers, ) - def __call__(self, generator: Iterable[Dict[str, Any]]) -> Response: + def __call__(self, generator: Iterable[Dict[str, Any]], headers=None) -> Response: def gen(): self.result = -2 # no result, default response began = False @@ -84,7 +85,7 @@ def gen(): if r is not None: yield r - return self.make_response(stream_with_context(gen())) + return self.make_response(stream_with_context(gen()), headers=headers) @property def remaining_rows(self) -> int: @@ -223,8 +224,11 @@ def __init__(self, filename: Optional[str] = "epidata"): super(CSVPrinter, self).__init__() self._filename = filename - def make_response(self, gen): - headers = {"Content-Disposition": f"attachment; filename={self._filename}.csv"} if self._filename else {} + def make_response(self, gen, headers=None): + if headers is None: + headers = {} + if self._filename: + headers["Content-Disposition"] = f"attachment; filename={self._filename}.csv" return Response(gen, mimetype="text/csv; charset=utf8", headers=headers) def _begin(self): @@ -296,8 +300,8 @@ class JSONLPrinter(APrinter): a printer class writing in JSONLines format """ - def make_response(self, gen): - return Response(gen, mimetype=" text/plain; charset=utf8") + def make_response(self, gen, headers=None): + return Response(gen, mimetype=" text/plain; charset=utf8", headers=headers) def _begin(self): if show_hard_api_key_warning(): diff --git a/src/server/endpoints/covidcast_meta.py b/src/server/endpoints/covidcast_meta.py index d10de18db..52d0a06eb 100644 --- a/src/server/endpoints/covidcast_meta.py +++ b/src/server/endpoints/covidcast_meta.py @@ -26,51 +26,9 @@ def __str__(self): return f"{self.source}:{self.signal}" -def fetch_data( - time_types: Optional[List[str]], - geo_types: Optional[List[str]], - signals: Optional[List[SourceSignal]], -): - # complain if the cache is more than 75 minutes old - max_age = 75 * 60 - - row = db.execute( - text( - "SELECT UNIX_TIMESTAMP(NOW()) - timestamp AS age, epidata FROM covidcast_meta_cache LIMIT 1" - ) - ).fetchone() - - if not row or not row["epidata"]: - get_structured_logger('server_api').warning("no data in covidcast_meta cache") - return - - age = row["age"] - if age > max_age and row["epidata"]: - get_structured_logger('server_api').warning("covidcast_meta cache is stale", cache_age=age) - - epidata = loads(row["epidata"]) - - if not epidata: - return - - def filter_row(row: Dict): - if time_types and row.get("time_type") not in time_types: - return False - if geo_types and row.get("geo_type") not in geo_types: - return False - if not signals: - return True - for signal in signals: - # match source and (signal or no signal or signal = *) - if row.get("data_source") == signal.source and ( - signal.signal == "*" or signal.signal == row.get("signal") - ): - return True - return False - - for row in epidata: - if filter_row(row): - yield row +# empty generator that never yields +def _nonerator(): + return (x for x in []) @bp.route("/", methods=("GET", "POST")) @@ -79,4 +37,60 @@ def handle(): signals = [SourceSignal(v) for v in (extract_strings("signals") or [])] geo_types = extract_strings("geo_types") - return create_printer(request.values.get("format"))(filter_fields(fetch_data(time_types, geo_types, signals))) + printer = create_printer(request.values.get("format")) + + metadata = db.execute( + text( + "SELECT UNIX_TIMESTAMP(NOW()) - timestamp AS age, epidata FROM covidcast_meta_cache LIMIT 1" + ) + ).fetchone() + + if not metadata or "epidata" not in metadata: + # the db table `covidcast_meta_cache` has no rows + get_structured_logger('server_api').warning("no data in covidcast_meta cache") + return printer(_nonerator()) + + metadata_list = loads(metadata["epidata"]) + + if not metadata_list: + # the db table has a row, but there is no metadata about any signals in it + get_structured_logger('server_api').warning("empty entry in covidcast_meta cache") + return printer(_nonerator()) + + # the expected metadata regeneration interval in seconds, aka time between runs of + # src/acquisition/covidcast/covidcast_meta_cache_updater.py (currently 2h) + standard_age = 2 * 60 * 60 + # a short period when a client can continue to use this metadata even if its slightly stale, + # which also gives some padding if the md generation is running slow, + # and which also acts as a minimum cacheable time (currently 10 mins) + age_margin = 10 * 60 + # these should be updated if a stale cache will have undue impact on user activities, such as + # if we start updating the metadata table much more frequently and having up-to-the-minute + # metadata accuracy becomes important to users once more. + # TODO: get the above two values ^ from config vars? + age = metadata["age"] + reported_age = max(0, min(age, standard_age) - age_margin) + + def cache_entry_gen(): + for entry in metadata_list: + if time_types and entry.get("time_type") not in time_types: + continue + if geo_types and entry.get("geo_type") not in geo_types: + continue + if not signals: + yield entry + for signal in signals: + # match source and (signal or no signal or signal = *) + if entry.get("data_source") == signal.source and ( + signal.signal == "*" or signal.signal == entry.get("signal") + ): + yield entry + + return printer( + filter_fields(cache_entry_gen()), + headers={ + "Cache-Control": f"max-age={standard_age}, public", + "Age": f"{reported_age}", + # TODO?: "Expires": f"{}", # superseded by Cache-Control: https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/Expires + } + )