From 2760d05da1c3dbbe3a439d1951d6fd891e24fe80 Mon Sep 17 00:00:00 2001 From: Jose Javier Merchante Date: Tue, 13 Feb 2024 13:24:13 +0100 Subject: [PATCH] Retry Kitsune when rate limited Kitsune now includes the `--sleep-for-rate` option to manage `429 Too Many Requests` errors. You can configure retries and sleep duration using the `--max-retries` and `--sleep-time` options respectively. Signed-off-by: Jose Javier Merchante --- perceval/backends/mozilla/kitsune.py | 75 +++++++-- .../retry-kitsune-when-rate-limited.yml | 10 ++ tests/test_kitsune.py | 147 +++++++++++------- 3 files changed, 160 insertions(+), 72 deletions(-) create mode 100644 releases/unreleased/retry-kitsune-when-rate-limited.yml diff --git a/perceval/backends/mozilla/kitsune.py b/perceval/backends/mozilla/kitsune.py index 10804b3..faa02be 100644 --- a/perceval/backends/mozilla/kitsune.py +++ b/perceval/backends/mozilla/kitsune.py @@ -22,6 +22,7 @@ import json import logging +import time import requests @@ -32,8 +33,7 @@ BackendCommand, BackendCommandArgumentParser) from ...client import HttpClient -from ...errors import ParseError - +from ...errors import ParseError, RateLimitError, HttpClientError logger = logging.getLogger(__name__) @@ -42,6 +42,9 @@ CATEGORY_QUESTION = "question" +DEFAULT_SLEEP_TIME = 180 +MAX_RETRIES = 5 + class Kitsune(Backend): """Kitsune backend for Perceval. @@ -58,17 +61,21 @@ class Kitsune(Backend): :param archive: archive to store/retrieve items :param ssl_verify: enable/disable SSL verification """ - version = '0.8.0' + version = '0.9.0' CATEGORIES = [CATEGORY_QUESTION] - def __init__(self, url=None, tag=None, archive=None, ssl_verify=True): + def __init__(self, url=None, tag=None, archive=None, ssl_verify=True, sleep_for_rate=False, + sleep_time=DEFAULT_SLEEP_TIME, max_retries=MAX_RETRIES): if not url: url = KITSUNE_URL origin = url super().__init__(origin, tag=tag, archive=archive, ssl_verify=ssl_verify) self.url = url + self.sleep_for_rate = sleep_for_rate + self.sleep_time = sleep_time + self.max_retries = max_retries self.client = None @@ -180,9 +187,9 @@ def metadata(self, item, filter_classified=False): def has_archiving(cls): """Returns whether it supports archiving items on the fetch process. - :returns: this backend supports items archive + :returns: this backend does not support items archive """ - return True + return False @classmethod def has_resuming(cls): @@ -221,10 +228,11 @@ def metadata_category(item): """ return CATEGORY_QUESTION - def _init_client(self, from_archive=False): + def _init_client(self): """Init client""" - return KitsuneClient(self.url, self.archive, from_archive, self.ssl_verify) + return KitsuneClient(self.url, self.ssl_verify, self.sleep_for_rate, + self.sleep_time, self.max_retries) class KitsuneClient(HttpClient): @@ -234,17 +242,23 @@ class KitsuneClient(HttpClient): a Kitsune site. :param url: URL of Kitsune (sample https://support.mozilla.org) - :param archive: an archive to store/read fetched data - :param from_archive: it tells whether to write/read the archive :param ssl_verify: enable/disable SSL verification + :param sleep_for_rate: sleep until rate limit is reset + :param sleep_time: seconds to sleep for rate limit + :param max_retries: number of max retries for RateLimit :raises HTTPError: when an error occurs doing the request """ FIRST_PAGE = 1 # Initial page in Kitsune ITEMS_PER_PAGE = 20 # Items per page in Kitsune API - def __init__(self, url, archive=None, from_archive=False, ssl_verify=True): - super().__init__(urijoin(url, '/api/2/'), archive=archive, from_archive=from_archive, ssl_verify=ssl_verify) + def __init__(self, url, ssl_verify=True, sleep_for_rate=False, + sleep_time=DEFAULT_SLEEP_TIME, max_retries=MAX_RETRIES): + super().__init__(urijoin(url, '/api/2/'), ssl_verify=ssl_verify) + + self.sleep_for_rate = sleep_for_rate + self.sleep_time = sleep_time + self.max_retries = max_retries def get_questions(self, offset=None): """Retrieve questions from older to newer updated starting offset""" @@ -292,15 +306,36 @@ def get_question_answers(self, question_id): break page += 1 + def sleep_for_rate_limit(self): + """The fetching process sleeps until the rate limit is restored or + raises a RateLimitError exception if sleep_for_rate flag is disabled. + """ + cause = "Rate limit exhausted." + if self.sleep_for_rate: + logger.info(f"{cause} Waiting {self.sleep_time} secs for rate limit reset.") + time.sleep(self.sleep_time) + else: + raise RateLimitError(cause=cause, seconds_to_reset=self.sleep_time) + def fetch(self, url, params): """Return the textual content associated to the Response object""" logger.debug("Kitsune client calls API: %s params: %s", url, str(params)) - response = super().fetch(url, payload=params) + retries = self.max_retries + while retries >= 0: + try: + response = super().fetch(url, payload=params) + return response.text + except requests.exceptions.HTTPError as ex: + if ex.response.status_code == 429 and retries > 0: + retries -= 1 + self.sleep_for_rate_limit() + else: + raise ex - return response.text + raise HttpClientError(cause="Max retries exceeded") class KitsuneCommand(BackendCommand): @@ -314,7 +349,6 @@ def setup_cmd_parser(cls): parser = BackendCommandArgumentParser(cls.BACKEND, offset=True, - archive=True, ssl_verify=True) # Required arguments @@ -322,4 +356,15 @@ def setup_cmd_parser(cls): default="https://support.mozilla.org", help="Kitsune URL (default: https://support.mozilla.org)") + # Kitsune options + group = parser.parser.add_argument_group('Kitsune arguments') + group.add_argument('--sleep-for-rate', dest='sleep_for_rate', + action='store_true', + help="sleep for getting more rate") + group.add_argument('--max-retries', dest='max_retries', + default=MAX_RETRIES, type=int, + help="number of API call retries") + group.add_argument('--sleep-time', dest='sleep_time', + default=DEFAULT_SLEEP_TIME, type=int, + help="sleeping time between API call retries") return parser diff --git a/releases/unreleased/retry-kitsune-when-rate-limited.yml b/releases/unreleased/retry-kitsune-when-rate-limited.yml new file mode 100644 index 0000000..2ef0fa7 --- /dev/null +++ b/releases/unreleased/retry-kitsune-when-rate-limited.yml @@ -0,0 +1,10 @@ +--- +title: Retry Kitsune when rate limited +category: added +author: Jose Javier Merchante +issue: null +notes: > + Kitsune now includes the `--sleep-for-rate` option to manage + `429 Too Many Requests` errors. You can configure retries + and sleep duration using the `--max-retries` and `--sleep-time` + options respectively. diff --git a/tests/test_kitsune.py b/tests/test_kitsune.py index e859241..5a4cc46 100644 --- a/tests/test_kitsune.py +++ b/tests/test_kitsune.py @@ -23,17 +23,18 @@ # import json +import time import unittest import httpretty import requests from perceval.backend import BackendCommandArgumentParser +from perceval.errors import RateLimitError from perceval.backends.mozilla.kitsune import (Kitsune, KitsuneCommand, KitsuneClient) -from base import TestCaseBackendArchive KITSUNE_SERVER_URL = 'http://example.com' @@ -42,6 +43,7 @@ KITSUNE_API_ANSWER = KITSUNE_SERVER_URL + '/api/2/answer/' KITSUNE_SERVER_FAIL_PAGE = 69 +KITSUNE_SERVER_RATELIMIT_PAGE = 200 KITSUNE_ITEMS_PER_PAGE = 20 @@ -90,6 +92,14 @@ def request_callback(method, uri, headers): elif page == str(KITSUNE_SERVER_FAIL_PAGE + 1): # Next page to the server fail returns questions return (200, headers, mozilla_questions_2) + elif page == str(KITSUNE_SERVER_RATELIMIT_PAGE): + # To test RateLimitError + last_request = HTTPServer.requests_http[-1] if HTTPServer.requests_http else None + if last_request and last_request.querystring['page'][0] == str(KITSUNE_SERVER_RATELIMIT_PAGE): + body = mozilla_questions_2 + else: + HTTPServer.requests_http.append(httpretty.last_request()) + return (429, headers, '') else: return (404, headers, '') @@ -115,11 +125,13 @@ class TestKitsuneBackend(unittest.TestCase): def test_initialization(self): """Test whether attributes are initializated""" - kitsune = Kitsune(KITSUNE_SERVER_URL, tag='test') + kitsune = Kitsune(KITSUNE_SERVER_URL, tag='test', sleep_for_rate=True, max_retries=10) self.assertEqual(kitsune.url, KITSUNE_SERVER_URL) self.assertEqual(kitsune.origin, KITSUNE_SERVER_URL) self.assertEqual(kitsune.tag, 'test') + self.assertEqual(kitsune.sleep_for_rate, True) + self.assertEqual(kitsune.max_retries, 10) self.assertIsNone(kitsune.client) self.assertTrue(kitsune.ssl_verify) @@ -140,7 +152,7 @@ def test_initialization(self): def test_has_archiving(self): """Test if it returns True when has_archiving is called""" - self.assertEqual(Kitsune.has_archiving(), True) + self.assertEqual(Kitsune.has_archiving(), False) def test_has_resuming(self): """Test if it returns True when has_resuming is called""" @@ -254,57 +266,6 @@ def test_search_fields(self): self.assertEqual(kitsune.metadata_id(question['data']), question['search_fields']['item_id']) -class TestKitsuneBackendArchive(TestCaseBackendArchive): - """Kitsune backend tests using an archive""" - - def setUp(self): - super().setUp() - self.backend_write_archive = Kitsune(KITSUNE_SERVER_URL, archive=self.archive) - self.backend_read_archive = Kitsune(KITSUNE_SERVER_URL, archive=self.archive) - - @httpretty.activate - def test_fetch_from_archive(self): - """Test whether the questions are returned from archive""" - - HTTPServer.routes() - - self._test_fetch_from_archive() - - @httpretty.activate - def test_fetch_offset_from_archive_offset_0(self): - """Test whether the questions are returned offset from archive""" - - HTTPServer.routes() - - offset = 0 - self._test_fetch_from_archive(offset=offset) - - @httpretty.activate - def test_fetch_offset_from_archive_offset_2(self): - """Test whether the questions are returned offset from archive""" - - HTTPServer.routes() - - offset = 2 - self._test_fetch_from_archive(offset=offset) - - @httpretty.activate - def test_fetch_offset_from_archive_offset_4(self): - """Test whether the questions are returned offset from archive""" - - HTTPServer.routes() - - offset = 4 - self._test_fetch_from_archive(offset=offset) - - @httpretty.activate - def test_fetch_empty_from_archive(self): - """Test whether it works when no jobs are fetched from archive""" - - HTTPServer.routes(empty=True) - self._test_fetch_from_archive() - - class TestKitsuneCommand(unittest.TestCase): """Tests for KitsuneCommand class""" @@ -322,15 +283,17 @@ def test_setup_cmd_parser(self): args = [KITSUNE_SERVER_URL, '--tag', 'test', - '--no-archive', - '--offset', '88'] + '--offset', '88', + '--sleep-for-rate', + '--max-retries', '10'] parsed_args = parser.parse(*args) self.assertEqual(parsed_args.url, KITSUNE_SERVER_URL) self.assertEqual(parsed_args.tag, 'test') - self.assertEqual(parsed_args.no_archive, True) self.assertEqual(parsed_args.offset, 88) self.assertTrue(parsed_args.ssl_verify) + self.assertTrue(parsed_args.sleep_for_rate) + self.assertEqual(parsed_args.max_retries, 10) args = [KITSUNE_SERVER_URL, '--no-ssl-verify'] @@ -360,6 +323,9 @@ def test_init(self): self.assertIsNone(kitsune.archive) self.assertFalse(kitsune.from_archive) self.assertTrue(kitsune.ssl_verify) + self.assertEqual(kitsune.sleep_for_rate, False) + self.assertEqual(kitsune.sleep_time, 180) + self.assertEqual(kitsune.max_retries, 5) kitsune = KitsuneClient(KITSUNE_SERVER_URL, ssl_verify=False) @@ -368,6 +334,16 @@ def test_init(self): self.assertFalse(kitsune.from_archive) self.assertFalse(kitsune.ssl_verify) + kitsune = KitsuneClient(KITSUNE_SERVER_URL, sleep_for_rate=True, sleep_time=100, + max_retries=10) + + self.assertEqual(kitsune.base_url, base_url) + self.assertIsNone(kitsune.archive) + self.assertFalse(kitsune.from_archive) + self.assertEqual(kitsune.sleep_for_rate, True) + self.assertEqual(kitsune.sleep_time, 100) + self.assertEqual(kitsune.max_retries, 10) + @httpretty.activate def test_get_questions(self): """Test get_questions API call""" @@ -412,6 +388,63 @@ def test_get_question_answers(self): } self.assertDictEqual(req.querystring, expected) + @httpretty.activate + def test_sleep_for_rate(self): + """ Test if the clients sleeps when the rate limit is reached""" + + body = read_file('data/kitsune/kitsune_questions_2_2.json') + HTTPServer.routes() + # Clear previous requests + HTTPServer.requests_http = [] + + wait_to_reset = 2 + client = KitsuneClient(KITSUNE_SERVER_URL, + sleep_for_rate=True, + sleep_time=wait_to_reset, + max_retries=2) + + # Call API + before = float(time.time()) + offset = (KITSUNE_SERVER_RATELIMIT_PAGE - 1) * KITSUNE_ITEMS_PER_PAGE + response = next(client.get_questions(offset=offset)) + + after = float(time.time()) + diff = after - before + self.assertGreaterEqual(diff, wait_to_reset) + + self.assertEqual(response, body) + req = HTTPServer.requests_http[-1] + self.assertEqual(req.method, 'GET') + self.assertRegex(req.path, '/api/2/question/') + # Check request params + expected = { + 'page': [str(KITSUNE_SERVER_RATELIMIT_PAGE)], + 'ordering': ['updated'], + } + self.assertDictEqual(req.querystring, expected) + + @httpretty.activate + def test_rate_limit_error(self): + """Test if a rate limit error is raised when rate is exhausted""" + + HTTPServer.routes() + + client = KitsuneClient(KITSUNE_SERVER_URL) + + offset = (KITSUNE_SERVER_RATELIMIT_PAGE - 1) * KITSUNE_ITEMS_PER_PAGE + with self.assertRaises(RateLimitError): + next(client.get_questions(offset=offset)) + + req = HTTPServer.requests_http[-1] + self.assertEqual(req.method, 'GET') + self.assertRegex(req.path, '/api/2/question/') + # Check request params + expected = { + 'page': [str(KITSUNE_SERVER_RATELIMIT_PAGE)], + 'ordering': ['updated'], + } + self.assertDictEqual(req.querystring, expected) + if __name__ == "__main__": unittest.main(warnings='ignore')