From 1e843ed15b2947d5cdd8a78ff5d94aa820ac56f4 Mon Sep 17 00:00:00 2001 From: Timothy Ellersiek Date: Wed, 13 Mar 2024 22:11:19 +0100 Subject: [PATCH 1/4] add inspection quota --- gsc_bulk_indexer/indexer.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/gsc_bulk_indexer/indexer.py b/gsc_bulk_indexer/indexer.py index 815e7c4..d843f3c 100644 --- a/gsc_bulk_indexer/indexer.py +++ b/gsc_bulk_indexer/indexer.py @@ -11,6 +11,7 @@ class BulkIndexer: """Submits the URLs of a website for indexing in Google Search Console""" REQUEST_QUOTA = 200 + INSPECTION_QUOTA = 2000 def __init__( self, @@ -172,7 +173,11 @@ def _check_indexing_status(self): def _batched_check_indexing_status( self, urls: typing.List[str], batch_size: int = 10 ): - for url_batch in itertools.zip_longest(*[iter(urls)] * batch_size): + for idx, url_batch in enumerate(itertools.zip_longest(*[iter(urls)] * batch_size)): + + if batch_size * (idx + 1) >= self.INSPECTION_QUOTA: + return + url_batch = list(filter(None, url_batch)) current_states = asyncio.run( self._check_indexing_status_batch(url_batch) From 4796c3d815082b1d20c1ce4bdf1bb50ba169b0db Mon Sep 17 00:00:00 2001 From: Timothy Ellersiek Date: Wed, 13 Mar 2024 22:19:42 +0100 Subject: [PATCH 2/4] add warning message --- gsc_bulk_indexer/indexer.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/gsc_bulk_indexer/indexer.py b/gsc_bulk_indexer/indexer.py index d843f3c..fe04070 100644 --- a/gsc_bulk_indexer/indexer.py +++ b/gsc_bulk_indexer/indexer.py @@ -176,6 +176,10 @@ def _batched_check_indexing_status( for idx, url_batch in enumerate(itertools.zip_longest(*[iter(urls)] * batch_size)): if batch_size * (idx + 1) >= self.INSPECTION_QUOTA: + utils.logger.warning( + f"❌ Daily request quota of {self.REQUEST_QUOTA} URLs is " + "exhausted! Try running this in a day again." + ) return url_batch = list(filter(None, url_batch)) From 648d690afef612e66869719dd81bd3ca47fe36f5 Mon Sep 17 00:00:00 2001 From: Timothy Ellersiek Date: Mon, 18 Mar 2024 21:38:39 +0100 Subject: [PATCH 3/4] fix: status lookup --- gsc_bulk_indexer/indexer.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/gsc_bulk_indexer/indexer.py b/gsc_bulk_indexer/indexer.py index fe04070..07ec86f 100644 --- a/gsc_bulk_indexer/indexer.py +++ b/gsc_bulk_indexer/indexer.py @@ -159,7 +159,7 @@ def _check_indexing_status(self): utils.logger.info("Checking indexing status...") to_recheck: typing.List[str] = [] for url in self._urls: - current_state = self._cache.get(url) or {} + current_state = self._cache._status.get(url) or {} if self._should_check_indexing_status(current_state): to_recheck.append(url) else: @@ -177,7 +177,7 @@ def _batched_check_indexing_status( if batch_size * (idx + 1) >= self.INSPECTION_QUOTA: utils.logger.warning( - f"❌ Daily request quota of {self.REQUEST_QUOTA} URLs is " + f"❌ Daily request quota of {self.INSPECTION_QUOTA} URLs is " "exhausted! Try running this in a day again." ) return From 5dec522f3d9abf3a1b47b829081c8c4f1625bdaa Mon Sep 17 00:00:00 2001 From: Timothy Ellersiek Date: Tue, 19 Mar 2024 21:27:41 +0100 Subject: [PATCH 4/4] fix: lookup dict fix --- gsc_bulk_indexer/indexer.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/gsc_bulk_indexer/indexer.py b/gsc_bulk_indexer/indexer.py index 07ec86f..feb8bdf 100644 --- a/gsc_bulk_indexer/indexer.py +++ b/gsc_bulk_indexer/indexer.py @@ -109,7 +109,7 @@ def index(self) -> int: def _request_indexing(self, urls: typing.List[str]): for url in urls: utils.logger.info(f"👩‍💻 Working on {url}") - current_state = self._cache.get(url) or {} + current_state = self._cache[url] or {} notification_status = None try: # assuming that we will not hit this quota of 180 requests @@ -159,7 +159,7 @@ def _check_indexing_status(self): utils.logger.info("Checking indexing status...") to_recheck: typing.List[str] = [] for url in self._urls: - current_state = self._cache._status.get(url) or {} + current_state = self._cache[url] or {} if self._should_check_indexing_status(current_state): to_recheck.append(url) else: @@ -187,7 +187,7 @@ def _batched_check_indexing_status( self._check_indexing_status_batch(url_batch) ) for url, state in zip(url_batch, current_states): - current_state = self._cache.get(url) or {} + current_state = self._cache[url] or {} current_state.update(state) self._cache[url] = current_state status = state.get("status")