Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Inspection Quota Check and Status Fix #2

Open
wants to merge 4 commits into
base: main
Choose a base branch
from
Open
Changes from all commits
Commits
File filter

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
17 changes: 13 additions & 4 deletions gsc_bulk_indexer/indexer.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ class BulkIndexer:
"""Submits the URLs of a website for indexing in Google Search Console"""

REQUEST_QUOTA = 200
INSPECTION_QUOTA = 2000

def __init__(
self,
Expand Down Expand Up @@ -108,7 +109,7 @@ def index(self) -> int:
def _request_indexing(self, urls: typing.List[str]):
for url in urls:
utils.logger.info(f"👩‍💻 Working on {url}")
current_state = self._cache.get(url) or {}
current_state = self._cache[url] or {}
notification_status = None
try:
# assuming that we will not hit this quota of 180 requests
Expand Down Expand Up @@ -158,7 +159,7 @@ def _check_indexing_status(self):
utils.logger.info("Checking indexing status...")
to_recheck: typing.List[str] = []
for url in self._urls:
current_state = self._cache.get(url) or {}
current_state = self._cache[url] or {}
if self._should_check_indexing_status(current_state):
to_recheck.append(url)
else:
Expand All @@ -172,13 +173,21 @@ def _check_indexing_status(self):
def _batched_check_indexing_status(
self, urls: typing.List[str], batch_size: int = 10
):
for url_batch in itertools.zip_longest(*[iter(urls)] * batch_size):
for idx, url_batch in enumerate(itertools.zip_longest(*[iter(urls)] * batch_size)):

if batch_size * (idx + 1) >= self.INSPECTION_QUOTA:
utils.logger.warning(
f"❌ Daily request quota of {self.INSPECTION_QUOTA} URLs is "
"exhausted! Try running this in a day again."
)
return

url_batch = list(filter(None, url_batch))
current_states = asyncio.run(
self._check_indexing_status_batch(url_batch)
)
for url, state in zip(url_batch, current_states):
current_state = self._cache.get(url) or {}
current_state = self._cache[url] or {}
current_state.update(state)
self._cache[url] = current_state
status = state.get("status")
Expand Down