From c3ed86f85f4d4c3f76e87e51c301eb7302982ad2 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Adri=C3=A1n=20Chaves?=
Date: Mon, 17 Jun 2024 23:49:25 +0200
Subject: [PATCH] Improve handling of user-defined session config code

---
 docs/usage/session.rst      |  74 +++++++++--
 scrapy_zyte_api/_session.py | 209 ++++++++++++++++++------------
 tests/test_sessions.py      | 244 ++++++++++++++++++++++++++++++++++--
 3 files changed, 427 insertions(+), 100 deletions(-)

diff --git a/docs/usage/session.rst b/docs/usage/session.rst
index c0096507..b5857d42 100644
--- a/docs/usage/session.rst
+++ b/docs/usage/session.rst
@@ -215,6 +215,43 @@ init requests will still ignore the cookiejar. To include cookies in session
 init requests, use :ref:`init params `.
 
 
+Session retry policies
+======================
+
+The following retry policies are designed to work well with session management
+(see :ref:`enable-sessions`):
+
+.. autodata:: scrapy_zyte_api.SESSION_DEFAULT_RETRY_POLICY
+   :annotation:
+
+.. autodata:: scrapy_zyte_api.SESSION_AGGRESSIVE_RETRY_POLICY
+   :annotation:
+
+
+Spider closers
+==============
+
+Session management can close your spider early in the following scenarios:
+
+-   ``bad_session_inits``: Too many session initializations failed in a row for
+    a given session pool.
+
+    You can use the :setting:`ZYTE_API_SESSION_MAX_BAD_INITS` and
+    :setting:`ZYTE_API_SESSION_MAX_BAD_INITS_PER_POOL` settings to adjust that
+    maximum.
+
+-   ``pool_error``: There was an error determining the session pool ID for some
+    request.
+
+    It is most likely the result of a bad implementation of
+    :meth:`SessionConfig.pool `; the
+    logs should contain an error message with a traceback for such errors.
+
+A custom :meth:`SessionConfig.check `
+implementation may also close your spider with a custom reason by raising a
+:exc:`~scrapy.exceptions.CloseSpider` exception.
+
+
 .. _session-stats:
 
 Session stats
@@ -222,6 +259,14 @@ Session stats
 
 The following stats exist for scrapy-zyte-api session management:
 
+``scrapy-zyte-api/sessions/pools/{pool}/init/check-error``
+    Number of times that a session for pool ``{pool}`` triggered an unexpected
+    exception during its session validation check right after initialization.
+
+    It is most likely the result of a bad implementation of
+    :meth:`SessionConfig.check `; the
+    logs should contain an error message with a traceback for such errors.
+
 ``scrapy-zyte-api/sessions/pools/{pool}/init/check-failed``
     Number of times that a session from pool ``{pool}`` failed its session
     validation check right after initialization.
@@ -234,6 +279,23 @@ The following stats exist for scrapy-zyte-api session management:
     Number of times that initializing a session for pool ``{pool}`` resulted
     in an :ref:`unsuccessful response `.
 
+``scrapy-zyte-api/sessions/pools/{pool}/init/param-error``
+    Number of times that initializing a session for pool ``{pool}`` triggered
+    an unexpected exception when obtaining the Zyte API parameters for session
+    initialization.
+
+    It is most likely the result of a bad implementation of
+    :meth:`SessionConfig.params `; the
+    logs should contain an error message with a traceback for such errors.
+
+``scrapy-zyte-api/sessions/pools/{pool}/use/check-error``
+    Number of times that a response that used a session from pool ``{pool}``
+    triggered an unexpected exception during its session validation check.
+
+    It is most likely the result of a bad implementation of
+    :meth:`SessionConfig.check `; the
+    logs should contain an error message with a traceback for such errors.
+
 ``scrapy-zyte-api/sessions/pools/{pool}/use/check-failed``
     Number of times that a response that used a session from pool ``{pool}``
     failed its session validation check.
@@ -248,15 +310,3 @@ The following stats exist for scrapy-zyte-api session management:
 ``scrapy-zyte-api/sessions/pools/{pool}/use/failed``
     Number of times that a request that used a session from pool ``{pool}``
     got an :ref:`unsuccessful response `.
-
-Session retry policies
-======================
-
-The following retry policies are designed to work well with session management
-(see :ref:`enable-sessions`):
-
-.. autodata:: scrapy_zyte_api.SESSION_DEFAULT_RETRY_POLICY
-   :annotation:
-
-.. autodata:: scrapy_zyte_api.SESSION_AGGRESSIVE_RETRY_POLICY
-   :annotation:
diff --git a/scrapy_zyte_api/_session.py b/scrapy_zyte_api/_session.py
index 3f1edd04..fec3e5b0 100644
--- a/scrapy_zyte_api/_session.py
+++ b/scrapy_zyte_api/_session.py
@@ -1,9 +1,11 @@
 from asyncio import Task, create_task, sleep
 from collections import defaultdict, deque
 from copy import deepcopy
+from functools import partial
 from logging import getLogger
 from typing import Any, Deque, Dict, Optional, Set, Type, TypeVar, Union, cast
 from uuid import uuid4
+from weakref import WeakKeyDictionary
 
 from scrapy import Request, Spider
 from scrapy.crawler import Crawler
@@ -159,6 +161,10 @@ def build_from_crawler(
     return create_instance(objcls, settings=None, crawler=crawler, *args, **kwargs)
 
 
+class PoolError(ValueError):
+    pass
+
+
 class TooManyBadSessionInits(RuntimeError):
     pass
 
@@ -233,7 +239,8 @@ def params(self, request: Request) -> Dict[str, Any]:
 
     def check(self, response: Response, request: Request) -> bool:
         """Return ``True`` if the session used to fetch *response* should be
-        kept or ``False`` if it should be discarded.
+        kept, return ``False`` if it should be discarded, or raise
+        :exc:`~scrapy.exceptions.CloseSpider` if the spider should be closed.
 
         The default implementation checks the outcome of the ``setLocation``
         action if session initialization was location-based, as described in
@@ -417,6 +424,7 @@ def __init__(self, crawler: Crawler):
         # As soon as a session expires, it is removed from its pool, and a task
         # to initialize that new session is started.
         self._pools: Dict[str, Set[str]] = defaultdict(set)
+        self._pool_cache: WeakKeyDictionary[Request, str] = WeakKeyDictionary()
 
         # The queue is a rotating list of session IDs to use.
         #
@@ -448,19 +456,47 @@ def __init__(self, crawler: Crawler):
         # to prevent garbage collection to remove the tasks.
         self._init_tasks: Set[Task] = set()
 
+        self._session_config_cache: WeakKeyDictionary[Request, SessionConfig] = (
+            WeakKeyDictionary()
+        )
         self._session_config_map: Dict[Type[SessionConfig], SessionConfig] = {}
 
     def _get_session_config(self, request: Request) -> SessionConfig:
-        cls = session_config_registry.session_config_cls(request)
-        if cls not in self._session_config_map:
-            self._session_config_map[cls] = build_from_crawler(cls, self._crawler)
-        return self._session_config_map[cls]
+        try:
+            return self._session_config_cache[request]
+        except KeyError:
+            cls = session_config_registry.session_config_cls(request)
+            if cls not in self._session_config_map:
+                self._session_config_map[cls] = build_from_crawler(cls, self._crawler)
+            self._session_config_cache[request] = self._session_config_map[cls]
+            return self._session_config_map[cls]
 
-    async def _init_session(self, session_id: str, request: Request) -> bool:
-        session_config = self._get_session_config(request)
-        pool = session_config.pool(request)
+    def _get_pool(self, request):
+        try:
+            return self._pool_cache[request]
+        except KeyError:
+            session_config = self._get_session_config(request)
+            try:
+                pool = session_config.pool(request)
+            except Exception:
+                raise PoolError
+            self._pool_cache[request] = pool
+            return pool
 
-        session_params = deepcopy(session_config.params(request))
+    async def _init_session(self, session_id: str, request: Request, pool: str) -> bool:
+        session_config = self._get_session_config(request)
+        try:
+            session_params = session_config.params(request)
+        except Exception:
+            self._crawler.stats.inc_value(
+                f"scrapy-zyte-api/sessions/pools/{pool}/init/param-error"
+            )
+            logger.exception(
+                f"Unexpected exception raised while obtaining session "
+                f"initialization parameters for request {request}."
+            )
+            return False
+        session_params = deepcopy(session_params)
         session_init_url = session_params.pop("url", request.url)
         spider = self._crawler.spider
         session_init_request = Request(
@@ -484,21 +520,31 @@ async def _init_session(self, session_id: str, request: Request) -> bool:
             self._crawler.stats.inc_value(
                 f"scrapy-zyte-api/sessions/pools/{pool}/init/failed"
             )
-            result = False
+            return False
         else:
-            result = session_config.check(response, session_init_request)
+            try:
+                result = session_config.check(response, session_init_request)
+            except CloseSpider:
+                raise
+            except Exception:
+                self._crawler.stats.inc_value(
+                    f"scrapy-zyte-api/sessions/pools/{pool}/init/check-error"
+                )
+                logger.exception(
+                    f"Unexpected exception raised while checking session "
+                    f"validity on response {response}."
+                )
+                return False
         outcome = "passed" if result else "failed"
         self._crawler.stats.inc_value(
             f"scrapy-zyte-api/sessions/pools/{pool}/init/check-{outcome}"
         )
         return result
 
-    async def _create_session(self, request: Request) -> str:
-        session_config = self._get_session_config(request)
-        pool = session_config.pool(request)
+    async def _create_session(self, request: Request, pool: str) -> str:
         while True:
             session_id = str(uuid4())
-            session_init_succeeded = await self._init_session(session_id, request)
+            session_init_succeeded = await self._init_session(session_id, request, pool)
             if session_init_succeeded:
                 self._pools[pool].add(session_id)
                 self._bad_inits[pool] = 0
@@ -509,10 +555,8 @@ async def _create_session(self, request: Request) -> str:
         self._queues[pool].append(session_id)
         return session_id
 
-    async def _next_from_queue(self, request: Request) -> str:
+    async def _next_from_queue(self, request: Request, pool: str) -> str:
         session_id = None
-        session_config = self._get_session_config(request)
-        pool = session_config.pool(request)
         attempts = 0
         while session_id not in self._pools[pool]:  # After 1st loop: invalid session.
             try:
@@ -547,13 +591,12 @@ async def _next(self, request) -> str:
         *request* is needed to determine the URL to use for request
         initialization.
         """
-        session_config = self._get_session_config(request)
-        pool = session_config.pool(request)
+        pool = self._get_pool(request)
         if self._pending_initial_sessions[pool] >= 1:
             self._pending_initial_sessions[pool] -= 1
-            session_id = await self._create_session(request)
+            session_id = await self._create_session(request, pool)
         else:
-            session_id = await self._next_from_queue(request)
+            session_id = await self._next_from_queue(request, pool)
         return session_id
 
     def is_init_request(self, request: Request) -> bool:
@@ -581,9 +624,7 @@ def _get_request_session_id(self, request: Request) -> Optional[str]:
             )
         return None
 
-    def _start_session_refresh(self, session_id: str, request: Request):
-        session_config = self._get_session_config(request)
-        pool = session_config.pool(request)
+    def _start_session_refresh(self, session_id: str, request: Request, pool: str):
         try:
             self._pools[pool].remove(session_id)
         except KeyError:
@@ -591,7 +632,7 @@ def _start_session_refresh(self, session_id: str, request: Request):
             # not refresh the session again.
             pass
         else:
-            task = create_task(self._create_session(request))
+            task = create_task(self._create_session(request, pool))
             self._init_tasks.add(task)
             task.add_done_callback(self._init_tasks.discard)
         try:
@@ -599,26 +640,38 @@ def _start_session_refresh(self, session_id: str, request: Request):
         except KeyError:
             pass
 
-    def _start_request_session_refresh(self, request: Request):
+    def _start_request_session_refresh(self, request: Request, pool: str):
         session_id = self._get_request_session_id(request)
         if session_id is None:
             return
-        self._start_session_refresh(session_id, request)
+        self._start_session_refresh(session_id, request, pool)
 
     async def check(self, response: Response, request: Request) -> bool:
         """Check the response for signs of session expiration, update the
         internal session pool accordingly, and return ``False`` if the session
         has expired or ``True`` if the session passed validation."""
         session_config = self._get_session_config(request)
-        passed = session_config.check(response, request)
-        pool = session_config.pool(request)
-        outcome = "passed" if passed else "failed"
-        self._crawler.stats.inc_value(
-            f"scrapy-zyte-api/sessions/pools/{pool}/use/check-{outcome}"
-        )
-        if passed:
-            return True
-        self._start_request_session_refresh(request)
+        pool = self._get_pool(request)
+        try:
+            passed = session_config.check(response, request)
+        except CloseSpider:
+            raise
+        except Exception:
+            self._crawler.stats.inc_value(
+                f"scrapy-zyte-api/sessions/pools/{pool}/use/check-error"
+            )
+            logger.exception(
+                f"Unexpected exception raised while checking session "
+                f"validity on response {response}."
+            )
+        else:
+            outcome = "passed" if passed else "failed"
+            self._crawler.stats.inc_value(
+                f"scrapy-zyte-api/sessions/pools/{pool}/use/check-{outcome}"
+            )
+            if passed:
+                return True
+        self._start_request_session_refresh(request, pool)
         return False
 
     async def assign(self, request: Request):
@@ -645,8 +698,7 @@ async def assign(self, request: Request):
         request.meta.setdefault("dont_merge_cookies", True)
 
     def handle_error(self, request: Request):
-        session_config = self._get_session_config(request)
-        pool = session_config.pool(request)
+        pool = self._get_pool(request)
         self._crawler.stats.inc_value(
             f"scrapy-zyte-api/sessions/pools/{pool}/use/failed"
         )
@@ -655,15 +707,40 @@ def handle_error(self, request: Request):
         self._errors[session_id] += 1
         if self._errors[session_id] < self._max_errors:
             return
-        self._start_request_session_refresh(request)
+        self._start_request_session_refresh(request, pool)
 
     def handle_expiration(self, request: Request):
-        session_config = self._get_session_config(request)
-        pool = session_config.pool(request)
+        pool = self._get_pool(request)
         self._crawler.stats.inc_value(
             f"scrapy-zyte-api/sessions/pools/{pool}/use/expired"
         )
-        self._start_request_session_refresh(request)
+        self._start_request_session_refresh(request, pool)
+
+
+class FatalErrorHandler:
+
+    def __init__(self, crawler):
+        self.crawler = crawler
+
+    async def __aenter__(self):
+        return None
+
+    async def __aexit__(self, exc_type, exc, tb):
+        if exc_type is None:
+            return
+        from twisted.internet import reactor
+        from twisted.internet.interfaces import IReactorCore
+
+        reactor = cast(IReactorCore, reactor)
+        close = partial(
+            reactor.callLater, 0, self.crawler.engine.close_spider, self.crawler.spider
+        )
+        if issubclass(exc_type, TooManyBadSessionInits):
+            close("bad_session_inits")
+        elif issubclass(exc_type, PoolError):
+            close("pool_error")
+        elif issubclass(exc_type, CloseSpider):
+            close(exc.reason)
 
 
 class ScrapyZyteAPISessionDownloaderMiddleware:
@@ -676,35 +753,15 @@ def __init__(self, crawler: Crawler):
         self._enabled = crawler.settings.getbool("ZYTE_API_SESSION_ENABLED", False)
         self._crawler = crawler
         self._sessions = _SessionManager(crawler)
+        self._fatal_error_handler = FatalErrorHandler(crawler)
 
     async def process_request(self, request: Request, spider: Spider) -> None:
         if not request.meta.get(
             "zyte_api_session_enabled", self._enabled
         ) or self._sessions.is_init_request(request):
             return
-        try:
+        async with self._fatal_error_handler:
             await self._sessions.assign(request)
-        except TooManyBadSessionInits:
-            from twisted.internet import reactor
-            from twisted.internet.interfaces import IReactorCore
-
-            reactor = cast(IReactorCore, reactor)
-            reactor.callLater(
-                0, self._crawler.engine.close_spider, spider, "bad_session_inits"
-            )
-            raise
-        except CloseSpider as close_spider_exception:
-            from twisted.internet import reactor
-            from twisted.internet.interfaces import IReactorCore
-
-            reactor = cast(IReactorCore, reactor)
-            reactor.callLater(
-                0,
-                self._crawler.engine.close_spider,
-                spider,
-                close_spider_exception.reason,
-            )
-            raise
 
     async def process_response(
         self, request: Request, response: Response, spider: Spider
@@ -715,20 +772,8 @@ async def process_response(
             or self._sessions.is_init_request(request)
         ):
             return response
-        try:
+        async with self._fatal_error_handler:
             passed = await self._sessions.check(response, request)
-        except CloseSpider as close_spider_exception:
-            from twisted.internet import reactor
-            from twisted.internet.interfaces import IReactorCore
-
-            reactor = cast(IReactorCore, reactor)
-            reactor.callLater(
-                0,
-                self._crawler.engine.close_spider,
-                spider,
-                close_spider_exception.reason,
-            )
-            raise
         if not passed:
             new_request_or_none = get_retry_request(
                 request,
@@ -740,7 +785,7 @@ async def process_response(
             return new_request_or_none
         return response
 
-    def process_exception(
+    async def process_exception(
         self, request: Request, exception: Exception, spider: Spider
     ) -> Union[Request, None]:
         if (
@@ -751,10 +796,12 @@ def process_exception(
             return None
 
         if exception.parsed.type == "/problem/session-expired":
-            self._sessions.handle_expiration(request)
+            async with self._fatal_error_handler:
+                self._sessions.handle_expiration(request)
             reason = "session_expired"
         elif exception.status in {520, 521}:
-            self._sessions.handle_error(request)
+            async with self._fatal_error_handler:
+                self._sessions.handle_error(request)
             reason = "download_error"
         else:
             return None
diff --git a/tests/test_sessions.py b/tests/test_sessions.py
index 8593f9ee..045e16e5 100644
--- a/tests/test_sessions.py
+++ b/tests/test_sessions.py
@@ -19,7 +19,7 @@
     SessionConfig,
     session_config,
 )
-from scrapy_zyte_api._session import session_config_registry
+from scrapy_zyte_api._session import SESSION_INIT_META_KEY, session_config_registry
 from scrapy_zyte_api.utils import _RAW_CLASS_SETTING_SUPPORT, _REQUEST_ERROR_HAS_QUERY
 
 from . import get_crawler, serialize_settings
@@ -328,7 +328,7 @@ class ConstantChecker:
     def __init__(self, result):
         self._result = result
 
-    def check(self, request: Request, response: Response) -> bool:
+    def check(self, response: Response, request: Request) -> bool:
         if self._result in (True, False):
             return self._result
         raise self._result
@@ -346,7 +346,12 @@ def __init__(self):
 
 class CloseSpiderChecker(ConstantChecker):
     def __init__(self):
-        super().__init__(CloseSpider("checker_failed"))
+        super().__init__(CloseSpider("closed_by_checker"))
+
+
+class UnexpectedExceptionChecker(ConstantChecker):
+    def __init__(self):
+        super().__init__(Exception)
 
 
 class TrueCrawlerChecker(ConstantChecker):
@@ -367,6 +372,34 @@ def __init__(self, crawler):
         super().__init__(not crawler.settings["ZYTE_API_SESSION_ENABLED"])
 
 
+class UseChecker(ConstantChecker):
+    """Always pass for session initialization requests, apply the check logic
+    only on session use requests."""
+
+    def check(self, response: Response, request: Request) -> bool:
+        if response.meta.get(SESSION_INIT_META_KEY, False) is True:
+            return True
+        return super().check(request, response)
+
+
+class FalseUseChecker(FalseChecker, UseChecker):
+    pass
+
+
+class CloseSpiderUseChecker(CloseSpiderChecker, UseChecker):
+    pass
+
+
+class UnexpectedExceptionUseChecker(UnexpectedExceptionChecker, UseChecker):
+    pass
+
+
+# NOTE: There is no use checker subclass for TrueChecker because the outcome
+# would be the same (always return True), and there are no use checker
+# subclasses for the crawler classes because the init use is enough to verify
+# that using the crawler works.
+
+
 @pytest.mark.parametrize(
     ("checker", "close_reason", "stats"),
     (
@@ -397,7 +430,35 @@ def __init__(self, crawler):
             "bad_session_inits",
             {"scrapy-zyte-api/sessions/pools/example.com/init/check-failed": 1},
         ),
-        (CloseSpiderChecker, "checker_failed", {}),
+        (
+            FalseUseChecker,
+            "finished",
+            {
+                "scrapy-zyte-api/sessions/pools/example.com/init/check-passed": 2,
+                "scrapy-zyte-api/sessions/pools/example.com/use/check-failed": 1,
+            },
+        ),
+        (CloseSpiderChecker, "closed_by_checker", {}),
+        (
+            CloseSpiderUseChecker,
+            "closed_by_checker",
+            {
+                "scrapy-zyte-api/sessions/pools/example.com/init/check-passed": 1,
+            },
+        ),
+        (
+            UnexpectedExceptionChecker,
+            "bad_session_inits",
+            {"scrapy-zyte-api/sessions/pools/example.com/init/check-error": 1},
+        ),
+        (
+            UnexpectedExceptionUseChecker,
+            "finished",
+            {
+                "scrapy-zyte-api/sessions/pools/example.com/init/check-passed": 2,
+                "scrapy-zyte-api/sessions/pools/example.com/use/check-error": 1,
+            },
+        ),
         (
             TrueCrawlerChecker,
             "finished",
@@ -426,7 +487,35 @@ def __init__(self, crawler):
             "bad_session_inits",
             {"scrapy-zyte-api/sessions/pools/example.com/init/check-failed": 1},
        ),
-        ("tests.test_sessions.CloseSpiderChecker", "checker_failed", {}),
+        (
+            "tests.test_sessions.FalseUseChecker",
+            "finished",
+            {
+                "scrapy-zyte-api/sessions/pools/example.com/init/check-passed": 2,
+                "scrapy-zyte-api/sessions/pools/example.com/use/check-failed": 1,
+            },
+        ),
+        ("tests.test_sessions.CloseSpiderChecker", "closed_by_checker", {}),
+        (
+            "tests.test_sessions.CloseSpiderUseChecker",
+            "closed_by_checker",
+            {
+                "scrapy-zyte-api/sessions/pools/example.com/init/check-passed": 1,
+            },
+        ),
+        (
+            "tests.test_sessions.UnexpectedExceptionChecker",
+            "bad_session_inits",
+            {"scrapy-zyte-api/sessions/pools/example.com/init/check-error": 1},
+        ),
+        (
+            "tests.test_sessions.UnexpectedExceptionUseChecker",
+            "finished",
+            {
+                "scrapy-zyte-api/sessions/pools/example.com/init/check-passed": 2,
+                "scrapy-zyte-api/sessions/pools/example.com/use/check-error": 1,
+            },
+        ),
         (
             "tests.test_sessions.TrueCrawlerChecker",
             "finished",
@@ -445,6 +534,7 @@ def __init__(self, crawler):
 @ensureDeferred
 async def test_checker(checker, close_reason, stats, mockserver):
     settings = {
+        "RETRY_TIMES": 0,
         "ZYTE_API_URL": mockserver.urljoin("/"),
         "ZYTE_API_SESSION_CHECKER": checker,
         "ZYTE_API_SESSION_ENABLED": True,
@@ -562,7 +652,7 @@ class CloseSpiderURLChecker:
 
     def check(self, request: Request, response: Response) -> bool:
         if "fail" in request.url:
-            raise CloseSpider("checker_failed")
+            raise CloseSpider("closed_by_checker")
         return True
 
 
@@ -596,7 +686,7 @@ def closed(self, reason):
         for k, v in crawler.stats.get_stats().items()
         if k.startswith("scrapy-zyte-api/sessions")
     }
-    assert crawler.spider.close_reason == "checker_failed"
+    assert crawler.spider.close_reason == "closed_by_checker"
     assert session_stats == {
         "scrapy-zyte-api/sessions/pools/example.com/init/check-passed": 1,
     }
@@ -1258,6 +1348,146 @@ def parse(self, response):
     session_config_registry.__init__()  # type: ignore[misc]
 
 
+@ensureDeferred
+async def test_session_config_param_error(mockserver):
+    pytest.importorskip("web_poet")
+
+    @session_config(["example.com"])
+    class CustomSessionConfig(SessionConfig):
+
+        def params(self, request: Request):
+            raise Exception
+
+    settings = {
+        "RETRY_TIMES": 0,
+        "ZYTE_API_URL": mockserver.urljoin("/"),
+        "ZYTE_API_SESSION_ENABLED": True,
+        "ZYTE_API_SESSION_LOCATION": {"postalCode": "10001"},
+        "ZYTE_API_SESSION_MAX_BAD_INITS": 1,
+    }
+
+    class TestSpider(Spider):
+        name = "test"
+        start_urls = ["https://example.com"]
+
+        def parse(self, response):
+            pass
+
+    crawler = await get_crawler(settings, spider_cls=TestSpider, setup_engine=False)
+    await crawler.crawl()
+
+    session_stats = {
+        k: v
+        for k, v in crawler.stats.get_stats().items()
+        if k.startswith("scrapy-zyte-api/sessions")
+    }
+    assert session_stats == {
+        "scrapy-zyte-api/sessions/pools/example.com/init/param-error": 1,
+    }
+
+    # Clean up the session config registry.
+    session_config_registry.__init__()  # type: ignore[misc]
+
+
+@ensureDeferred
+async def test_session_config_pool_caching(mockserver):
+    pytest.importorskip("web_poet")
+
+    @session_config(["example.com"])
+    class CustomSessionConfig(SessionConfig):
+        def __init__(self, crawler):
+            super().__init__(crawler)
+            self.pools = deque(("example.com",))
+
+        def pool(self, request: Request):
+            # The following code would fail on the second call, which never
+            # happens due to pool caching.
+            return self.pools.popleft()
+
+    settings = {
+        "RETRY_TIMES": 0,
+        "ZYTE_API_URL": mockserver.urljoin("/"),
+        "ZYTE_API_SESSION_ENABLED": True,
+        "ZYTE_API_SESSION_LOCATION": {"postalCode": "10001"},
+        "ZYTE_API_SESSION_MAX_BAD_INITS": 1,
+    }
+
+    class TestSpider(Spider):
+        name = "test"
+        start_urls = ["https://example.com"]
+
+        def parse(self, response):
+            pass
+
+        def closed(self, reason):
+            self.close_reason = reason
+
+    crawler = await get_crawler(settings, spider_cls=TestSpider, setup_engine=False)
+    await crawler.crawl()
+
+    session_stats = {
+        k: v
+        for k, v in crawler.stats.get_stats().items()
+        if k.startswith("scrapy-zyte-api/sessions")
+    }
+    assert session_stats == {
+        "scrapy-zyte-api/sessions/pools/example.com/init/check-passed": 1,
+        "scrapy-zyte-api/sessions/pools/example.com/use/check-passed": 1,
+    }
+    assert crawler.spider.close_reason == "finished"
+
+    # Clean up the session config registry.
+    session_config_registry.__init__()  # type: ignore[misc]
+
+
+@ensureDeferred
+async def test_session_config_pool_error(mockserver):
+    # NOTE: This error should only happen during the initial process_request
+    # call. By the time the code reaches process_response, the cached pool
+    # value for that request is reused, so there is no new call to
+    # SessionConfig.pool that could fail during process_response only.
+
+    pytest.importorskip("web_poet")
+
+    @session_config(["example.com"])
+    class CustomSessionConfig(SessionConfig):
+
+        def pool(self, request: Request):
+            raise Exception
+
+    settings = {
+        "RETRY_TIMES": 0,
+        "ZYTE_API_URL": mockserver.urljoin("/"),
+        "ZYTE_API_SESSION_ENABLED": True,
+        "ZYTE_API_SESSION_LOCATION": {"postalCode": "10001"},
+        "ZYTE_API_SESSION_MAX_BAD_INITS": 1,
+    }
+
+    class TestSpider(Spider):
+        name = "test"
+        start_urls = ["https://example.com"]
+
+        def parse(self, response):
+            pass
+
+        def closed(self, reason):
+            self.close_reason = reason
+
+    crawler = await get_crawler(settings, spider_cls=TestSpider, setup_engine=False)
+    await crawler.crawl()
+
+    session_stats = {
+        k: v
+        for k, v in crawler.stats.get_stats().items()
+        if k.startswith("scrapy-zyte-api/sessions")
+    }
+    assert session_stats == {}
+    assert crawler.spider.close_reason == "pool_error"
+
+    # Clean up the session config registry.
+    session_config_registry.__init__()  # type: ignore[misc]
+
+
 @ensureDeferred
 async def test_session_config_no_web_poet(mockserver):
     """If web-poet is not installed, @session_config raises a RuntimeError."""