diff --git a/docs/reference/inputs.rst b/docs/reference/inputs.rst index 3b2d15f9..75d8f9d4 100644 --- a/docs/reference/inputs.rst +++ b/docs/reference/inputs.rst @@ -13,6 +13,17 @@ Inputs - :class:`web_poet.BrowserResponse` +- :class:`web_poet.AnyResponse` + + This re-uses either :class:`web_poet.BrowserResponse` *(takes priority)* + or :class:`web_poet.HttpResponse` if they're available. + + If neither is available, it would use :class:`web_poet.HttpResponse` + requested from Zyte API. However, if other item inputs (e.g. + :class:`zyte_common_items.Product`) are present, it would request + :class:`web_poet.BrowserResponse` from Zyte API unless an extraction + source is provided. + - :class:`zyte_common_items.Article` - :class:`zyte_common_items.ArticleList` diff --git a/scrapy_zyte_api/providers.py b/scrapy_zyte_api/providers.py index c205bbf6..a69e36ff 100644 --- a/scrapy_zyte_api/providers.py +++ b/scrapy_zyte_api/providers.py @@ -1,12 +1,17 @@ from typing import Any, Callable, Dict, List, Sequence, Set -from weakref import WeakKeyDictionary from andi.typeutils import is_typing_annotated, strip_annotated from scrapy import Request from scrapy.crawler import Crawler from scrapy.utils.defer import maybe_deferred_to_future from scrapy_poet import AnnotatedResult, PageObjectInputProvider -from web_poet import BrowserHtml, BrowserResponse +from web_poet import ( + AnyResponse, + BrowserHtml, + BrowserResponse, + HttpResponse, + HttpResponseHeaders, +) from zyte_common_items import ( Article, ArticleList, @@ -40,34 +45,37 @@ class ZyteApiProvider(PageObjectInputProvider): Article, ArticleList, ArticleNavigation, + AnyResponse, JobPosting, Geolocation, } - def __init__(self, injector): - super().__init__(injector) - self._cached_instances: WeakKeyDictionary[Request, Dict] = WeakKeyDictionary() - def is_provided(self, type_: Callable) -> bool: return super().is_provided(strip_annotated(type_)) - def update_cache(self, request: Request, mapping: Dict[Any, Any]) -> None: 
- if request not in self._cached_instances: - self._cached_instances[request] = {} - self._cached_instances[request].update(mapping) - async def __call__( # noqa: C901 self, to_provide: Set[Callable], request: Request, crawler: Crawler ) -> Sequence[Any]: """Makes a Zyte API request to provide BrowserResponse and/or item dependencies.""" - # TODO what if ``response`` is already from Zyte API and contains something we need results: List[Any] = [] + http_response = None for cls in list(to_provide): - item = self._cached_instances.get(request, {}).get(cls) + item = self.injector.weak_cache.get(request, {}).get(cls) if item: results.append(item) to_provide.remove(cls) + + # BrowserResponse takes precedence over HttpResponse + elif cls == AnyResponse and BrowserResponse not in to_provide: + http_response = self.injector.weak_cache.get(request, {}).get( + HttpResponse + ) + if http_response: + any_response = AnyResponse(response=http_response) + results.append(any_response) + to_provide.remove(cls) + if not to_provide: return results @@ -83,11 +91,10 @@ async def __call__( # noqa: C901 } zyte_api_meta = crawler.settings.getdict("ZYTE_API_PROVIDER_PARAMS") - if html_requested: - zyte_api_meta["browserHtml"] = True to_provide_stripped: Set[type] = set() extract_from_seen: Dict[str, str] = {} + item_requested: bool = False for cls in to_provide: cls_stripped = strip_annotated(cls) @@ -100,6 +107,7 @@ async def __call__( # noqa: C901 kw = item_keywords.get(cls_stripped) if not kw: continue + item_requested = True to_provide_stripped.add(cls_stripped) zyte_api_meta[kw] = True if not is_typing_annotated(cls): @@ -118,10 +126,32 @@ async def __call__( # noqa: C901 options["extractFrom"] = extract_from.value break + http_response_needed = ( + AnyResponse in to_provide + and BrowserResponse not in to_provide + and BrowserHtml not in to_provide + and not http_response + ) + + extract_from = None # type: ignore[assignment] for item_type, kw in item_keywords.items(): options_name = 
f"{kw}Options" if item_type not in to_provide_stripped and options_name in zyte_api_meta: del zyte_api_meta[options_name] + elif zyte_api_meta.get(options_name, {}).get("extractFrom"): + extract_from = zyte_api_meta[options_name]["extractFrom"] + + if AnyResponse in to_provide: + if ( + item_requested and extract_from != "httpResponseBody" + ) or extract_from == "browserHtml": + html_requested = True + elif extract_from == "httpResponseBody" or http_response_needed: + zyte_api_meta["httpResponseBody"] = True + zyte_api_meta["httpResponseHeaders"] = True + + if html_requested: + zyte_api_meta["browserHtml"] = True api_request = Request( url=request.url, @@ -142,15 +172,45 @@ async def __call__( # noqa: C901 html = None if BrowserHtml in to_provide: results.append(html) - self.update_cache(request, {BrowserHtml: html}) + + browser_response = None if BrowserResponse in to_provide: - response = BrowserResponse( + browser_response = BrowserResponse( url=api_response.url, status=api_response.status, html=html, ) - results.append(response) - self.update_cache(request, {BrowserResponse: response}) + results.append(browser_response) + + if AnyResponse in to_provide: + any_response = None # type: ignore[assignment] + + if "browserHtml" in api_response.raw_api_response: + any_response = AnyResponse( + response=browser_response + or BrowserResponse( + url=api_response.url, + status=api_response.status, + html=html, + ) + ) + elif ( + "httpResponseBody" in api_response.raw_api_response + and "httpResponseHeaders" in api_response.raw_api_response + ): + any_response = AnyResponse( + response=HttpResponse( + url=api_response.url, + body=api_response.body, + status=api_response.status, + headers=HttpResponseHeaders.from_bytes_dict( + api_response.headers + ), + ) + ) + + if any_response: + results.append(any_response) for cls in to_provide: cls_stripped = strip_annotated(cls) @@ -163,9 +223,8 @@ async def __call__( # noqa: C901 if not kw: continue assert issubclass(cls_stripped, 
Item) - item = cls_stripped.from_dict(api_response.raw_api_response[kw]) + item = cls_stripped.from_dict(api_response.raw_api_response[kw]) # type: ignore[attr-defined] if is_typing_annotated(cls): item = AnnotatedResult(item, cls.__metadata__) # type: ignore[attr-defined] results.append(item) - self.update_cache(request, {cls: item}) return results diff --git a/setup.py b/setup.py index ab445c67..f760e71a 100644 --- a/setup.py +++ b/setup.py @@ -31,8 +31,8 @@ def get_version(): # Sync with [testenv:pinned-provider] @ tox.ini "provider": [ "andi>=0.6.0", - "scrapy-poet>=0.20.1", - "web-poet>=0.15.1", + "scrapy-poet>=0.21.0", + "web-poet>=0.16.0", "zyte-common-items>=0.8.0", ] }, diff --git a/tests/test_providers.py b/tests/test_providers.py index 37c4f526..72652f07 100644 --- a/tests/test_providers.py +++ b/tests/test_providers.py @@ -12,10 +12,19 @@ from scrapy_poet.utils.testing import create_scrapy_settings as _create_scrapy_settings from twisted.internet import reactor from twisted.web.client import Agent, readBody -from web_poet import BrowserHtml, BrowserResponse, ItemPage, field, handle_urls +from web_poet import ( + AnyResponse, + BrowserHtml, + BrowserResponse, + HttpResponse, + ItemPage, + field, + handle_urls, +) from zyte_common_items import BasePage, Product from scrapy_zyte_api._annotations import ExtractFrom, Geolocation +from scrapy_zyte_api.handler import ScrapyZyteAPIDownloadHandler from scrapy_zyte_api.providers import ZyteApiProvider from . 
import SETTINGS @@ -23,7 +32,7 @@ def create_scrapy_settings(): - settings = _create_scrapy_settings(None) + settings = _create_scrapy_settings() for setting, value in SETTINGS.items(): if setting.endswith("_MIDDLEWARES") and settings[setting]: settings[setting].update(value) @@ -312,3 +321,521 @@ def parse_(self, response: DummyResponse, page: GeoProductPage): # type: ignore item, url, _ = await crawl_single_item(GeoZyteAPISpider, HtmlResource, settings) assert item is None assert "Geolocation dependencies must be annotated" in caplog.text + + +class RecordingHandler(ScrapyZyteAPIDownloadHandler): + """Subclasses the original handler in order to record the Zyte API parameters + used for each downloading request. + """ + + def __init__(self, *args, **kwargs): + super().__init__(*args, **kwargs) + self.params = [] + + def _log_request(self, params): + self.params.append(params) + + +def provider_settings(server): + settings = create_scrapy_settings() + settings["ZYTE_API_URL"] = server.urljoin("/") + settings["ZYTE_API_TRANSPARENT_MODE"] = True + settings["SCRAPY_POET_PROVIDERS"] = {ZyteApiProvider: 1100} + settings["DOWNLOAD_HANDLERS"]["http"] = RecordingHandler + return settings + + +CUSTOM_HTTP_REQUEST_HEADERS = [ + { + "name": "Accept", + "value": "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8", + }, + {"name": "Accept-Language", "value": "en"}, + {"name": "Accept-Encoding", "value": "gzip, deflate, br"}, +] + + +@ensureDeferred +async def test_provider_any_response_only(mockserver): + @attrs.define + class SomePage(BasePage): + response: AnyResponse + + class ZyteAPISpider(Spider): + def start_requests(self): + yield Request(self.url, callback=self.parse_) + + def parse_(self, response: DummyResponse, page: SomePage): + yield {"page": page} + + settings = provider_settings(mockserver) + item, url, crawler = await crawl_single_item(ZyteAPISpider, HtmlResource, settings) + params = crawler.engine.downloader.handlers._handlers["http"].params + + 
assert len(params) == 1 + assert params[0] == { + "url": url, + "httpResponseBody": True, + "httpResponseHeaders": True, + } + assert type(item["page"].response) == AnyResponse + assert type(item["page"].response.response) == HttpResponse + + +@ensureDeferred +async def test_provider_any_response_product(mockserver): + @attrs.define + class SomePage(BasePage): + response: AnyResponse + product: Product + + class ZyteAPISpider(Spider): + def start_requests(self): + yield Request(self.url, callback=self.parse_) + + def parse_(self, response: DummyResponse, page: SomePage): + yield {"page": page} + + settings = provider_settings(mockserver) + item, url, crawler = await crawl_single_item(ZyteAPISpider, HtmlResource, settings) + params = crawler.engine.downloader.handlers._handlers["http"].params + + assert len(params) == 1 + assert params[0] == { + "url": url, + "product": True, + "browserHtml": True, + } + assert type(item["page"].response) == AnyResponse + assert type(item["page"].response.response) == BrowserResponse + assert type(item["page"].product) == Product + + +@ensureDeferred +async def test_provider_any_response_product_extract_from_browser_html(mockserver): + @attrs.define + class SomePage(BasePage): + response: AnyResponse + product: Product + + class ZyteAPISpider(Spider): + def start_requests(self): + yield Request(self.url, callback=self.parse_) + + def parse_(self, response: DummyResponse, page: SomePage): + yield {"page": page} + + product_options = {"extractFrom": "browserHtml"} + settings = provider_settings(mockserver) + settings["ZYTE_API_PROVIDER_PARAMS"] = {"productOptions": product_options} + item, url, crawler = await crawl_single_item(ZyteAPISpider, HtmlResource, settings) + params = crawler.engine.downloader.handlers._handlers["http"].params + + assert len(params) == 1 + assert params[0] == { + "url": url, + "product": True, + "browserHtml": True, + "productOptions": product_options, + } + + assert type(item["page"].response) == AnyResponse 
+ assert type(item["page"].response.response) == BrowserResponse + assert type(item["page"].product) == Product + + +@ensureDeferred +async def test_provider_any_response_product_item_extract_from_browser_html(mockserver): + @attrs.define + class SomePage(ItemPage[Product]): + response: AnyResponse + + class ZyteAPISpider(Spider): + def start_requests(self): + yield Request(self.url, callback=self.parse_) + + def parse_(self, response: DummyResponse, page: SomePage, product: Product): + yield {"page": page, "product": product} + + product_options = {"extractFrom": "browserHtml"} + settings = provider_settings(mockserver) + settings["ZYTE_API_PROVIDER_PARAMS"] = {"productOptions": product_options} + item, url, crawler = await crawl_single_item(ZyteAPISpider, HtmlResource, settings) + params = crawler.engine.downloader.handlers._handlers["http"].params + + assert len(params) == 1 + assert params[0] == { + "url": url, + "product": True, + "browserHtml": True, + "productOptions": product_options, + } + + assert type(item["page"].response) == AnyResponse + assert type(item["page"].response.response) == BrowserResponse + assert type(item["product"]) == Product + + +@ensureDeferred +async def test_provider_any_response_product_extract_from_browser_html_2(mockserver): + @attrs.define + class SomePage(BasePage): + response: AnyResponse + browser_response: BrowserResponse + product: Product + + class ZyteAPISpider(Spider): + def start_requests(self): + yield Request(self.url, callback=self.parse_) + + def parse_(self, response: DummyResponse, page: SomePage): + yield {"page": page} + + product_options = {"extractFrom": "browserHtml"} + settings = provider_settings(mockserver) + settings["ZYTE_API_PROVIDER_PARAMS"] = {"productOptions": product_options} + item, url, crawler = await crawl_single_item(ZyteAPISpider, HtmlResource, settings) + params = crawler.engine.downloader.handlers._handlers["http"].params + + assert len(params) == 1 + assert params[0] == { + "url": url, + 
"product": True, + "browserHtml": True, + "productOptions": product_options, + } + + assert type(item["page"].response) == AnyResponse + assert type(item["page"].response.response) == BrowserResponse + assert type(item["page"].browser_response) == BrowserResponse + assert type(item["page"].product) == Product + + assert id(item["page"].browser_response) == id(item["page"].response.response) + + +@ensureDeferred +async def test_provider_any_response_product_extract_from_http_response(mockserver): + @attrs.define + class SomePage(BasePage): + response: AnyResponse + product: Product + + class ZyteAPISpider(Spider): + def start_requests(self): + yield Request(self.url, callback=self.parse_) + + def parse_(self, response: DummyResponse, page: SomePage): + yield {"page": page} + + product_options = {"extractFrom": "httpResponseBody"} + settings = provider_settings(mockserver) + settings["ZYTE_API_PROVIDER_PARAMS"] = {"productOptions": product_options} + item, url, crawler = await crawl_single_item(ZyteAPISpider, HtmlResource, settings) + params = crawler.engine.downloader.handlers._handlers["http"].params + + assert len(params) == 1 + assert params[0] == { + "url": url, + "product": True, + "httpResponseBody": True, + "productOptions": product_options, + "httpResponseHeaders": True, + } + + assert type(item["page"].response) == AnyResponse + assert type(item["page"].response.response) == HttpResponse + assert type(item["page"].product) == Product + + +@ensureDeferred +async def test_provider_any_response_product_options_empty(mockserver): + @attrs.define + class SomePage(BasePage): + response: AnyResponse + product: Product + + class ZyteAPISpider(Spider): + def start_requests(self): + yield Request(self.url, callback=self.parse_) + + def parse_(self, response: DummyResponse, page: SomePage): + yield {"page": page} + + settings = provider_settings(mockserver) + settings["ZYTE_API_PROVIDER_PARAMS"] = {"productOptions": {}} + item, url, crawler = await 
crawl_single_item(ZyteAPISpider, HtmlResource, settings) + params = crawler.engine.downloader.handlers._handlers["http"].params + + assert len(params) == 1 + assert params[0] == { + "url": url, + "product": True, + "browserHtml": True, + } + + assert type(item["page"].response) == AnyResponse + assert type(item["page"].response.response) == BrowserResponse + assert type(item["page"].product) == Product + + +# The issue here is that HttpResponseProvider runs earlier than ScrapyZyteAPI. +# HttpResponseProvider doesn't know that it should not run since ScrapyZyteAPI +# could provide HttpResponse in any case. +@pytest.mark.xfail(reason="Not supported yet", raises=AssertionError, strict=True) +@ensureDeferred +async def test_provider_any_response_product_extract_from_http_response_2(mockserver): + @attrs.define + class SomePage(BasePage): + response: AnyResponse + http_response: HttpResponse + product: Product + + class ZyteAPISpider(Spider): + def start_requests(self): + yield Request(self.url, callback=self.parse_) + + def parse_(self, response: DummyResponse, page: SomePage): + yield {"page": page} + + product_options = {"extractFrom": "httpResponseBody"} + settings = provider_settings(mockserver) + settings["ZYTE_API_PROVIDER_PARAMS"] = {"productOptions": product_options} + item, url, crawler = await crawl_single_item(ZyteAPISpider, HtmlResource, settings) + params = crawler.engine.downloader.handlers._handlers["http"].params + + assert len(params) == 1 + assert params[0] == { + "url": url, + "product": True, + "httpResponseBody": True, + "httpResponseHeaders": True, + "productOptions": product_options, + } + + assert type(item["page"].response) == AnyResponse + assert type(item["page"].response.response) == HttpResponse + assert type(item["page"].product) == Product + assert type(item["page"].http_response) == HttpResponse + + +@ensureDeferred +async def test_provider_any_response_browser_html(mockserver): + @attrs.define + class SomePage(BasePage): + response: 
AnyResponse + html: BrowserHtml + + class ZyteAPISpider(Spider): + def start_requests(self): + yield Request(self.url, callback=self.parse_) + + def parse_(self, response: DummyResponse, page: SomePage): + yield {"page": page} + + settings = provider_settings(mockserver) + item, url, crawler = await crawl_single_item(ZyteAPISpider, HtmlResource, settings) + params = crawler.engine.downloader.handlers._handlers["http"].params + + assert len(params) == 1 + assert params[0] == {"url": url, "browserHtml": True} + + assert type(item["page"].response) == AnyResponse + assert type(item["page"].response.response) == BrowserResponse + assert type(item["page"].html) == BrowserHtml + + +@ensureDeferred +async def test_provider_any_response_browser_response(mockserver): + @attrs.define + class SomePage(BasePage): + response: AnyResponse + browser_response: BrowserResponse + + class ZyteAPISpider(Spider): + def start_requests(self): + yield Request(self.url, callback=self.parse_) + + def parse_(self, response: DummyResponse, page: SomePage): + yield {"page": page} + + settings = provider_settings(mockserver) + item, url, crawler = await crawl_single_item(ZyteAPISpider, HtmlResource, settings) + params = crawler.engine.downloader.handlers._handlers["http"].params + + assert len(params) == 1 + assert params[0] == {"url": url, "browserHtml": True} + + assert type(item["page"].response) == AnyResponse + assert type(item["page"].response.response) == BrowserResponse + assert type(item["page"].browser_response) == BrowserResponse + + +@ensureDeferred +async def test_provider_any_response_browser_html_response(mockserver): + @attrs.define + class SomePage(BasePage): + response: AnyResponse + browser_response: BrowserResponse + html: BrowserHtml + + class ZyteAPISpider(Spider): + def start_requests(self): + yield Request(self.url, callback=self.parse_) + + def parse_(self, response: DummyResponse, page: SomePage): + yield {"page": page} + + settings = provider_settings(mockserver) + 
item, url, crawler = await crawl_single_item(ZyteAPISpider, HtmlResource, settings) + params = crawler.engine.downloader.handlers._handlers["http"].params + + assert len(params) == 1 + assert params[0] == {"url": url, "browserHtml": True} + + assert type(item["page"].response) == AnyResponse + assert type(item["page"].response.response) == BrowserResponse + assert type(item["page"].browser_response) == BrowserResponse + assert type(item["page"].html) == BrowserHtml + + +@ensureDeferred +async def test_provider_any_response_http_response(mockserver): + @attrs.define + class SomePage(BasePage): + response: AnyResponse + http_response: HttpResponse + + class ZyteAPISpider(Spider): + def start_requests(self): + yield Request(self.url, callback=self.parse_) + + def parse_(self, response: DummyResponse, page: SomePage): + yield {"page": page} + + settings = provider_settings(mockserver) + item, url, crawler = await crawl_single_item(ZyteAPISpider, HtmlResource, settings) + params = crawler.engine.downloader.handlers._handlers["http"].params + + assert len(params) == 1 + assert params[0] == { + "url": url, + "httpResponseBody": True, + "httpResponseHeaders": True, + # This is actually set by HttpResponseProvider + "customHttpRequestHeaders": CUSTOM_HTTP_REQUEST_HEADERS, + } + + assert type(item["page"].response) == AnyResponse + assert type(item["page"].response.response) == HttpResponse + assert type(item["page"].http_response) == HttpResponse + + +@ensureDeferred +async def test_provider_any_response_browser_http_response(mockserver): + @attrs.define + class SomePage(BasePage): + response: AnyResponse + browser_response: BrowserResponse + http_response: HttpResponse + + class ZyteAPISpider(Spider): + def start_requests(self): + yield Request(self.url, callback=self.parse_) + + def parse_(self, response: DummyResponse, page: SomePage): + yield {"page": page} + + settings = provider_settings(mockserver) + item, url, crawler = await crawl_single_item(ZyteAPISpider, 
HtmlResource, settings) + params = crawler.engine.downloader.handlers._handlers["http"].params + + assert len(params) == 2 + assert params[0] == { + "url": url, + "httpResponseBody": True, + "httpResponseHeaders": True, + # This is actually set by HttpResponseProvider + "customHttpRequestHeaders": CUSTOM_HTTP_REQUEST_HEADERS, + } + assert params[1] == {"url": url, "browserHtml": True} + + assert type(item["page"].response) == AnyResponse + assert type(item["page"].response.response) == BrowserResponse + assert type(item["page"].browser_response) == BrowserResponse + assert type(item["page"].http_response) == HttpResponse + + assert id(item["page"].browser_response) == id(item["page"].response.response) + + +@ensureDeferred +async def test_provider_any_response_http_response_multiple_pages(mockserver): + @attrs.define + class FirstPage(BasePage): + http_response: HttpResponse + + @attrs.define + class SecondPage(BasePage): + http_response: HttpResponse + response: AnyResponse + + class ZyteAPISpider(Spider): + def start_requests(self): + yield Request(self.url, callback=self.parse_) + + def parse_(self, response: DummyResponse, page1: FirstPage, page2: SecondPage): + yield {"page1": page1, "page2": page2} + + settings = provider_settings(mockserver) + item, url, crawler = await crawl_single_item(ZyteAPISpider, HtmlResource, settings) + params = crawler.engine.downloader.handlers._handlers["http"].params + + assert len(params) == 1 + assert params[0] == { + "url": url, + "httpResponseBody": True, + "httpResponseHeaders": True, + # This is actually set by HttpResponseProvider + "customHttpRequestHeaders": CUSTOM_HTTP_REQUEST_HEADERS, + } + assert type(item["page1"].http_response) == HttpResponse + assert type(item["page2"].http_response) == HttpResponse + assert type(item["page2"].response) == AnyResponse + assert type(item["page2"].response.response) == HttpResponse + + +@ensureDeferred +async def 
test_provider_any_response_http_browser_response_multiple_pages(mockserver): + @attrs.define + class FirstPage(BasePage): + browser_response: BrowserResponse + + @attrs.define + class SecondPage(BasePage): + http_response: HttpResponse + response: AnyResponse + + class ZyteAPISpider(Spider): + def start_requests(self): + yield Request(self.url, callback=self.parse_) + + def parse_(self, response: DummyResponse, page1: FirstPage, page2: SecondPage): + yield {"page1": page1, "page2": page2} + + settings = provider_settings(mockserver) + item, url, crawler = await crawl_single_item(ZyteAPISpider, HtmlResource, settings) + params = crawler.engine.downloader.handlers._handlers["http"].params + + assert len(params) == 2 + assert params[0] == { + "url": url, + "httpResponseBody": True, + "httpResponseHeaders": True, + # This is actually set by HttpResponseProvider + "customHttpRequestHeaders": CUSTOM_HTTP_REQUEST_HEADERS, + } + assert params[1] == {"url": url, "browserHtml": True} + + assert type(item["page1"].browser_response) == BrowserResponse + assert type(item["page2"].http_response) == HttpResponse + assert type(item["page2"].response) == AnyResponse + assert type(item["page2"].response.response) == BrowserResponse diff --git a/tox.ini b/tox.ini index 2c968f22..e36e3dd1 100644 --- a/tox.ini +++ b/tox.ini @@ -88,8 +88,8 @@ deps = # scrapy-poet >= 0.4.0 depends on scrapy >= 2.6.0 {[testenv:pinned-scrapy-2x6]deps} andi==0.6.0 - scrapy-poet==0.20.1 - web-poet==0.15.1 + scrapy-poet==0.21.0 + web-poet==0.16.0 zyte-common-items==0.8.0 [testenv:pinned-extra]