use HttpOrBrowserResponse

zytedata · Jan 16, 2024 · a4c67a6 · a4c67a6
1 parent 015c590
commit a4c67a6
Show file tree

Hide file tree

Showing 3 changed files with 6 additions and 10 deletions.
diff --git a/setup.py b/setup.py
@@ -16,7 +16,8 @@
         "scrapy>=2.11.0",
         "scrapy-poet>=0.16.0",
         "scrapy-spider-metadata>=0.1.2",
-        "scrapy-zyte-api[provider]>=0.12.2",
+        # "scrapy-zyte-api[provider]>=0.12.2",
+        "scrapy-zyte-api @ git+https://git@github.com/scrapy-plugins/scrapy-zyte-api@http-or-browser-response#egg=scrapy-zyte-api"
         "zyte-common-items>=0.13.0",
     ],
     classifiers=[

diff --git a/zyte_spider_templates/pages/product_navigation_heuristics.py b/zyte_spider_templates/pages/product_navigation_heuristics.py
@@ -3,7 +3,7 @@
 import attrs
 from scrapy.http import TextResponse
 from scrapy.linkextractors import LinkExtractor
-from web_poet import HttpResponse, PageParams, field, handle_urls
+from web_poet import HttpResponse, PageParams, field, handle_urls, HttpOrBrowserResponse, BrowserResponse
 from zyte_common_items import AutoProductNavigationPage, ProbabilityRequest
 
 from zyte_spider_templates.heuristics import might_be_category
@@ -12,12 +12,7 @@
 @handle_urls("")
 @attrs.define
 class HeuristicsProductNavigationPage(AutoProductNavigationPage):
-    # TODO: swap with BrowserResponse after evaluating it.
-    # Also after when the following issue has been fixed:
-    # https://github.com/scrapy-plugins/scrapy-zyte-api/issues/91#issuecomment-1744305554
-    # NOTE: Even with BrowserResponse, it would still send separate
-    # requests for it and productNavigation.
-    response: HttpResponse
+    response: HttpOrBrowserResponse
     page_params: PageParams
 
     @field
@@ -55,7 +50,7 @@ def _probably_category_links(self) -> List[ProbabilityRequest]:
         ignore_urls = set(self._urls_for_category())
 
         links = []
-        response = TextResponse(url=str(self.response.url), body=self.response.body)
+        response = TextResponse(url=self.response.url, body=self.response.text.encode())
         for link in link_extractor.extract_links(response):
             if link.url in ignore_urls:
                 continue

diff --git a/zyte_spider_templates/spiders/ecommerce.py b/zyte_spider_templates/spiders/ecommerce.py
@@ -69,7 +69,7 @@ class EcommerceSpiderParams(BaseSpiderParams):
             "Whether to perform extraction using a browser request "
             "(browserHtml) or an HTTP request (httpResponseBody)."
         ),
-        default=None,
+        default=ExtractFrom.browserHtml,
         json_schema_extra={
             "enumMeta": {
                 ExtractFrom.browserHtml: {