Skip to content

Commit

Permalink
use HttpOrBrowserResponse
Browse files Browse the repository at this point in the history
  • Loading branch information
BurnzZ committed Jan 16, 2024
1 parent 015c590 commit a4c67a6
Show file tree
Hide file tree
Showing 3 changed files with 6 additions and 10 deletions.
3 changes: 2 additions & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,8 @@
"scrapy>=2.11.0",
"scrapy-poet>=0.16.0",
"scrapy-spider-metadata>=0.1.2",
"scrapy-zyte-api[provider]>=0.12.2",
# "scrapy-zyte-api[provider]>=0.12.2",
"scrapy-zyte-api @ git+https://git@github.com/scrapy-plugins/scrapy-zyte-api@http-or-browser-response#egg=scrapy-zyte-api"
"zyte-common-items>=0.13.0",
],
classifiers=[
Expand Down
11 changes: 3 additions & 8 deletions zyte_spider_templates/pages/product_navigation_heuristics.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
import attrs
from scrapy.http import TextResponse
from scrapy.linkextractors import LinkExtractor
from web_poet import HttpResponse, PageParams, field, handle_urls
from web_poet import HttpResponse, PageParams, field, handle_urls, HttpOrBrowserResponse, BrowserResponse
from zyte_common_items import AutoProductNavigationPage, ProbabilityRequest

from zyte_spider_templates.heuristics import might_be_category
Expand All @@ -12,12 +12,7 @@
@handle_urls("")
@attrs.define
class HeuristicsProductNavigationPage(AutoProductNavigationPage):
# TODO: swap with BrowserResponse after evaluating it.
# Also, only after the following issue has been fixed:
# https://github.com/scrapy-plugins/scrapy-zyte-api/issues/91#issuecomment-1744305554
# NOTE: Even with BrowserResponse, it would still send separate
# requests for it and productNavigation.
response: HttpResponse
response: HttpOrBrowserResponse
page_params: PageParams

@field
Expand Down Expand Up @@ -55,7 +50,7 @@ def _probably_category_links(self) -> List[ProbabilityRequest]:
ignore_urls = set(self._urls_for_category())

links = []
response = TextResponse(url=str(self.response.url), body=self.response.body)
response = TextResponse(url=self.response.url, body=self.response.text.encode())
for link in link_extractor.extract_links(response):
if link.url in ignore_urls:
continue
Expand Down
2 changes: 1 addition & 1 deletion zyte_spider_templates/spiders/ecommerce.py
Original file line number Diff line number Diff line change
Expand Up @@ -69,7 +69,7 @@ class EcommerceSpiderParams(BaseSpiderParams):
"Whether to perform extraction using a browser request "
"(browserHtml) or an HTTP request (httpResponseBody)."
),
default=None,
default=ExtractFrom.browserHtml,
json_schema_extra={
"enumMeta": {
ExtractFrom.browserHtml: {
Expand Down

0 comments on commit a4c67a6

Please sign in to comment.