Skip to content

Commit

Permalink
Forbid multiple extractFrom.
Browse files Browse the repository at this point in the history
  • Loading branch information
wRAR committed Oct 16, 2023
1 parent ab34b93 commit ecc42d4
Show file tree
Hide file tree
Showing 2 changed files with 28 additions and 0 deletions.
2 changes: 2 additions & 0 deletions scrapy_zyte_api/providers.py
Original file line number Diff line number Diff line change
Expand Up @@ -96,6 +96,8 @@ async def __call__(
for option in ExtractFrom:
if option in metadata:
product_options = zyte_api_meta.setdefault("productOptions", {})
if "extractFrom" in product_options:
raise ValueError("Multiple extractFrom specified")
product_options["extractFrom"] = option.value
break

Expand Down
26 changes: 26 additions & 0 deletions tests/test_providers.py
Original file line number Diff line number Diff line change
Expand Up @@ -188,11 +188,13 @@ async def test_provider_extractfrom(mockserver):
@attrs.define
class AnnotatedProductPage(BasePage):
product: Annotated[Product, ExtractFrom.httpResponseBody]
product2: Annotated[Product, ExtractFrom.httpResponseBody]

class AnnotatedZyteAPISpider(ZyteAPISpider):
def parse_(self, response: DummyResponse, page: AnnotatedProductPage):
yield {
"product": page.product,
"product2": page.product,
}

settings = create_scrapy_settings(None)
Expand All @@ -211,3 +213,27 @@ def parse_(self, response: DummyResponse, page: AnnotatedProductPage):
currency="USD",
)
)


@ensureDeferred
async def test_provider_extractfrom_double(mockserver):
    """Crawl a page that requests ``Product`` twice with conflicting sources.

    The page object declares two ``Product`` dependencies, one annotated
    with ``ExtractFrom.httpResponseBody`` and one with
    ``ExtractFrom.browserHtml``. The test only checks that the crawl
    produces no item.
    """
    from typing import Annotated

    @attrs.define
    class AnnotatedProductPage(BasePage):
        # Same dependency type, two different ExtractFrom annotations.
        product: Annotated[Product, ExtractFrom.httpResponseBody]
        product2: Annotated[Product, ExtractFrom.browserHtml]

    class AnnotatedZyteAPISpider(ZyteAPISpider):
        def parse_(self, response: DummyResponse, page: AnnotatedProductPage):
            yield dict(product=page.product)

    # Per-test overrides applied on top of the shared SETTINGS.
    overrides = {
        "ZYTE_API_URL": mockserver.urljoin("/"),
        "SCRAPY_POET_PROVIDERS": {ZyteApiProvider: 0},
    }
    settings = create_scrapy_settings(None)
    settings.update(SETTINGS)
    for key, value in overrides.items():
        settings[key] = value

    item, _second, _third = await crawl_single_item(
        AnnotatedZyteAPISpider, HtmlResource, settings
    )
    # NOTE(review): presumably None because the provider rejects the
    # conflicting extractFrom values; the failure reason itself is not
    # asserted here.
    assert item is None

0 comments on commit ecc42d4

Please sign in to comment.