From 68eaacd23f7af6b0ccaaade257fd8686be484ca1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Adri=C3=A1n=20Chaves?= Date: Tue, 18 Jun 2024 01:26:45 +0200 Subject: [PATCH 1/2] Implement auto_field field metadata --- CHANGELOG.rst | 2 +- docs/usage/pages.rst | 57 ++++++++++++++----- tests/test_adapter.py | 15 ++--- tests/test_pages.py | 18 +++++- zyte_common_items/fields.py | 42 ++++++++++++++ zyte_common_items/pages/article.py | 39 ++++++------- zyte_common_items/pages/article_list.py | 13 +++-- zyte_common_items/pages/article_navigation.py | 17 +++--- zyte_common_items/pages/business_place.py | 53 ++++++++--------- zyte_common_items/pages/job_posting.py | 45 ++++++++------- zyte_common_items/pages/product.py | 55 +++++++++--------- zyte_common_items/pages/product_list.py | 19 ++++--- zyte_common_items/pages/product_navigation.py | 17 +++--- zyte_common_items/pages/real_estate.py | 55 +++++++++--------- zyte_common_items/pages/social_media_post.py | 21 +++---- 15 files changed, 279 insertions(+), 189 deletions(-) create mode 100644 zyte_common_items/fields.py diff --git a/CHANGELOG.rst b/CHANGELOG.rst index 008c032e..9c38a315 100644 --- a/CHANGELOG.rst +++ b/CHANGELOG.rst @@ -20,7 +20,7 @@ Changelog with the ``probability`` value lower than a set threshold. * Added the :class:`~.BaseMetadata`, :class:`~.ListMetadata`, and - :class:`~.DetailMetadata` classes (they were previously private). + :class:`~.DetailsMetadata` classes (they were previously private). * Added the :attr:`.ListMetadata.validationMessages` attribute. diff --git a/docs/usage/pages.rst b/docs/usage/pages.rst index 185a7d69..bd8f4515 100644 --- a/docs/usage/pages.rst +++ b/docs/usage/pages.rst @@ -45,6 +45,32 @@ whose ``to_item`` method returns an instance of def name(self): return self.css("h1::text").get() +.. 
_extractors: + +Extractors +========== + +For some nested fields (:class:`~.ProductFromList`, :class:`~.ProductVariant`), +:ref:`base extractors ` exist that you can subclass +to write your own extractors. + +They provide the following base line: + +- They declare the :ref:`item class ` that they return, allowing for + their ``to_item`` method to automatically build an instance of it from + ``@field``-decorated methods. See :ref:`fields`. + +- They also provide default :ref:`processors ` for some + item-specific fields. + +See :ref:`extractor-api`. + + +.. _auto: + +Auto page object classes +======================== + Page object classes with the ``Auto`` prefix can be used to easily define page object classes that get an :ref:`item ` as a dependency from another page object class, can generate an identical item by default, and can also @@ -72,23 +98,24 @@ extra fields. For example: def foo(self): return "bar" -.. _extractors: - -Extractors -========== +Fields of these classes have ``auto_field`` set to ``True`` in their field +metadata, so that you can check if a page object subclass is overriding a field +using :func:`~zyte_common_items.fields.is_auto_field`: -For some nested fields (:class:`~.ProductFromList`, :class:`~.ProductVariant`), -:ref:`base extractors ` exist that you can subclass -to write your own extractors. +.. autofunction:: zyte_common_items.fields.is_auto_field -They provide the following base line: - -- They declare the :ref:`item class ` that they return, allowing for - their ``to_item`` method to automatically build an instance of it from - ``@field``-decorated methods. See :ref:`fields`. +.. code-block:: python -- They also provide default :ref:`processors ` for some - item-specific fields. 
+ print(is_auto_field(ExtendedProductPage, "name")) # Returns False + print(is_auto_field(ExtendedProductPage, "foo")) # Returns False + print(is_auto_field(ExtendedProductPage, "brand")) # Returns True + print(is_auto_field(ExtendedProductPage, "bar")) # Raises KeyError -See :ref:`extractor-api`. +If you are overriding a field method but the method continues to return the +value straight from the ``Auto``-prefixed class, e.g. because you are only +overriding field metadata or :ref:`processors `, you should also +set ``auto_field`` to ``True``. Instead of setting it manually in the field +meta, you can replace the :func:`~web_poet.fields.field` decorator with +:func:`~zyte_common_items.fields.auto_field`: +.. autofunction:: zyte_common_items.fields.auto_field diff --git a/tests/test_adapter.py b/tests/test_adapter.py index 9071887a..404e4ddc 100644 --- a/tests/test_adapter.py +++ b/tests/test_adapter.py @@ -1,16 +1,15 @@ -from collections import deque from collections.abc import Collection from contextlib import contextmanager +from copy import copy # In Python ≤ 3.8 you cannot annotate with “collections.abc.Collection[Item]”, # so we need to import typing.Collection for annotation instead. 
from typing import Collection as CollectionType -from typing import Deque, Optional, Type, cast +from typing import Optional import attrs import pytest from itemadapter import ItemAdapter -from itemadapter.adapter import AdapterInterface from zyte_common_items import Item, Product, ZyteItemAdapter from zyte_common_items.adapter import ZyteItemKeepEmptyAdapter @@ -20,11 +19,12 @@ @contextmanager def configured_adapter(adapter=ZyteItemAdapter): - ItemAdapter.ADAPTER_CLASSES.appendleft(adapter) + original_value = copy(ItemAdapter.ADAPTER_CLASSES) + ItemAdapter.ADAPTER_CLASSES = (adapter, *ItemAdapter.ADAPTER_CLASSES) try: yield finally: - ItemAdapter.ADAPTER_CLASSES.popleft() + ItemAdapter.ADAPTER_CLASSES = original_value def test_asdict_all_fields(): @@ -391,10 +391,7 @@ class _Item(Item): children: CollectionType[Item] class TestAdapter(ItemAdapter): - ADAPTER_CLASSES = ( - cast(Deque[Type[AdapterInterface]], deque([ZyteItemKeepEmptyAdapter])) - + ItemAdapter.ADAPTER_CLASSES - ) + ADAPTER_CLASSES = [ZyteItemKeepEmptyAdapter] + list(ItemAdapter.ADAPTER_CLASSES) item = _Item([]) adapter = TestAdapter(item) diff --git a/tests/test_pages.py b/tests/test_pages.py index 7be34e8e..574e73a0 100644 --- a/tests/test_pages.py +++ b/tests/test_pages.py @@ -5,6 +5,7 @@ import attrs import pytest from web_poet import HttpResponse, RequestUrl, ResponseUrl, Returns, field +from web_poet.fields import get_fields_dict import zyte_common_items from zyte_common_items import ( @@ -25,6 +26,7 @@ Request, ) from zyte_common_items._dateutils import utcnow +from zyte_common_items.fields import is_auto_field @pytest.mark.parametrize( @@ -111,8 +113,6 @@ class MyProductPage(ProductPage): def brand(self): return "baz" - from web_poet.fields import get_fields_dict - assert set(get_fields_dict(MyProductListPage)) == {"metadata", "products", "url"} assert set(get_fields_dict(MyProductPage)) == { "brand", @@ -469,3 +469,17 @@ def nextPage(self): assert request.metadata is not None assert 
request.metadata.probability == 1.0 assert type(page.nextPage) is Request + + +def test_auto_fields(): + """Every field of an Auto-prefixed class should have ``auto_field`` set to + ``True`` in its field metadata.""" + auto_page_names = { + obj_name + for obj_name in zyte_common_items.__dict__ + if (obj_name.startswith("Auto") and obj_name.endswith("Page")) + } + for auto_page_name in auto_page_names: + auto_page_cls = zyte_common_items.__dict__[auto_page_name] + for field_name in get_fields_dict(auto_page_cls): + assert is_auto_field(auto_page_cls, field_name) diff --git a/zyte_common_items/fields.py b/zyte_common_items/fields.py new file mode 100644 index 00000000..ddeac701 --- /dev/null +++ b/zyte_common_items/fields.py @@ -0,0 +1,42 @@ +from typing import Callable, List, Optional + +from web_poet import ItemPage, field +from web_poet.fields import get_fields_dict + + +def auto_field( + method=None, + *, + cached: bool = False, + meta: Optional[dict] = None, + out: Optional[List[Callable]] = None, +): + """Decorator that works like :func:`web_poet.fields.field` but sets + ``auto_field`` to ``True`` by default in *meta*. + + .. code-block:: python + + from zyte_common_items import AutoProductPage + from zyte_common_items.fields import auto_field + + + class ProductPage(AutoProductPage): + @auto_field(out=[str.capitalize]) + def name(self): + return super().name + """ + meta = meta or {} + meta.setdefault("auto_field", True) + return field(method, cached=cached, meta=meta, out=out) + + +def is_auto_field(cls: ItemPage, field: str): + """Return ``True`` if the field named *field* of the *cls* page object + class has ``auto_field`` set to ``True`` in its field metadata. + + All fields defined in :ref:`auto page object classes <auto>` meet this + condition. 
+ """ + fields_dict = get_fields_dict(cls) + field_meta = fields_dict[field].meta or {} + return field_meta.get("auto_field", False) diff --git a/zyte_common_items/pages/article.py b/zyte_common_items/pages/article.py index e5be3142..55add465 100644 --- a/zyte_common_items/pages/article.py +++ b/zyte_common_items/pages/article.py @@ -1,9 +1,10 @@ from typing import List, Optional import attrs -from web_poet import Returns, field +from web_poet import Returns from zyte_common_items.components import Audio, Author, Breadcrumb, Image, Video +from zyte_common_items.fields import auto_field from zyte_common_items.items import Article, ArticleMetadata from zyte_common_items.processors import breadcrumbs_processor @@ -29,74 +30,74 @@ class Processors(Page.Processors): class AutoArticlePage(BaseArticlePage): article: Article - @field + @auto_field def headline(self) -> Optional[str]: return self.article.headline - @field + @auto_field def datePublished(self) -> Optional[str]: return self.article.datePublished - @field + @auto_field def datePublishedRaw(self) -> Optional[str]: return self.article.datePublishedRaw - @field + @auto_field def dateModified(self) -> Optional[str]: return self.article.dateModified - @field + @auto_field def dateModifiedRaw(self) -> Optional[str]: return self.article.dateModifiedRaw - @field + @auto_field def authors(self) -> Optional[List[Author]]: return self.article.authors - @field + @auto_field def breadcrumbs(self) -> Optional[List[Breadcrumb]]: return self.article.breadcrumbs - @field + @auto_field def inLanguage(self) -> Optional[str]: return self.article.inLanguage - @field + @auto_field def mainImage(self) -> Optional[Image]: return self.article.mainImage - @field + @auto_field def images(self) -> Optional[List[Image]]: return self.article.images - @field + @auto_field def description(self) -> Optional[str]: return self.article.description - @field + @auto_field def articleBody(self) -> Optional[str]: return self.article.articleBody - 
@field + @auto_field def articleBodyHtml(self) -> Optional[str]: return self.article.articleBodyHtml - @field + @auto_field def videos(self) -> Optional[List[Video]]: return self.article.videos - @field + @auto_field def audios(self) -> Optional[List[Audio]]: return self.article.audios - @field + @auto_field def canonicalUrl(self) -> Optional[str]: return self.article.canonicalUrl - @field + @auto_field def url(self) -> Optional[str]: return self.article.url - @field + @auto_field def metadata(self) -> Optional[ArticleMetadata]: return self.article.metadata diff --git a/zyte_common_items/pages/article_list.py b/zyte_common_items/pages/article_list.py index 8c3ce34a..40b15c56 100644 --- a/zyte_common_items/pages/article_list.py +++ b/zyte_common_items/pages/article_list.py @@ -1,9 +1,10 @@ from typing import List, Optional import attrs -from web_poet import Returns, field +from web_poet import Returns from zyte_common_items.components import Breadcrumb +from zyte_common_items.fields import auto_field from zyte_common_items.items import ArticleFromList, ArticleList, ArticleListMetadata from zyte_common_items.processors import breadcrumbs_processor @@ -31,22 +32,22 @@ class Processors(Page.Processors): class AutoArticleListPage(BaseArticleListPage): article_list: ArticleList - @field + @auto_field def articles(self) -> Optional[List[ArticleFromList]]: return self.article_list.articles - @field + @auto_field def breadcrumbs(self) -> Optional[List[Breadcrumb]]: return self.article_list.breadcrumbs - @field + @auto_field def canonicalUrl(self) -> Optional[str]: return self.article_list.canonicalUrl - @field + @auto_field def metadata(self) -> Optional[ArticleListMetadata]: return self.article_list.metadata - @field + @auto_field def url(self) -> Optional[str]: return self.article_list.url diff --git a/zyte_common_items/pages/article_navigation.py b/zyte_common_items/pages/article_navigation.py index 82b6817e..7a84d39d 100644 --- 
a/zyte_common_items/pages/article_navigation.py +++ b/zyte_common_items/pages/article_navigation.py @@ -1,9 +1,10 @@ from typing import List, Optional import attrs -from web_poet import Returns, field +from web_poet import Returns from zyte_common_items.components import ProbabilityRequest, Request +from zyte_common_items.fields import auto_field from zyte_common_items.items import ArticleNavigation, ArticleNavigationMetadata from .base import BasePage, Page @@ -26,30 +27,30 @@ class ArticleNavigationPage( class AutoArticleNavigationPage(BaseArticleNavigationPage): article_navigation: ArticleNavigation - @field + @auto_field def categoryName(self) -> Optional[str]: return self.article_navigation.categoryName - @field + @auto_field def items(self) -> Optional[List[ProbabilityRequest]]: return self.article_navigation.items - @field + @auto_field def metadata(self) -> Optional[ArticleNavigationMetadata]: return self.article_navigation.metadata - @field + @auto_field def nextPage(self) -> Optional[Request]: return self.article_navigation.nextPage - @field + @auto_field def pageNumber(self) -> Optional[int]: return self.article_navigation.pageNumber - @field + @auto_field def subCategories(self) -> Optional[List[ProbabilityRequest]]: return self.article_navigation.subCategories - @field + @auto_field def url(self) -> Optional[str]: return self.article_navigation.url diff --git a/zyte_common_items/pages/business_place.py b/zyte_common_items/pages/business_place.py index 4a6ab239..c0ec6ab6 100644 --- a/zyte_common_items/pages/business_place.py +++ b/zyte_common_items/pages/business_place.py @@ -1,7 +1,7 @@ from typing import List, Optional import attrs -from web_poet import Returns, field +from web_poet import Returns from zyte_common_items.components import ( AdditionalProperty, @@ -14,6 +14,7 @@ ParentPlace, StarRating, ) +from zyte_common_items.fields import auto_field from zyte_common_items.items import BusinessPlace, BusinessPlaceMetadata from 
zyte_common_items.processors import description_processor, rating_processor @@ -45,102 +46,102 @@ class Processors(Page.Processors): class AutoBusinessPlacePage(BaseBusinessPlacePage): business_place: BusinessPlace - @field + @auto_field def actions(self) -> Optional[List[NamedLink]]: return self.business_place.actions - @field + @auto_field def additionalProperties(self) -> Optional[List[AdditionalProperty]]: return self.business_place.additionalProperties - @field + @auto_field def address(self) -> Optional[Address]: return self.business_place.address - @field + @auto_field def aggregateRating(self) -> Optional[AggregateRating]: return self.business_place.aggregateRating - @field + @auto_field def amenityFeatures(self) -> Optional[List[Amenity]]: return self.business_place.amenityFeatures - @field + @auto_field def categories(self) -> Optional[List[str]]: return self.business_place.categories - @field + @auto_field def containedInPlace(self) -> Optional[ParentPlace]: return self.business_place.containedInPlace - @field + @auto_field def description(self) -> Optional[str]: return self.business_place.description - @field + @auto_field def features(self) -> Optional[List[str]]: return self.business_place.features - @field + @auto_field def images(self) -> Optional[List[Image]]: return self.business_place.images - @field + @auto_field def isVerified(self) -> Optional[bool]: return self.business_place.isVerified - @field + @auto_field def map(self) -> Optional[str]: return self.business_place.map - @field + @auto_field def metadata(self) -> Optional[BusinessPlaceMetadata]: return self.business_place.metadata - @field + @auto_field def name(self) -> Optional[str]: return self.business_place.name - @field + @auto_field def openingHours(self) -> Optional[List[OpeningHoursItem]]: return self.business_place.openingHours - @field + @auto_field def placeId(self) -> Optional[str]: return self.business_place.placeId - @field + @auto_field def priceRange(self) -> Optional[str]: 
return self.business_place.priceRange - @field + @auto_field def reservationAction(self) -> Optional[NamedLink]: return self.business_place.reservationAction - @field + @auto_field def reviewSites(self) -> Optional[List[NamedLink]]: return self.business_place.reviewSites - @field + @auto_field def starRating(self) -> Optional[StarRating]: return self.business_place.starRating - @field + @auto_field def tags(self) -> Optional[List[str]]: return self.business_place.tags - @field + @auto_field def telephone(self) -> Optional[str]: return self.business_place.telephone - @field + @auto_field def timezone(self) -> Optional[str]: return self.business_place.timezone - @field + @auto_field def url(self) -> Optional[str]: return self.business_place.url - @field + @auto_field def website(self) -> Optional[str]: return self.business_place.website diff --git a/zyte_common_items/pages/job_posting.py b/zyte_common_items/pages/job_posting.py index dc9b740f..a48a95be 100644 --- a/zyte_common_items/pages/job_posting.py +++ b/zyte_common_items/pages/job_posting.py @@ -1,9 +1,10 @@ from typing import List, Optional import attrs -from web_poet import Returns, field +from web_poet import Returns from zyte_common_items.components import BaseSalary, HiringOrganization, JobLocation +from zyte_common_items.fields import auto_field from zyte_common_items.items import JobPosting, JobPostingMetadata from zyte_common_items.processors import ( description_html_processor, @@ -38,86 +39,86 @@ class Processors(Page.Processors): class AutoJobPostingPage(BaseJobPostingPage): job_posting: JobPosting - @field + @auto_field def url(self) -> Optional[str]: return self.job_posting.url - @field + @auto_field def jobPostingId(self) -> Optional[str]: return self.job_posting.jobPostingId - @field + @auto_field def datePublished(self) -> Optional[str]: return self.job_posting.datePublished - @field + @auto_field def datePublishedRaw(self) -> Optional[str]: return self.job_posting.datePublishedRaw - @field + 
@auto_field def dateModified(self) -> Optional[str]: return self.job_posting.dateModified - @field + @auto_field def dateModifiedRaw(self) -> Optional[str]: return self.job_posting.dateModifiedRaw - @field + @auto_field def validThrough(self) -> Optional[str]: return self.job_posting.validThrough - @field + @auto_field def validThroughRaw(self) -> Optional[str]: return self.job_posting.validThroughRaw - @field + @auto_field def jobTitle(self) -> Optional[str]: return self.job_posting.jobTitle - @field + @auto_field def headline(self) -> Optional[str]: return self.job_posting.headline - @field + @auto_field def jobLocation(self) -> Optional[JobLocation]: return self.job_posting.jobLocation - @field + @auto_field def description(self) -> Optional[str]: return self.job_posting.description - @field + @auto_field def descriptionHtml(self) -> Optional[str]: return self.job_posting.descriptionHtml - @field + @auto_field def employmentType(self) -> Optional[str]: return self.job_posting.employmentType - @field + @auto_field def baseSalary(self) -> Optional[BaseSalary]: return self.job_posting.baseSalary - @field + @auto_field def requirements(self) -> Optional[List[str]]: return self.job_posting.requirements - @field + @auto_field def hiringOrganization(self) -> Optional[HiringOrganization]: return self.job_posting.hiringOrganization - @field + @auto_field def jobStartDate(self) -> Optional[str]: return self.job_posting.jobStartDate - @field + @auto_field def jobStartDateRaw(self) -> Optional[str]: return self.job_posting.jobStartDateRaw - @field + @auto_field def remoteStatus(self) -> Optional[str]: return self.job_posting.remoteStatus - @field + @auto_field def metadata(self) -> Optional[JobPostingMetadata]: return self.job_posting.metadata diff --git a/zyte_common_items/pages/product.py b/zyte_common_items/pages/product.py index db279668..fc297629 100644 --- a/zyte_common_items/pages/product.py +++ b/zyte_common_items/pages/product.py @@ -1,7 +1,7 @@ from typing import 
List, Optional import attrs -from web_poet import Returns, field +from web_poet import Returns from zyte_common_items.components import ( AdditionalProperty, @@ -11,6 +11,7 @@ Gtin, Image, ) +from zyte_common_items.fields import auto_field from zyte_common_items.items import Product, ProductMetadata, ProductVariant from zyte_common_items.processors import ( brand_processor, @@ -67,106 +68,106 @@ class Processors(Page.Processors): class AutoProductPage(BaseProductPage): product: Product - @field + @auto_field def additionalProperties(self) -> Optional[List[AdditionalProperty]]: return self.product.additionalProperties - @field + @auto_field def aggregateRating(self) -> Optional[AggregateRating]: return self.product.aggregateRating - @field + @auto_field def availability(self) -> Optional[str]: return self.product.availability - @field + @auto_field def brand(self) -> Optional[Brand]: return self.product.brand - @field + @auto_field def breadcrumbs(self) -> Optional[List[Breadcrumb]]: return self.product.breadcrumbs - @field + @auto_field def canonicalUrl(self) -> Optional[str]: return self.product.canonicalUrl - @field + @auto_field def color(self) -> Optional[str]: return self.product.color - @field + @auto_field def currency(self) -> Optional[str]: return self.product.currency - @field + @auto_field def currencyRaw(self) -> Optional[str]: return self.product.currencyRaw - @field + @auto_field def description(self) -> Optional[str]: return self.product.description - @field + @auto_field def descriptionHtml(self) -> Optional[str]: return self.product.descriptionHtml - @field + @auto_field def features(self) -> Optional[List[str]]: return self.product.features - @field + @auto_field def gtin(self) -> Optional[List[Gtin]]: return self.product.gtin - @field + @auto_field def images(self) -> Optional[List[Image]]: return self.product.images - @field + @auto_field def mainImage(self) -> Optional[Image]: return self.product.mainImage - @field + @auto_field def 
metadata(self) -> Optional[ProductMetadata]: return self.product.metadata - @field + @auto_field def mpn(self) -> Optional[str]: return self.product.mpn - @field + @auto_field def name(self) -> Optional[str]: return self.product.name - @field + @auto_field def price(self) -> Optional[str]: return self.product.price - @field + @auto_field def productId(self) -> Optional[str]: return self.product.productId - @field + @auto_field def regularPrice(self) -> Optional[str]: return self.product.regularPrice - @field + @auto_field def size(self) -> Optional[str]: return self.product.size - @field + @auto_field def sku(self) -> Optional[str]: return self.product.sku - @field + @auto_field def style(self) -> Optional[str]: return self.product.style - @field + @auto_field def url(self) -> str: return self.product.url - @field + @auto_field def variants(self) -> Optional[List[ProductVariant]]: return self.product.variants diff --git a/zyte_common_items/pages/product_list.py b/zyte_common_items/pages/product_list.py index 9f81b558..e115b4c4 100644 --- a/zyte_common_items/pages/product_list.py +++ b/zyte_common_items/pages/product_list.py @@ -1,9 +1,10 @@ from typing import List, Optional import attrs -from web_poet import Returns, field +from web_poet import Returns from zyte_common_items.components import Breadcrumb, Link +from zyte_common_items.fields import auto_field from zyte_common_items.items import ProductFromList, ProductList, ProductListMetadata from zyte_common_items.processors import breadcrumbs_processor @@ -31,34 +32,34 @@ class Processors(Page.Processors): class AutoProductListPage(BaseProductListPage): product_list: ProductList - @field + @auto_field def breadcrumbs(self) -> Optional[List[Breadcrumb]]: return self.product_list.breadcrumbs - @field + @auto_field def canonicalUrl(self) -> Optional[str]: return self.product_list.canonicalUrl - @field + @auto_field def categoryName(self) -> Optional[str]: return self.product_list.categoryName - @field + @auto_field 
def metadata(self) -> Optional[ProductListMetadata]: return self.product_list.metadata - @field + @auto_field def pageNumber(self) -> Optional[int]: return self.product_list.pageNumber - @field + @auto_field def paginationNext(self) -> Optional[Link]: return self.product_list.paginationNext - @field + @auto_field def products(self) -> Optional[List[ProductFromList]]: return self.product_list.products - @field + @auto_field def url(self) -> Optional[str]: return self.product_list.url diff --git a/zyte_common_items/pages/product_navigation.py b/zyte_common_items/pages/product_navigation.py index c68fded9..7ed00d9b 100644 --- a/zyte_common_items/pages/product_navigation.py +++ b/zyte_common_items/pages/product_navigation.py @@ -1,9 +1,10 @@ from typing import List, Optional import attrs -from web_poet import Returns, field +from web_poet import Returns from zyte_common_items.components import ProbabilityRequest, Request +from zyte_common_items.fields import auto_field from zyte_common_items.items import ProductNavigation, ProductNavigationMetadata from zyte_common_items.processors import probability_request_list_processor @@ -31,30 +32,30 @@ class ProductNavigationPage( class AutoProductNavigationPage(BaseProductNavigationPage): product_navigation: ProductNavigation - @field + @auto_field def categoryName(self) -> Optional[str]: return self.product_navigation.categoryName - @field + @auto_field def items(self) -> Optional[List[ProbabilityRequest]]: return self.product_navigation.items - @field + @auto_field def metadata(self) -> Optional[ProductNavigationMetadata]: return self.product_navigation.metadata - @field + @auto_field def nextPage(self) -> Optional[Request]: return self.product_navigation.nextPage - @field + @auto_field def pageNumber(self) -> Optional[int]: return self.product_navigation.pageNumber - @field + @auto_field def subCategories(self) -> Optional[List[ProbabilityRequest]]: return self.product_navigation.subCategories - @field + @auto_field def 
url(self) -> Optional[str]: return self.product_navigation.url diff --git a/zyte_common_items/pages/real_estate.py b/zyte_common_items/pages/real_estate.py index 804e99cf..e869e703 100644 --- a/zyte_common_items/pages/real_estate.py +++ b/zyte_common_items/pages/real_estate.py @@ -1,7 +1,7 @@ from typing import List, Optional import attrs -from web_poet import Returns, field +from web_poet import Returns from zyte_common_items.components import ( AdditionalProperty, @@ -10,6 +10,7 @@ Image, RealEstateArea, ) +from zyte_common_items.fields import auto_field from zyte_common_items.items import RealEstate, RealEstateMetadata from ..processors import breadcrumbs_processor, description_processor @@ -39,106 +40,106 @@ class Processors(Page.Processors): class AutoRealEstatePage(BaseRealEstatePage): real_estate: RealEstate - @field + @auto_field def additionalProperties(self) -> Optional[List[AdditionalProperty]]: return self.real_estate.additionalProperties - @field + @auto_field def address(self) -> Optional[Address]: return self.real_estate.address - @field + @auto_field def area(self) -> Optional[RealEstateArea]: return self.real_estate.area - @field + @auto_field def breadcrumbs(self) -> Optional[List[Breadcrumb]]: return self.real_estate.breadcrumbs - @field + @auto_field def currency(self) -> Optional[str]: return self.real_estate.currency - @field + @auto_field def currencyRaw(self) -> Optional[str]: return self.real_estate.currencyRaw - @field + @auto_field def datePublished(self) -> Optional[str]: return self.real_estate.datePublished - @field + @auto_field def datePublishedRaw(self) -> Optional[str]: return self.real_estate.datePublishedRaw - @field + @auto_field def description(self) -> Optional[str]: return self.real_estate.description - @field + @auto_field def images(self) -> Optional[List[Image]]: return self.real_estate.images - @field + @auto_field def mainImage(self) -> Optional[Image]: return self.real_estate.mainImage - @field + @auto_field def 
metadata(self) -> Optional[RealEstateMetadata]: return self.real_estate.metadata - @field + @auto_field def name(self) -> Optional[str]: return self.real_estate.name - @field + @auto_field def numberOfBathroomsTotal(self) -> Optional[int]: return self.real_estate.numberOfBathroomsTotal - @field + @auto_field def numberOfBedrooms(self) -> Optional[int]: return self.real_estate.numberOfBedrooms - @field + @auto_field def numberOfFullBathrooms(self) -> Optional[int]: return self.real_estate.numberOfFullBathrooms - @field + @auto_field def numberOfPartialBathrooms(self) -> Optional[int]: return self.real_estate.numberOfPartialBathrooms - @field + @auto_field def numberOfRooms(self) -> Optional[int]: return self.real_estate.numberOfRooms - @field + @auto_field def price(self) -> Optional[str]: return self.real_estate.price - @field + @auto_field def propertyType(self) -> Optional[str]: return self.real_estate.propertyType - @field + @auto_field def realEstateId(self) -> Optional[str]: return self.real_estate.realEstateId - @field + @auto_field def rentalPeriod(self) -> Optional[str]: return self.real_estate.rentalPeriod - @field + @auto_field def tradeType(self) -> Optional[str]: return self.real_estate.tradeType - @field + @auto_field def url(self) -> Optional[str]: return self.real_estate.url - @field + @auto_field def virtualTourUrl(self) -> Optional[str]: return self.real_estate.virtualTourUrl - @field + @auto_field def yearBuilt(self) -> Optional[int]: return self.real_estate.yearBuilt diff --git a/zyte_common_items/pages/social_media_post.py b/zyte_common_items/pages/social_media_post.py index 9d05180f..a4f7823c 100644 --- a/zyte_common_items/pages/social_media_post.py +++ b/zyte_common_items/pages/social_media_post.py @@ -1,9 +1,10 @@ from typing import List, Optional import attrs -from web_poet import Returns, field +from web_poet import Returns from zyte_common_items.components import Reactions, SocialMediaPostAuthor, Url +from zyte_common_items.fields import 
auto_field from zyte_common_items.items import SocialMediaPost, SocialMediaPostMetadata from .base import BasePage, Page @@ -26,38 +27,38 @@ class SocialMediaPostPage( class AutoSocialMediaPostPage(BaseSocialMediaPostPage): social_media_post: SocialMediaPost - @field + @auto_field def url(self) -> Optional[str]: return self.social_media_post.url - @field + @auto_field def postId(self) -> Optional[str]: return self.social_media_post.postId - @field + @auto_field def reactions(self) -> Optional[Reactions]: return self.social_media_post.reactions - @field + @auto_field def text(self) -> Optional[str]: return self.social_media_post.text - @field + @auto_field def datePublished(self) -> Optional[str]: return self.social_media_post.datePublished - @field + @auto_field def hashtags(self) -> Optional[List[str]]: return self.social_media_post.hashtags - @field + @auto_field def mediaUrls(self) -> Optional[List[Url]]: return self.social_media_post.mediaUrls - @field + @auto_field def author(self) -> Optional[SocialMediaPostAuthor]: return self.social_media_post.author - @field + @auto_field def metadata(self) -> Optional[SocialMediaPostMetadata]: return self.social_media_post.metadata From 31bff179762cc9543c1cda490e9c4c4708359c01 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Adri=C3=A1n=20Chaves?= Date: Tue, 18 Jun 2024 10:20:26 +0200 Subject: [PATCH 2/2] Do not encourage the use of @auto_field when changing field processors --- docs/usage/pages.rst | 7 +++---- zyte_common_items/fields.py | 2 +- 2 files changed, 4 insertions(+), 5 deletions(-) diff --git a/docs/usage/pages.rst b/docs/usage/pages.rst index bd8f4515..7c8d21bd 100644 --- a/docs/usage/pages.rst +++ b/docs/usage/pages.rst @@ -112,10 +112,9 @@ using :func:`~zyte_common_items.fields.is_auto_field`: print(is_auto_field(ExtendedProductPage, "bar")) # Raises KeyError If you are overriding a field method but the method continues to return the -value straight from the ``Auto``-prefixed class, e.g. 
because you are only -overriding field metadata or :ref:`processors `, you should also -set ``auto_field`` to ``True``. Instead of setting it manually in the field -meta, you can replace the :func:`~web_poet.fields.field` decorator with +value straight from the ``Auto``-prefixed class, you should also set +``auto_field`` to ``True``. Instead of setting it manually in the field meta, +you can replace the :func:`~web_poet.fields.field` decorator with :func:`~zyte_common_items.fields.auto_field`: .. autofunction:: zyte_common_items.fields.auto_field diff --git a/zyte_common_items/fields.py b/zyte_common_items/fields.py index ddeac701..2043d2d3 100644 --- a/zyte_common_items/fields.py +++ b/zyte_common_items/fields.py @@ -21,7 +21,7 @@ def auto_field( class ProductPage(AutoProductPage): - @auto_field(out=[str.capitalize]) + @auto_field def name(self): return super().name """