From e4fed50102769d8dd0682d44a41b287e5133ecca Mon Sep 17 00:00:00 2001
From: Andrey Rakhmatullin <wrar@wrar.name>
Date: Mon, 9 Sep 2024 17:05:07 +0500
Subject: [PATCH 1/2] Reword param descriptions.

---
 tests/test_ecommerce.py                    | 13 +++++--------
 zyte_spider_templates/params.py            |  9 ++++-----
 zyte_spider_templates/spiders/ecommerce.py | 19 ++++++-------------
 3 files changed, 15 insertions(+), 26 deletions(-)

diff --git a/tests/test_ecommerce.py b/tests/test_ecommerce.py
index 4e794fb..21268d0 100644
--- a/tests/test_ecommerce.py
+++ b/tests/test_ecommerce.py
@@ -466,16 +466,13 @@ def test_metadata():
                         },
                         "navigation": {
                             "description": (
-                                "Follow pagination, subcategories, and product detail "
-                                "pages. Pagination Only is a better choice if the target "
-                                "URL does not have subcategories, or if Zyte API is "
-                                "misidentifying some URLs as subcategories."
+                                "Follow only subcategories, pagination and product detail pages."
                             ),
-                            "title": "Navigation",
+                            "title": "Category",
                         },
                         "pagination_only": {
                             "description": (
-                                "Follow pagination and product detail pages. Subcategory links are ignored."
+                                "Follow only pagination and product detail pages. Subcategory links are ignored."
                             ),
                             "title": "Pagination Only",
                         },
@@ -534,11 +531,11 @@ def test_metadata():
                     ),
                     "enumMeta": {
                         "browserHtml": {
-                            "description": "Use browser rendering. Often provides the best quality.",
+                            "description": "Use browser rendering. Better quality, but slower and more expensive.",
                             "title": "browserHtml",
                         },
                         "httpResponseBody": {
-                            "description": "Use HTTP responses. Cost-efficient and fast extraction method, which works well on many websites.",
+                            "description": "Use raw responses. Fast and cheap.",
                             "title": "httpResponseBody",
                         },
                     },
diff --git a/zyte_spider_templates/params.py b/zyte_spider_templates/params.py
index ca1158b..54eb900 100644
--- a/zyte_spider_templates/params.py
+++ b/zyte_spider_templates/params.py
@@ -26,11 +26,10 @@
 @document_enum
 class ExtractFrom(str, Enum):
     httpResponseBody: str = "httpResponseBody"
-    """Use HTTP responses. Cost-efficient and fast extraction method, which
-    works well on many websites."""
+    """Use raw responses. Fast and cheap."""
 
     browserHtml: str = "browserHtml"
-    """Use browser rendering. Often provides the best quality."""
+    """Use browser rendering. Better quality, but slower and more expensive."""
 
 
 class ExtractFromParam(BaseModel):
@@ -45,11 +44,11 @@ class ExtractFromParam(BaseModel):
             "enumMeta": {
                 ExtractFrom.browserHtml: {
                     "title": "browserHtml",
-                    "description": "Use browser rendering. Often provides the best quality.",
+                    "description": "Use browser rendering. Better quality, but slower and more expensive.",
                 },
                 ExtractFrom.httpResponseBody: {
                     "title": "httpResponseBody",
-                    "description": "Use HTTP responses. Cost-efficient and fast extraction method, which works well on many websites.",
+                    "description": "Use raw responses. Fast and cheap.",
                 },
             },
         },
diff --git a/zyte_spider_templates/spiders/ecommerce.py b/zyte_spider_templates/spiders/ecommerce.py
index eefb43e..187c4df 100644
--- a/zyte_spider_templates/spiders/ecommerce.py
+++ b/zyte_spider_templates/spiders/ecommerce.py
@@ -45,15 +45,11 @@ class EcommerceCrawlStrategy(str, Enum):
     extract as many products as possible."""
 
     navigation: str = "navigation"
-    """Follow pagination, subcategories, and product detail pages.
-
-    Pagination Only is a better choice if the target URL does not have
-    subcategories, or if Zyte API is misidentifying some URLs as subcategories.
-    """
+    """Follow only subcategories, pagination and product detail pages."""
 
     pagination_only: str = "pagination_only"
-    """Follow pagination and product detail pages. Subcategory links are
-    ignored."""
+    """Follow only pagination and product detail pages. Subcategory links
+    are ignored."""
 
     direct_item: str = "direct_item"
     """Treat input URLs as direct links to product detail pages, and extract an
@@ -84,18 +80,15 @@ class EcommerceCrawlStrategyParam(BaseModel):
                     ),
                 },
                 EcommerceCrawlStrategy.navigation: {
-                    "title": "Navigation",
+                    "title": "Category",
                     "description": (
-                        "Follow pagination, subcategories, and product detail pages. "
-                        "Pagination Only is a better choice if the target URL does not "
-                        "have subcategories, or if Zyte API is misidentifying some URLs "
-                        "as subcategories."
+                        "Follow only subcategories, pagination and product detail pages."
                     ),
                 },
                 EcommerceCrawlStrategy.pagination_only: {
                     "title": "Pagination Only",
                     "description": (
-                        "Follow pagination and product detail pages. Subcategory links are ignored."
+                        "Follow only pagination and product detail pages. Subcategory links are ignored."
                     ),
                 },
                 EcommerceCrawlStrategy.direct_item: {

From 954dd3d0e3ea94108f2107f258fed91b6d684293 Mon Sep 17 00:00:00 2001
From: Andrey Rakhmatullin <wrar@wrar.name>
Date: Fri, 11 Oct 2024 15:13:52 +0500
Subject: [PATCH 2/2] Expand crawl strategy descriptions and restore the Navigation title.

---
 tests/test_ecommerce.py                    | 38 +++++++----
 zyte_spider_templates/spiders/ecommerce.py | 76 +++++++++++++++-------
 2 files changed, 81 insertions(+), 33 deletions(-)

diff --git a/tests/test_ecommerce.py b/tests/test_ecommerce.py
index 25070d8..69d9466 100644
--- a/tests/test_ecommerce.py
+++ b/tests/test_ecommerce.py
@@ -426,36 +426,52 @@ def test_metadata():
                     "enumMeta": {
                         "automatic": {
                             "description": (
-                                "Automatically use the best crawl strategy based on the given "
-                                "URL inputs. If given a homepage URL, it would attempt to crawl "
-                                "as many products it can discover. Otherwise, it attempt to "
-                                "crawl the products on a given page category."
+                                "Automatically select the best approach. A good "
+                                "default for most use cases. Currently it uses "
+                                "heuristics only on the homepages of websites (similar "
+                                "to Full strategy), and follows product, category and "
+                                "pagination links on other pages (similar to Navigation "
+                                "strategy)."
                             ),
                             "title": "Automatic",
                         },
                         "direct_item": {
                             "description": (
-                                "Treat input URLs as direct links to product detail pages, and "
-                                "extract a product from each."
+                                "Directly extract products from the provided URLs, "
+                                "without any crawling. To use this strategy, pass "
+                                "individual product URLs to the spider, not the "
+                                "website or product category URLs. Common use cases "
+                                "are product monitoring and batch extraction."
                             ),
                             "title": "Direct URLs to Product",
                         },
                         "full": {
                             "description": (
-                                "Follow most links within the domain of URL in an attempt "
-                                "to discover and extract as many products as possible."
+                                "Follow most links on the website to discover and "
+                                "extract as many products as possible. If an input URL "
+                                "is a link to a particular category on a website, the "
+                                "spider may crawl products outside this category. Try "
+                                "this strategy if other strategies miss items."
                             ),
                             "title": "Full",
                         },
                         "navigation": {
                             "description": (
-                                "Follow only subcategories, pagination and product detail pages."
+                                "Follow pagination, subcategories, and product links "
+                                "only. If an input URL is a link to a particular "
+                                "category on a website, the spider will try to stay "
+                                "within this category."
                             ),
-                            "title": "Category",
+                            "title": "Navigation",
                         },
                         "pagination_only": {
                             "description": (
-                                "Follow only pagination and product detail pages. Subcategory links are ignored."
+                                "Follow pagination and product links only. This "
+                                "strategy is similar to Navigation, but it doesn't "
+                                "support subcategories. Use it when you need the "
+                                "spider to stay within a certain category on a "
+                                "website, but Automatic or Navigation strategies fail "
+                                "to do so because of misclassified subcategory links."
                             ),
                             "title": "Pagination Only",
                         },
diff --git a/zyte_spider_templates/spiders/ecommerce.py b/zyte_spider_templates/spiders/ecommerce.py
index 3ecfb7b..5e87266 100644
--- a/zyte_spider_templates/spiders/ecommerce.py
+++ b/zyte_spider_templates/spiders/ecommerce.py
@@ -40,26 +40,42 @@
 class EcommerceCrawlStrategy(str, Enum):
     automatic: str = "automatic"
     """
-    Automatically use the best crawl strategy based on the given URL inputs.
-
-    If given a homepage URL, it would attempt to crawl as many products it can discover.
-    Otherwise, it attempt to crawl the products on a given page category.
+    Automatically select the best approach. A good default for most use cases.
+    Currently it uses heuristics only on the homepages of websites (similar to
+    Full strategy), and follows product, category and pagination links on other
+    pages (similar to Navigation strategy).
     """
 
     full: str = "full"
-    """Follow most links within the domain of URL in an attempt to discover and
-    extract as many products as possible."""
+    """
+    Follow most links on the website to discover and extract as many products
+    as possible. If an input URL is a link to a particular category on a
+    website, the spider may crawl products outside this category. Try this
+    strategy if other strategies miss items.
+    """
 
     navigation: str = "navigation"
-    """Follow only subcategories, pagination and product detail pages."""
+    """
+    Follow pagination, subcategories, and product links only. If an input URL
+    is a link to a particular category on a website, the spider will try to
+    stay within this category.
+    """
 
     pagination_only: str = "pagination_only"
-    """Follow only pagination and product detail pages. Subcategory links
-    are ignored."""
+    """
+    Follow pagination and product links only. This strategy is similar to
+    Navigation, but it doesn't support subcategories. Use it when you need the
+    spider to stay within a certain category on a website, but Automatic or
+    Navigation strategies fail to do so because of misclassified subcategory links.
+    """
 
     direct_item: str = "direct_item"
-    """Treat input URLs as direct links to product detail pages, and extract an
-    product from each."""
+    """
+    Directly extract products from the provided URLs, without any crawling. To
+    use this strategy, pass individual product URLs to the spider, not the
+    website or product category URLs. Common use cases are product monitoring
+    and batch extraction.
+    """
 
 
 class EcommerceCrawlStrategyParam(BaseModel):
@@ -71,37 +87,53 @@ class EcommerceCrawlStrategyParam(BaseModel):
             "enumMeta": {
                 EcommerceCrawlStrategy.automatic: {
                     "description": (
-                        "Automatically use the best crawl strategy based on the given "
-                        "URL inputs. If given a homepage URL, it would attempt to crawl "
-                        "as many products it can discover. Otherwise, it attempt to "
-                        "crawl the products on a given page category."
+                        "Automatically select the best approach. A good "
+                        "default for most use cases. Currently it uses "
+                        "heuristics only on the homepages of websites (similar "
+                        "to Full strategy), and follows product, category and "
+                        "pagination links on other pages (similar to Navigation "
+                        "strategy)."
                     ),
                     "title": "Automatic",
                 },
                 EcommerceCrawlStrategy.full: {
                     "title": "Full",
                     "description": (
-                        "Follow most links within the domain of URL in an attempt to "
-                        "discover and extract as many products as possible."
+                        "Follow most links on the website to discover and "
+                        "extract as many products as possible. If an input URL "
+                        "is a link to a particular category on a website, the "
+                        "spider may crawl products outside this category. Try "
+                        "this strategy if other strategies miss items."
                     ),
                 },
                 EcommerceCrawlStrategy.navigation: {
-                    "title": "Category",
+                    "title": "Navigation",
                     "description": (
-                        "Follow only subcategories, pagination and product detail pages."
+                        "Follow pagination, subcategories, and product links "
+                        "only. If an input URL is a link to a particular "
+                        "category on a website, the spider will try to stay "
+                        "within this category."
                     ),
                 },
                 EcommerceCrawlStrategy.pagination_only: {
                     "title": "Pagination Only",
                     "description": (
-                        "Follow only pagination and product detail pages. Subcategory links are ignored."
+                        "Follow pagination and product links only. This "
+                        "strategy is similar to Navigation, but it doesn't "
+                        "support subcategories. Use it when you need the "
+                        "spider to stay within a certain category on a "
+                        "website, but Automatic or Navigation strategies fail "
+                        "to do so because of misclassified subcategory links."
                     ),
                 },
                 EcommerceCrawlStrategy.direct_item: {
                     "title": "Direct URLs to Product",
                     "description": (
-                        "Treat input URLs as direct links to product detail pages, and "
-                        "extract a product from each."
+                        "Directly extract products from the provided URLs, "
+                        "without any crawling. To use this strategy, pass "
+                        "individual product URLs to the spider, not the "
+                        "website or product category URLs. Common use cases "
+                        "are product monitoring and batch extraction."
                     ),
                 },
             },