From b7f3298f2509fdaabe2264035df276e9fd1c0f24 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Adri=C3=A1n=20Chaves?= Date: Fri, 17 Nov 2023 14:03:07 +0100 Subject: [PATCH] Add tests for new scenarios --- scrapy_zyte_api/_request_fingerprinter.py | 2 +- tests/test_request_fingerprinter.py | 30 ++++++++++++++++++++--- 2 files changed, 28 insertions(+), 4 deletions(-) diff --git a/scrapy_zyte_api/_request_fingerprinter.py b/scrapy_zyte_api/_request_fingerprinter.py index 0fc138a2..3a023968 100644 --- a/scrapy_zyte_api/_request_fingerprinter.py +++ b/scrapy_zyte_api/_request_fingerprinter.py @@ -46,7 +46,7 @@ def _normalize_params(self, api_params): if "httpRequestText" in api_params: api_params["httpRequestBody"] = b64encode( - api_params["httpRequestText"].encode() + api_params.pop("httpRequestText").encode() ).decode() for key, value in _REQUEST_PARAMS.items(): diff --git a/tests/test_request_fingerprinter.py b/tests/test_request_fingerprinter.py index 1508f08c..d21013d0 100644 --- a/tests/test_request_fingerprinter.py +++ b/tests/test_request_fingerprinter.py @@ -318,7 +318,8 @@ def merge_dicts(*dicts): "params,match", ( # As long as browserHtml or screenshot are True, different fragments - # make for different fingerprints, regardless of other parameters. + # make for different fingerprints, regardless of other parameters. Same + # for extraction types if browserHtml is set in *Options.extractFrom. *( ( merge_dicts(body, headers, unknown, browser), @@ -345,10 +346,12 @@ def merge_dicts(*dicts): {"browserHtml": True, "screenshot": False}, {"browserHtml": False, "screenshot": True}, {"browserHtml": True, "screenshot": True}, + {"product": True, "productOptions": {"extractFrom": "browserHtml"}}, ) ), # If neither browserHtml nor screenshot are enabled, different - # fragments do *not* make for different fingerprints. + # fragments do *not* make for different fingerprints. Same for + # extraction types if browserHtml is not set in # *Options.extractFrom. *( ( merge_dicts(body, headers, unknown, browser), @@ -374,6 +377,11 @@ def merge_dicts(*dicts): {"browserHtml": False}, {"screenshot": False}, {"browserHtml": False, "screenshot": False}, + {"product": True}, + { + "product": True, + "productOptions": {"extractFrom": "httpResponseBody"}, + }, ) ), ), @@ -393,7 +401,7 @@ def test_url_fragments(params, match): assert fingerprint1 != fingerprint2 -def test_autoextract(): +def test_extract_types(): crawler = get_crawler() fingerprinter = create_instance( ScrapyZyteAPIRequestFingerprinter, settings=crawler.settings, crawler=crawler @@ -405,3 +413,19 @@ def test_autoextract(): ) fingerprint2 = fingerprinter.fingerprint(request2) assert fingerprint1 != fingerprint2 + + +def test_request_body(): + crawler = get_crawler() + fingerprinter = create_instance( + ScrapyZyteAPIRequestFingerprinter, settings=crawler.settings, crawler=crawler + ) + request1 = Request( + "https://toscrape.com", meta={"zyte_api": {"httpRequestBody": "Zm9v"}} + ) + fingerprint1 = fingerprinter.fingerprint(request1) + request2 = Request( + "https://toscrape.com", meta={"zyte_api": {"httpRequestText": "foo"}} + ) + fingerprint2 = fingerprinter.fingerprint(request2) + assert fingerprint1 == fingerprint2