-
Notifications
You must be signed in to change notification settings - Fork 19
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Implement ZYTE_API_PRESERVE_DELAY (#204)
Co-authored-by: Georgiy Zatserklianyi <[email protected]> Co-authored-by: Adrián Chaves <[email protected]>
- Loading branch information
1 parent
beaf8ca
commit 5856129
Showing 3 changed files with 53 additions and 12 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -39,16 +39,26 @@ def spider_output_processor(middleware, request, spider): | |
|
||
|
||
@pytest.mark.parametrize( | ||
"mw_cls,processor", | ||
["mw_cls", "processor"], | ||
[ | ||
(ScrapyZyteAPIDownloaderMiddleware, request_processor), | ||
(ScrapyZyteAPISpiderMiddleware, start_request_processor), | ||
(ScrapyZyteAPISpiderMiddleware, spider_output_processor), | ||
], | ||
) | ||
@pytest.mark.parametrize( | ||
["settings", "preserve"], | ||
[ | ||
({}, True), | ||
({"ZYTE_API_PRESERVE_DELAY": False}, False), | ||
({"ZYTE_API_PRESERVE_DELAY": True}, True), | ||
({"AUTOTHROTTLE_ENABLED": True}, False), | ||
({"AUTOTHROTTLE_ENABLED": True, "ZYTE_API_PRESERVE_DELAY": True}, True), | ||
], | ||
) | ||
@ensureDeferred | ||
async def test_autothrottle_handling(mw_cls, processor): | ||
crawler = get_crawler() | ||
async def test_preserve_delay(mw_cls, processor, settings, preserve): | ||
crawler = get_crawler(settings_dict=settings) | ||
await crawler.crawl("a") | ||
spider = crawler.spider | ||
|
||
|
@@ -64,13 +74,13 @@ async def test_autothrottle_handling(mw_cls, processor): | |
_, slot = crawler.engine.downloader._get_slot(request, spider) | ||
assert slot.delay == spider.download_delay | ||
|
||
# On Zyte API requests, the download slot is changed, and its delay is set | ||
# to 0. | ||
# On Zyte API requests, the download slot is changed, and its delay may be | ||
# set to 0 depending on settings. | ||
request = Request("https://example.com", meta={"zyte_api": {}}) | ||
processor(middleware, request, spider) | ||
assert request.meta["download_slot"] == "zyte-api@example.com" | ||
_, slot = crawler.engine.downloader._get_slot(request, spider) | ||
assert slot.delay == 0 | ||
assert slot.delay == (5 if preserve else 0) | ||
|
||
# Requests that happen to already have the right download slot assigned | ||
# work the same. | ||
|
@@ -79,17 +89,18 @@ async def test_autothrottle_handling(mw_cls, processor): | |
processor(middleware, request, spider) | ||
assert request.meta["download_slot"] == "zyte-api@example.com" | ||
_, slot = crawler.engine.downloader._get_slot(request, spider) | ||
assert slot.delay == 0 | ||
assert slot.delay == (5 if preserve else 0) | ||
|
||
# The slot delay is set to 0 every time a request for the slot is | ||
# The slot delay is taken into account every time a request for the slot is | ||
# processed, so even if it gets changed later on somehow, the downloader | ||
# middleware will reset it to 0 again the next time it processes a request. | ||
# middleware may reset it to 0 again the next time it processes a request | ||
# depending on settings. | ||
slot.delay = 10 | ||
request = Request("https://example.com", meta={"zyte_api": {}}) | ||
processor(middleware, request, spider) | ||
assert request.meta["download_slot"] == "zyte-api@example.com" | ||
_, slot = crawler.engine.downloader._get_slot(request, spider) | ||
assert slot.delay == 0 | ||
assert slot.delay == (10 if preserve else 0) | ||
|
||
await crawler.stop() | ||
|
||
|