From 5cebc2c3bb8baa21fa3332a4f36353cc06070935 Mon Sep 17 00:00:00 2001 From: PyExplorer Date: Thu, 21 Sep 2023 11:43:18 +0300 Subject: [PATCH 01/28] send new user_agent with api_params --- scrapy_zyte_api/handler.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/scrapy_zyte_api/handler.py b/scrapy_zyte_api/handler.py index 8cbdec90..2862ae48 100644 --- a/scrapy_zyte_api/handler.py +++ b/scrapy_zyte_api/handler.py @@ -2,7 +2,7 @@ import logging from copy import deepcopy from typing import Generator, Optional, Union - +from importlib.metadata import version from scrapy import Spider, signals from scrapy.core.downloader.handlers.http import HTTPDownloadHandler from scrapy.crawler import Crawler @@ -24,6 +24,10 @@ logger = logging.getLogger(__name__) +def _user_agent(package): + return f'{package}/{version(package)}' + + def _truncate_str(obj, index, text, limit): if len(text) <= limit: return @@ -208,6 +212,8 @@ async def _download_request( else: retrying = self._retry_policy self._log_request(api_params) + + api_params["user-agent"] = _user_agent('scrapy-zyte-api') try: api_response = await self._client.request_raw( api_params, From 75e1114b3b13eed35d2219f9e899f3ce0f96aa81 Mon Sep 17 00:00:00 2001 From: PyExplorer Date: Fri, 22 Sep 2023 15:48:28 +0300 Subject: [PATCH 02/28] add _user_agent() to utils --- scrapy_zyte_api/utils.py | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/scrapy_zyte_api/utils.py b/scrapy_zyte_api/utils.py index 119562b3..e2ec314e 100644 --- a/scrapy_zyte_api/utils.py +++ b/scrapy_zyte_api/utils.py @@ -1,6 +1,10 @@ +from importlib.metadata import version + import scrapy from packaging.version import Version +PACKAGE_NAME = "scrapy-zyte-api" + _SCRAPY_VERSION = Version(scrapy.__version__) _SCRAPY_2_1_0 = Version("2.1.0") _SCRAPY_2_4_0 = Version("2.4.0") @@ -17,3 +21,10 @@ _RESPONSE_HAS_ATTRIBUTES = _SCRAPY_VERSION >= _SCRAPY_2_6_0 _RESPONSE_HAS_IP_ADDRESS = _SCRAPY_VERSION >= _SCRAPY_2_1_0 
_RESPONSE_HAS_PROTOCOL = _SCRAPY_VERSION >= _SCRAPY_2_5_0 + + +def _user_agent(custom_user_agent): + user_agent = f"{PACKAGE_NAME}/{version(PACKAGE_NAME)}" + if custom_user_agent: + return f"{user_agent}, {custom_user_agent}" + return user_agent From 211a1b53a26f6ab15641d2d927923294493b8d3f Mon Sep 17 00:00:00 2001 From: PyExplorer Date: Fri, 22 Sep 2023 15:49:46 +0300 Subject: [PATCH 03/28] remove _user_agent() from handler --- scrapy_zyte_api/handler.py | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/scrapy_zyte_api/handler.py b/scrapy_zyte_api/handler.py index 2862ae48..820f9a19 100644 --- a/scrapy_zyte_api/handler.py +++ b/scrapy_zyte_api/handler.py @@ -2,7 +2,7 @@ import logging from copy import deepcopy from typing import Generator, Optional, Union -from importlib.metadata import version + from scrapy import Spider, signals from scrapy.core.downloader.handlers.http import HTTPDownloadHandler from scrapy.crawler import Crawler @@ -24,10 +24,6 @@ logger = logging.getLogger(__name__) -def _user_agent(package): - return f'{package}/{version(package)}' - - def _truncate_str(obj, index, text, limit): if len(text) <= limit: return @@ -213,7 +209,6 @@ async def _download_request( retrying = self._retry_policy self._log_request(api_params) - api_params["user-agent"] = _user_agent('scrapy-zyte-api') try: api_response = await self._client.request_raw( api_params, From 31267dcac30ddcf8842b656ea277c805891325d0 Mon Sep 17 00:00:00 2001 From: PyExplorer Date: Fri, 22 Sep 2023 15:50:42 +0300 Subject: [PATCH 04/28] add user_agent to AsyncClient --- scrapy_zyte_api/handler.py | 1 + 1 file changed, 1 insertion(+) diff --git a/scrapy_zyte_api/handler.py b/scrapy_zyte_api/handler.py index 820f9a19..9e9bf17b 100644 --- a/scrapy_zyte_api/handler.py +++ b/scrapy_zyte_api/handler.py @@ -20,6 +20,7 @@ from ._params import _ParamParser from .responses import ZyteAPIResponse, ZyteAPITextResponse, _process_response +from .utils import _user_agent logger = 
logging.getLogger(__name__) From 610477add1681f063998b4c4fc870923a55e345f Mon Sep 17 00:00:00 2001 From: PyExplorer Date: Fri, 22 Sep 2023 15:51:15 +0300 Subject: [PATCH 05/28] extract custom ua from settings and send to client --- scrapy_zyte_api/handler.py | 1 + 1 file changed, 1 insertion(+) diff --git a/scrapy_zyte_api/handler.py b/scrapy_zyte_api/handler.py index 9e9bf17b..b584f082 100644 --- a/scrapy_zyte_api/handler.py +++ b/scrapy_zyte_api/handler.py @@ -130,6 +130,7 @@ def _build_client(settings): api_key=settings.get("ZYTE_API_KEY") or None, api_url=settings.get("ZYTE_API_URL") or API_URL, n_conn=settings.getint("CONCURRENT_REQUESTS"), + user_agent=_user_agent(settings.get("_USER_AGENT")), ) except NoApiKey: logger.warning( From 1b5b14fa2c521d80d43db9133caa2310aeefece8 Mon Sep 17 00:00:00 2001 From: PyExplorer Date: Fri, 22 Sep 2023 16:08:09 +0300 Subject: [PATCH 06/28] move package name to function --- scrapy_zyte_api/utils.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/scrapy_zyte_api/utils.py b/scrapy_zyte_api/utils.py index e2ec314e..4016c860 100644 --- a/scrapy_zyte_api/utils.py +++ b/scrapy_zyte_api/utils.py @@ -3,8 +3,6 @@ import scrapy from packaging.version import Version -PACKAGE_NAME = "scrapy-zyte-api" - _SCRAPY_VERSION = Version(scrapy.__version__) _SCRAPY_2_1_0 = Version("2.1.0") _SCRAPY_2_4_0 = Version("2.4.0") @@ -24,7 +22,8 @@ def _user_agent(custom_user_agent): - user_agent = f"{PACKAGE_NAME}/{version(PACKAGE_NAME)}" + package_name = "scrapy-zyte-api" + user_agent = f"{package_name}/{version(package_name)}" if custom_user_agent: return f"{user_agent}, {custom_user_agent}" return user_agent From 7340a142866beb96032cbf79567dff95c1088da3 Mon Sep 17 00:00:00 2001 From: PyExplorer Date: Fri, 22 Sep 2023 19:30:38 +0300 Subject: [PATCH 07/28] add test for _user_agent() --- tests/test_utils.py | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) create mode 100644 tests/test_utils.py diff --git 
a/tests/test_utils.py b/tests/test_utils.py new file mode 100644 index 00000000..16666416 --- /dev/null +++ b/tests/test_utils.py @@ -0,0 +1,19 @@ +import pytest +from scrapy_zyte_api.utils import _user_agent, version + + +@pytest.mark.parametrize( + "custom_user_agent,expected", + ( + ( + None, + f'scrapy-zyte-api/{version("scrapy-zyte-api")}', + ), + ( + 'zyte-crawlers/0.0.1', + f'scrapy-zyte-api/{version("scrapy-zyte-api")}, zyte-crawlers/0.0.1', + ), + ), +) +def test_user_agent(custom_user_agent, expected): + assert _user_agent(custom_user_agent) == expected From 273cc79d14a00a1e9112477486c9a30c02dfc8b8 Mon Sep 17 00:00:00 2001 From: PyExplorer Date: Fri, 22 Sep 2023 19:33:00 +0300 Subject: [PATCH 08/28] formatting --- tests/test_utils.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tests/test_utils.py b/tests/test_utils.py index 16666416..49809ec5 100644 --- a/tests/test_utils.py +++ b/tests/test_utils.py @@ -1,4 +1,5 @@ import pytest + from scrapy_zyte_api.utils import _user_agent, version @@ -10,7 +11,7 @@ f'scrapy-zyte-api/{version("scrapy-zyte-api")}', ), ( - 'zyte-crawlers/0.0.1', + "zyte-crawlers/0.0.1", f'scrapy-zyte-api/{version("scrapy-zyte-api")}, zyte-crawlers/0.0.1', ), ), From f61a7147217890d60345030319c380a0b2e2a52d Mon Sep 17 00:00:00 2001 From: PyExplorer Date: Mon, 25 Sep 2023 09:30:38 +0300 Subject: [PATCH 09/28] remove old test for user_agent --- tests/test_utils.py | 20 -------------------- 1 file changed, 20 deletions(-) delete mode 100644 tests/test_utils.py diff --git a/tests/test_utils.py b/tests/test_utils.py deleted file mode 100644 index 49809ec5..00000000 --- a/tests/test_utils.py +++ /dev/null @@ -1,20 +0,0 @@ -import pytest - -from scrapy_zyte_api.utils import _user_agent, version - - -@pytest.mark.parametrize( - "custom_user_agent,expected", - ( - ( - None, - f'scrapy-zyte-api/{version("scrapy-zyte-api")}', - ), - ( - "zyte-crawlers/0.0.1", - f'scrapy-zyte-api/{version("scrapy-zyte-api")}, 
zyte-crawlers/0.0.1', - ), - ), -) -def test_user_agent(custom_user_agent, expected): - assert _user_agent(custom_user_agent) == expected From 18f4269d852862547ee8e45392affac4de78e46f Mon Sep 17 00:00:00 2001 From: PyExplorer Date: Mon, 25 Sep 2023 09:31:54 +0300 Subject: [PATCH 10/28] add test for user_agent (for _build_client) --- tests/test_handler.py | 25 +++++++++++++++++++++++++ 1 file changed, 25 insertions(+) diff --git a/tests/test_handler.py b/tests/test_handler.py index c56a83e9..cf678bcc 100644 --- a/tests/test_handler.py +++ b/tests/test_handler.py @@ -10,13 +10,16 @@ from pytest_twisted import ensureDeferred from scrapy import Request from scrapy.exceptions import NotConfigured +from scrapy.settings import Settings from scrapy.utils.misc import create_instance from scrapy.utils.test import get_crawler from zyte_api.aio.client import AsyncClient from zyte_api.aio.retry import RetryFactory from zyte_api.constants import API_URL +from zyte_api.utils import USER_AGENT as _USER_AGENT from scrapy_zyte_api.handler import ScrapyZyteAPIDownloadHandler +from scrapy_zyte_api.utils import USER_AGENT from . 
import DEFAULT_CLIENT_CONCURRENCY, SETTINGS, UNSET, make_handler, set_env @@ -456,3 +459,25 @@ def test_trust_env(enabled): crawler=crawler, ) assert handler._session._trust_env == enabled + + +@pytest.mark.parametrize( + "user_agent,expected", + ( + ( + None, + f'{_USER_AGENT}, {USER_AGENT}', + ), + ( + "zyte-crawlers/0.0.1", + f'{_USER_AGENT}, {USER_AGENT}, zyte-crawlers/0.0.1', + ), + ), +) +def test_user_agent_for_build_client(user_agent, expected): + settings = Settings({ + **SETTINGS, + "_USER_AGENT": user_agent, + }) + client = ScrapyZyteAPIDownloadHandler._build_client(settings) + assert client.user_agent == expected From d21bea8fbace9233267d7f019fdba536a2d98252 Mon Sep 17 00:00:00 2001 From: PyExplorer Date: Mon, 25 Sep 2023 09:32:23 +0300 Subject: [PATCH 11/28] use USER_AGENT as constant --- scrapy_zyte_api/utils.py | 10 ++-------- 1 file changed, 2 insertions(+), 8 deletions(-) diff --git a/scrapy_zyte_api/utils.py b/scrapy_zyte_api/utils.py index 4016c860..aed4c103 100644 --- a/scrapy_zyte_api/utils.py +++ b/scrapy_zyte_api/utils.py @@ -3,6 +3,8 @@ import scrapy from packaging.version import Version +USER_AGENT = f"scrapy-zyte-api/{version('scrapy-zyte-api')}" + _SCRAPY_VERSION = Version(scrapy.__version__) _SCRAPY_2_1_0 = Version("2.1.0") _SCRAPY_2_4_0 = Version("2.4.0") @@ -19,11 +21,3 @@ _RESPONSE_HAS_ATTRIBUTES = _SCRAPY_VERSION >= _SCRAPY_2_6_0 _RESPONSE_HAS_IP_ADDRESS = _SCRAPY_VERSION >= _SCRAPY_2_1_0 _RESPONSE_HAS_PROTOCOL = _SCRAPY_VERSION >= _SCRAPY_2_5_0 - - -def _user_agent(custom_user_agent): - package_name = "scrapy-zyte-api" - user_agent = f"{package_name}/{version(package_name)}" - if custom_user_agent: - return f"{user_agent}, {custom_user_agent}" - return user_agent From b712e6a5c0e25f51d413904999980585a20c126f Mon Sep 17 00:00:00 2001 From: PyExplorer Date: Mon, 25 Sep 2023 09:33:12 +0300 Subject: [PATCH 12/28] send user_agent to client --- scrapy_zyte_api/handler.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git
a/scrapy_zyte_api/handler.py b/scrapy_zyte_api/handler.py index b584f082..23fde820 100644 --- a/scrapy_zyte_api/handler.py +++ b/scrapy_zyte_api/handler.py @@ -17,10 +17,11 @@ from zyte_api.aio.errors import RequestError from zyte_api.apikey import NoApiKey from zyte_api.constants import API_URL +from zyte_api.utils import USER_AGENT as _USER_AGENT from ._params import _ParamParser from .responses import ZyteAPIResponse, ZyteAPITextResponse, _process_response -from .utils import _user_agent +from .utils import USER_AGENT logger = logging.getLogger(__name__) @@ -130,7 +131,7 @@ def _build_client(settings): api_key=settings.get("ZYTE_API_KEY") or None, api_url=settings.get("ZYTE_API_URL") or API_URL, n_conn=settings.getint("CONCURRENT_REQUESTS"), - user_agent=_user_agent(settings.get("_USER_AGENT")), + user_agent=f'{_USER_AGENT}, {USER_AGENT}, {settings.get("_USER_AGENT", "")}'.rstrip(", "), ) except NoApiKey: logger.warning( From a9a7925b8dc23e1094300138b0285bec20382251 Mon Sep 17 00:00:00 2001 From: PyExplorer Date: Mon, 25 Sep 2023 23:10:35 +0300 Subject: [PATCH 13/28] change order for user agent in test/formatting --- tests/test_handler.py | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/tests/test_handler.py b/tests/test_handler.py index cf678bcc..89a5de3e 100644 --- a/tests/test_handler.py +++ b/tests/test_handler.py @@ -16,7 +16,7 @@ from zyte_api.aio.client import AsyncClient from zyte_api.aio.retry import RetryFactory from zyte_api.constants import API_URL -from zyte_api.utils import USER_AGENT as _USER_AGENT +from zyte_api.utils import USER_AGENT as PYTHON_ZYTE_API_USER_AGENT from scrapy_zyte_api.handler import ScrapyZyteAPIDownloadHandler from scrapy_zyte_api.utils import USER_AGENT @@ -466,18 +466,20 @@ def test_trust_env(enabled): ( ( None, - f'{_USER_AGENT}, {USER_AGENT}', + f"{USER_AGENT} {PYTHON_ZYTE_API_USER_AGENT}", ), ( "zyte-crawlers/0.0.1", - f'{_USER_AGENT}, {USER_AGENT}, zyte-crawlers/0.0.1', + 
f"zyte-crawlers/0.0.1 {USER_AGENT} {PYTHON_ZYTE_API_USER_AGENT}", ), ), ) def test_user_agent_for_build_client(user_agent, expected): - settings = Settings({ - **SETTINGS, - "_USER_AGENT": user_agent, - }) + settings = Settings( + { + **SETTINGS, + "_USER_AGENT": user_agent, + } + ) client = ScrapyZyteAPIDownloadHandler._build_client(settings) assert client.user_agent == expected From e4a63624b6cb3b8fe90c8d6622db392759f0c052 Mon Sep 17 00:00:00 2001 From: PyExplorer Date: Mon, 25 Sep 2023 23:11:20 +0300 Subject: [PATCH 14/28] change order and delimiter for user agent in client --- scrapy_zyte_api/handler.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/scrapy_zyte_api/handler.py b/scrapy_zyte_api/handler.py index 23fde820..2378c76c 100644 --- a/scrapy_zyte_api/handler.py +++ b/scrapy_zyte_api/handler.py @@ -17,7 +17,7 @@ from zyte_api.aio.errors import RequestError from zyte_api.apikey import NoApiKey from zyte_api.constants import API_URL -from zyte_api.utils import USER_AGENT as _USER_AGENT +from zyte_api.utils import USER_AGENT as PYTHON_ZYTE_API_USER_AGENT from ._params import _ParamParser from .responses import ZyteAPIResponse, ZyteAPITextResponse, _process_response @@ -131,7 +131,7 @@ def _build_client(settings): api_key=settings.get("ZYTE_API_KEY") or None, api_url=settings.get("ZYTE_API_URL") or API_URL, n_conn=settings.getint("CONCURRENT_REQUESTS"), - user_agent=f'{_USER_AGENT}, {USER_AGENT}, {settings.get("_USER_AGENT", "")}'.rstrip(", "), + user_agent=f'{settings.get("_USER_AGENT", "")} {USER_AGENT} {PYTHON_ZYTE_API_USER_AGENT}'.lstrip(), ) except NoApiKey: logger.warning( From 1a945cb1170d043b831ba522ab7830aea4dfb992 Mon Sep 17 00:00:00 2001 From: PyExplorer Date: Wed, 27 Sep 2023 08:55:59 +0300 Subject: [PATCH 15/28] rename _USER_AGENT --- scrapy_zyte_api/handler.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scrapy_zyte_api/handler.py b/scrapy_zyte_api/handler.py index 2378c76c..7ae87aa3 100644 ---
a/scrapy_zyte_api/handler.py +++ b/scrapy_zyte_api/handler.py @@ -131,7 +131,7 @@ def _build_client(settings): api_key=settings.get("ZYTE_API_KEY") or None, api_url=settings.get("ZYTE_API_URL") or API_URL, n_conn=settings.getint("CONCURRENT_REQUESTS"), - user_agent=f'{settings.get("_USER_AGENT", "")} {USER_AGENT} {PYTHON_ZYTE_API_USER_AGENT}'.lstrip(), + user_agent=settings.get("_ZYTE_API_USER_AGENT") or f'{USER_AGENT} {PYTHON_ZYTE_API_USER_AGENT}', ) except NoApiKey: logger.warning( From 2ba524136ac2784735f549c139f761d51a04da60 Mon Sep 17 00:00:00 2001 From: PyExplorer Date: Wed, 27 Sep 2023 08:57:02 +0300 Subject: [PATCH 16/28] _ZYTE_API_USER_AGENT rewrites any other user-agent --- tests/test_handler.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/test_handler.py b/tests/test_handler.py index 89a5de3e..53e5d126 100644 --- a/tests/test_handler.py +++ b/tests/test_handler.py @@ -470,7 +470,7 @@ def test_trust_env(enabled): ), ( "zyte-crawlers/0.0.1", - f"zyte-crawlers/0.0.1 {USER_AGENT} {PYTHON_ZYTE_API_USER_AGENT}", + "zyte-crawlers/0.0.1", ), ), ) @@ -478,7 +478,7 @@ def test_user_agent_for_build_client(user_agent, expected): settings = Settings( { **SETTINGS, - "_USER_AGENT": user_agent, + "_ZYTE_API_USER_AGENT": user_agent, } ) client = ScrapyZyteAPIDownloadHandler._build_client(settings) From b941f11445ed236ca909e6aa8d47e5c11348de12 Mon Sep 17 00:00:00 2001 From: PyExplorer Date: Wed, 27 Sep 2023 09:10:53 +0300 Subject: [PATCH 17/28] add __version__.py --- scrapy_zyte_api/__version__.py | 1 + 1 file changed, 1 insertion(+) create mode 100644 scrapy_zyte_api/__version__.py diff --git a/scrapy_zyte_api/__version__.py b/scrapy_zyte_api/__version__.py new file mode 100644 index 00000000..ae4865cf --- /dev/null +++ b/scrapy_zyte_api/__version__.py @@ -0,0 +1 @@ +__version__ = '0.11.1' From dd9bddc183ec491998f17cdc255449bcbce1296f Mon Sep 17 00:00:00 2001 From: PyExplorer Date: Wed, 27 Sep 2023 09:11:53 +0300 Subject: [PATCH 18/28] 
fetching version from __version__.py --- setup.py | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/setup.py b/setup.py index cc4067cf..c65e2259 100644 --- a/setup.py +++ b/setup.py @@ -1,8 +1,18 @@ +import os import setuptools + +def get_version(): + about = {} + here = os.path.abspath(os.path.dirname(__file__)) + with open(os.path.join(here, 'scrapy_zyte_api/__version__.py')) as f: + exec(f.read(), about) + return about['__version__'] + + setuptools.setup( name="scrapy-zyte-api", - version="0.11.1", + version=get_version(), description="Client library to process URLs through Zyte API", long_description=open("README.rst").read(), long_description_content_type="text/x-rst", From a1c51aad45f49634c6a6af15e9ce935a642fbc2b Mon Sep 17 00:00:00 2001 From: PyExplorer Date: Wed, 27 Sep 2023 09:20:09 +0300 Subject: [PATCH 19/28] formatting --- setup.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/setup.py b/setup.py index c65e2259..705eb67c 100644 --- a/setup.py +++ b/setup.py @@ -1,13 +1,14 @@ import os + import setuptools def get_version(): about = {} here = os.path.abspath(os.path.dirname(__file__)) - with open(os.path.join(here, 'scrapy_zyte_api/__version__.py')) as f: + with open(os.path.join(here, "scrapy_zyte_api/__version__.py")) as f: exec(f.read(), about) - return about['__version__'] + return about["__version__"] setuptools.setup( From d13670c9cdb21831d9c7d440fd8be9f142d8fe61 Mon Sep 17 00:00:00 2001 From: PyExplorer Date: Wed, 27 Sep 2023 09:20:27 +0300 Subject: [PATCH 20/28] formatting --- scrapy_zyte_api/__version__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scrapy_zyte_api/__version__.py b/scrapy_zyte_api/__version__.py index ae4865cf..fee46bd8 100644 --- a/scrapy_zyte_api/__version__.py +++ b/scrapy_zyte_api/__version__.py @@ -1 +1 @@ -__version__ = '0.11.1' +__version__ = "0.11.1" From 327e89f35388d7a8f7ccdd2c473ffeb54e12b064 Mon Sep 17 00:00:00 2001 From: PyExplorer Date: Wed, 
27 Sep 2023 09:21:09 +0300 Subject: [PATCH 21/28] fix changing version in bump --- .bumpversion.cfg | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/.bumpversion.cfg b/.bumpversion.cfg index cf82a9fc..d445c9e3 100644 --- a/.bumpversion.cfg +++ b/.bumpversion.cfg @@ -4,6 +4,4 @@ commit = True tag = True tag_name = {new_version} -[bumpversion:file:setup.py] -search = version="{current_version}", -replace = version="{new_version}", +[bumpversion:file:scrapy_zyte_api/__version__.py] From a928a2614c55571677f84cbedc60ab2bab8ab55f Mon Sep 17 00:00:00 2001 From: PyExplorer Date: Wed, 27 Sep 2023 09:22:05 +0300 Subject: [PATCH 22/28] formatting --- scrapy_zyte_api/handler.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/scrapy_zyte_api/handler.py b/scrapy_zyte_api/handler.py index 7ae87aa3..51d4ea9b 100644 --- a/scrapy_zyte_api/handler.py +++ b/scrapy_zyte_api/handler.py @@ -131,7 +131,8 @@ def _build_client(settings): api_key=settings.get("ZYTE_API_KEY") or None, api_url=settings.get("ZYTE_API_URL") or API_URL, n_conn=settings.getint("CONCURRENT_REQUESTS"), - user_agent=settings.get("_ZYTE_API_USER_AGENT") or f'{USER_AGENT} {PYTHON_ZYTE_API_USER_AGENT}', + user_agent=settings.get("_ZYTE_API_USER_AGENT") + or f"{USER_AGENT} {PYTHON_ZYTE_API_USER_AGENT}", ) except NoApiKey: logger.warning( From 43f50eaf2e741017ea9e4d86fb5a6c3552322f54 Mon Sep 17 00:00:00 2001 From: PyExplorer Date: Wed, 27 Sep 2023 09:22:38 +0300 Subject: [PATCH 23/28] set version to USER_AGENT from __version__.py --- scrapy_zyte_api/utils.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/scrapy_zyte_api/utils.py b/scrapy_zyte_api/utils.py index aed4c103..02c2011c 100644 --- a/scrapy_zyte_api/utils.py +++ b/scrapy_zyte_api/utils.py @@ -1,9 +1,9 @@ -from importlib.metadata import version - import scrapy from packaging.version import Version -USER_AGENT = f"scrapy-zyte-api/{version('scrapy-zyte-api')}" +from .__version__ import __version__ + 
+USER_AGENT = f"scrapy-zyte-api/{__version__}" _SCRAPY_VERSION = Version(scrapy.__version__) _SCRAPY_2_1_0 = Version("2.1.0") From 32c4b975cd7edc5d71549c6f5fb6740ec046bfd9 Mon Sep 17 00:00:00 2001 From: Shevchenko Taras Date: Wed, 27 Sep 2023 12:20:12 +0300 Subject: [PATCH 24/28] Set new version MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: Adrián Chaves --- scrapy_zyte_api/__version__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scrapy_zyte_api/__version__.py b/scrapy_zyte_api/__version__.py index fee46bd8..ea370a8e 100644 --- a/scrapy_zyte_api/__version__.py +++ b/scrapy_zyte_api/__version__.py @@ -1 +1 @@ -__version__ = "0.11.1" +__version__ = "0.12.0" From 5e997dd2dce20996e73e94aa18ee91253a782c55 Mon Sep 17 00:00:00 2001 From: PyExplorer Date: Wed, 27 Sep 2023 13:48:39 +0300 Subject: [PATCH 25/28] fix test to use full user_agent --- tests/test_handler.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/tests/test_handler.py b/tests/test_handler.py index 53e5d126..0a27d3d0 100644 --- a/tests/test_handler.py +++ b/tests/test_handler.py @@ -16,7 +16,6 @@ from zyte_api.aio.client import AsyncClient from zyte_api.aio.retry import RetryFactory from zyte_api.constants import API_URL -from zyte_api.utils import USER_AGENT as PYTHON_ZYTE_API_USER_AGENT from scrapy_zyte_api.handler import ScrapyZyteAPIDownloadHandler from scrapy_zyte_api.utils import USER_AGENT @@ -466,7 +465,7 @@ def test_trust_env(enabled): ( ( None, - f"{USER_AGENT} {PYTHON_ZYTE_API_USER_AGENT}", + USER_AGENT, ), ( "zyte-crawlers/0.0.1", From d3fad2420ae897144ef1a65e090b4afc92ece75c Mon Sep 17 00:00:00 2001 From: PyExplorer Date: Wed, 27 Sep 2023 13:49:32 +0300 Subject: [PATCH 26/28] construct user-agent at once --- scrapy_zyte_api/utils.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/scrapy_zyte_api/utils.py b/scrapy_zyte_api/utils.py index 02c2011c..17776ad2 100644 --- 
a/scrapy_zyte_api/utils.py +++ b/scrapy_zyte_api/utils.py @@ -1,9 +1,10 @@ import scrapy from packaging.version import Version +from zyte_api.utils import USER_AGENT as PYTHON_ZYTE_API_USER_AGENT from .__version__ import __version__ -USER_AGENT = f"scrapy-zyte-api/{__version__}" +USER_AGENT = f"scrapy-zyte-api/{__version__} {PYTHON_ZYTE_API_USER_AGENT}" _SCRAPY_VERSION = Version(scrapy.__version__) _SCRAPY_2_1_0 = Version("2.1.0") From c77966280dbf7d11c86880028e3ba1841970b7d5 Mon Sep 17 00:00:00 2001 From: PyExplorer Date: Wed, 27 Sep 2023 13:51:25 +0300 Subject: [PATCH 27/28] using USER_AGENT as default user agent --- scrapy_zyte_api/handler.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/scrapy_zyte_api/handler.py b/scrapy_zyte_api/handler.py index 51d4ea9b..fda7f40a 100644 --- a/scrapy_zyte_api/handler.py +++ b/scrapy_zyte_api/handler.py @@ -17,7 +17,6 @@ from zyte_api.aio.errors import RequestError from zyte_api.apikey import NoApiKey from zyte_api.constants import API_URL -from zyte_api.utils import USER_AGENT as PYTHON_ZYTE_API_USER_AGENT from ._params import _ParamParser from .responses import ZyteAPIResponse, ZyteAPITextResponse, _process_response @@ -131,8 +130,7 @@ def _build_client(settings): api_key=settings.get("ZYTE_API_KEY") or None, api_url=settings.get("ZYTE_API_URL") or API_URL, n_conn=settings.getint("CONCURRENT_REQUESTS"), - user_agent=settings.get("_ZYTE_API_USER_AGENT") - or f"{USER_AGENT} {PYTHON_ZYTE_API_USER_AGENT}", + user_agent=settings.get("_ZYTE_API_USER_AGENT", default=USER_AGENT), ) except NoApiKey: logger.warning( From 7b72bfe822b51d85dbb4159e5de68f7a40d016e3 Mon Sep 17 00:00:00 2001 From: PyExplorer Date: Wed, 27 Sep 2023 15:03:10 +0300 Subject: [PATCH 28/28] bump zyte-api version to the latest with user-agent --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index 705eb67c..19561aa5 100644 --- a/setup.py +++ b/setup.py @@ -25,7 +25,7 @@ def get_version(): 
install_requires=[ "packaging>=20.0", "scrapy>=2.0.1", - "zyte-api>=0.4.0", + "zyte-api>=0.4.7", ], extras_require={ # Sync with [testenv:provider-pinned] @ tox.ini