Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Identify scrapy-zyte-api usage via custom user-agent #130

Merged
merged 29 commits into from
Sep 29, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
29 commits
Select commit Hold shift + click to select a range
5cebc2c
send new user_agent with api_params
PyExplorer Sep 21, 2023
75e1114
add _user_agent() to utils
PyExplorer Sep 22, 2023
211a1b5
remove _user_agent() from handler
PyExplorer Sep 22, 2023
31267dc
add user_agent to AsyncClient
PyExplorer Sep 22, 2023
610477a
extract custom ua from settings and send to client
PyExplorer Sep 22, 2023
1b5b14f
move package name to function
PyExplorer Sep 22, 2023
7340a14
add test for _user_agent()
PyExplorer Sep 22, 2023
273cc79
formatting
PyExplorer Sep 22, 2023
f61a714
remove old test for user_agent
PyExplorer Sep 25, 2023
18f4269
add test for user_agent (for _build_client)
PyExplorer Sep 25, 2023
d21bea8
Use USER_AGENT as constant
PyExplorer Sep 25, 2023
b712e6a
send user_agent to client
PyExplorer Sep 25, 2023
a9a7925
change order for user agent in test/formatting
PyExplorer Sep 25, 2023
e4a6362
change order and delimiter for user agent in client
PyExplorer Sep 25, 2023
1a945cb
rename _USER_AGENT
PyExplorer Sep 27, 2023
2ba5241
_ZYTE_API_USER_AGENT rewrites any other user-agent
PyExplorer Sep 27, 2023
b941f11
add __version__.py
PyExplorer Sep 27, 2023
dd9bddc
fetching version from __version__.py
PyExplorer Sep 27, 2023
a1c51aa
formatting
PyExplorer Sep 27, 2023
d13670c
formatting
PyExplorer Sep 27, 2023
327e89f
fix changing version in bump
PyExplorer Sep 27, 2023
a928a26
formatting
PyExplorer Sep 27, 2023
43f50ea
set version to USER_AGENT from __version__.py
PyExplorer Sep 27, 2023
2f03c47
Merge branch 'main' into user-agent-for-scrapy-zyte-api
PyExplorer Sep 27, 2023
32c4b97
Set new version
PyExplorer Sep 27, 2023
5e997dd
fix test to use full user_agent
PyExplorer Sep 27, 2023
d3fad24
construct user-agent at once
PyExplorer Sep 27, 2023
c779662
using USER_AGENT as default user agent
PyExplorer Sep 27, 2023
7b72bfe
bump zyte-api version to the latest with user-agent
PyExplorer Sep 27, 2023
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 1 addition & 3 deletions .bumpversion.cfg
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,4 @@ commit = True
tag = True
tag_name = {new_version}

[bumpversion:file:setup.py]
search = version="{current_version}",
replace = version="{new_version}",
[bumpversion:file:scrapy_zyte_api/__version__.py]
Gallaecio marked this conversation as resolved.
Show resolved Hide resolved
1 change: 1 addition & 0 deletions scrapy_zyte_api/__version__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
__version__ = "0.12.0"
3 changes: 3 additions & 0 deletions scrapy_zyte_api/handler.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@

from ._params import _ParamParser
from .responses import ZyteAPIResponse, ZyteAPITextResponse, _process_response
from .utils import USER_AGENT

logger = logging.getLogger(__name__)

Expand Down Expand Up @@ -129,6 +130,7 @@ def _build_client(settings):
api_key=settings.get("ZYTE_API_KEY") or None,
api_url=settings.get("ZYTE_API_URL") or API_URL,
n_conn=settings.getint("CONCURRENT_REQUESTS"),
user_agent=settings.get("_ZYTE_API_USER_AGENT", default=USER_AGENT),
)
except NoApiKey:
logger.warning(
Expand Down Expand Up @@ -208,6 +210,7 @@ async def _download_request(
else:
retrying = self._retry_policy
self._log_request(api_params)

try:
api_response = await self._client.request_raw(
api_params,
Expand Down
5 changes: 5 additions & 0 deletions scrapy_zyte_api/utils.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,10 @@
import scrapy
from packaging.version import Version
from zyte_api.utils import USER_AGENT as PYTHON_ZYTE_API_USER_AGENT

from .__version__ import __version__

# User agent string for this package: its own name and version, followed by
# the user agent string imported from zyte_api.utils.
USER_AGENT = f"scrapy-zyte-api/{__version__} {PYTHON_ZYTE_API_USER_AGENT}"

_SCRAPY_VERSION = Version(scrapy.__version__)
_SCRAPY_2_1_0 = Version("2.1.0")
Expand Down
15 changes: 13 additions & 2 deletions setup.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,19 @@
import os

import setuptools


def get_version():
    """Return the ``__version__`` string defined in the package's version file.

    The file ``scrapy_zyte_api/__version__.py`` is located relative to the
    directory containing this script, and its contents are executed in a
    fresh namespace so the package itself does not have to be imported.
    """
    base_dir = os.path.abspath(os.path.dirname(__file__))
    version_path = os.path.join(base_dir, "scrapy_zyte_api/__version__.py")
    with open(version_path) as version_file:
        source = version_file.read()
    # Execute the version file on its own and read back what it defined.
    namespace = {}
    exec(source, namespace)
    return namespace["__version__"]


setuptools.setup(
name="scrapy-zyte-api",
version="0.12.0",
version=get_version(),
description="Client library to process URLs through Zyte API",
long_description=open("README.rst").read(),
long_description_content_type="text/x-rst",
Expand All @@ -14,7 +25,7 @@
install_requires=[
"packaging>=20.0",
"scrapy>=2.0.1",
"zyte-api>=0.4.0",
"zyte-api>=0.4.7",
],
extras_require={
# Sync with [testenv:provider-pinned] @ tox.ini
Expand Down
26 changes: 26 additions & 0 deletions tests/test_handler.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,13 +10,15 @@
from pytest_twisted import ensureDeferred
from scrapy import Request
from scrapy.exceptions import NotConfigured
from scrapy.settings import Settings
from scrapy.utils.misc import create_instance
from scrapy.utils.test import get_crawler
from zyte_api.aio.client import AsyncClient
from zyte_api.aio.retry import RetryFactory
from zyte_api.constants import API_URL

from scrapy_zyte_api.handler import ScrapyZyteAPIDownloadHandler
from scrapy_zyte_api.utils import USER_AGENT

from . import DEFAULT_CLIENT_CONCURRENCY, SETTINGS, UNSET, make_handler, set_env

Expand Down Expand Up @@ -456,3 +458,27 @@ def test_trust_env(enabled):
crawler=crawler,
)
assert handler._session._trust_env == enabled


@pytest.mark.parametrize(
    ("user_agent", "expected"),
    [
        (None, USER_AGENT),
        ("zyte-crawlers/0.0.1", "zyte-crawlers/0.0.1"),
    ],
)
def test_user_agent_for_build_client(user_agent, expected):
    """Check the user agent of the client produced by ``_build_client``.

    With ``_ZYTE_API_USER_AGENT`` set to ``None`` the client is expected to
    carry ``USER_AGENT``; with an explicit value it is expected to carry that
    value unchanged.
    """
    overrides = dict(SETTINGS)
    overrides["_ZYTE_API_USER_AGENT"] = user_agent
    built_client = ScrapyZyteAPIDownloadHandler._build_client(Settings(overrides))
    assert built_client.user_agent == expected