Skip to content

Commit

Permalink
Merge pull request #195 from scrapinghub/response
Browse files Browse the repository at this point in the history
add new AnyResponse
  • Loading branch information
kmike authored Jan 18, 2024
2 parents d783365 + 27b594e commit b7c6d51
Show file tree
Hide file tree
Showing 7 changed files with 83 additions and 1 deletion.
6 changes: 6 additions & 0 deletions docs/api-reference.rst
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,12 @@ Page Inputs
:inherited-members: str,bytes,MultiDict
:show-inheritance:

.. automodule:: web_poet.page_inputs.response
:members:
:undoc-members:
:inherited-members: str
:show-inheritance:

.. automodule:: web_poet.page_inputs.page_params
:members:
:undoc-members:
Expand Down
5 changes: 5 additions & 0 deletions docs/page-objects/inputs.rst
Original file line number Diff line number Diff line change
Expand Up @@ -66,6 +66,11 @@ define as inputs for a page object class, including:
status code and :class:`~web_poet.page_inputs.browser.BrowserHtml`
of a rendered web page.

- :class:`~web_poet.page_inputs.response.AnyResponse`, which holds either a
:class:`~web_poet.page_inputs.browser.BrowserResponse` or an
:class:`~web_poet.page_inputs.http.HttpResponse` as its ``.response``
attribute, depending on which one is available or more appropriate.

.. _Document Object Model: https://developer.mozilla.org/en-US/docs/Web/API/Document_Object_Model


Expand Down
34 changes: 34 additions & 0 deletions tests/test_page_inputs.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@

from web_poet import BrowserResponse, RequestUrl, ResponseUrl
from web_poet.page_inputs import (
AnyResponse,
BrowserHtml,
HttpRequest,
HttpRequestBody,
Expand Down Expand Up @@ -642,3 +643,36 @@ def test_stats() -> None:
stats.inc("c")

assert stats._stats._stats == {"a": "1", "b": 8, "c": 1}


def test_http_or_browser_response() -> None:
    """Check AnyResponse shortcuts for both a browser and an HTTP response."""
    url = "http://example.com"
    html = "<html><body><p>Hello, </p><p>world!</p></body></html>"
    body = html.encode()
    expected_paragraphs = ["Hello, ", "world!"]

    # Build one response of each supported kind, without a status code.
    browser_response = BrowserResponse(url=url, html=html)
    http_response = HttpResponse(url=url, body=body)
    any_browser = AnyResponse(response=browser_response)
    any_http = AnyResponse(response=http_response)

    # The wrapped object is exposed on ``.response`` and keeps its type.
    assert isinstance(any_browser.response, BrowserResponse)
    assert any_browser.response == browser_response
    assert isinstance(any_http.response, HttpResponse)
    assert any_http.response == http_response

    # Both wrappers must offer the same URL, text and selector shortcuts.
    for wrapped in (any_browser, any_http):
        assert isinstance(wrapped.url, ResponseUrl)
        assert str(wrapped.url) == url
        assert wrapped.text == html
        assert wrapped.xpath("//p/text()").getall() == expected_paragraphs
        assert wrapped.css("p::text").getall() == expected_paragraphs
        assert isinstance(wrapped.selector, parsel.Selector)
        assert str(wrapped.urljoin("products")) == "http://example.com/products"
        # No status was given when building the wrapped responses.
        assert wrapped.status is None

    # A status set on the wrapped response is visible through the wrapper.
    responses_with_status = (
        BrowserResponse(url=url, html=html, status=200),
        HttpResponse(url=url, body=body, status=200),
    )
    for inner in responses_with_status:
        assert AnyResponse(response=inner).status == 200
2 changes: 1 addition & 1 deletion tests/test_rules.py
Original file line number Diff line number Diff line change
Expand Up @@ -162,7 +162,7 @@ def test_apply_rule_kwargs_only() -> None:
ApplyRule(
"example.com",
*[params[r] for r in remove],
**{k: v for k, v in params.items() if k not in remove}, # type: ignore[arg-type]
**{k: v for k, v in params.items() if k not in remove}, # type: ignore[arg-type] # noqa: B038
)


Expand Down
1 change: 1 addition & 0 deletions web_poet/__init__.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
from .fields import field, item_from_fields, item_from_fields_sync
from .page_inputs import (
AnyResponse,
BrowserHtml,
BrowserResponse,
HttpClient,
Expand Down
1 change: 1 addition & 0 deletions web_poet/page_inputs/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,5 +9,6 @@
HttpResponseHeaders,
)
from .page_params import PageParams
from .response import AnyResponse
from .stats import Stats
from .url import RequestUrl, ResponseUrl
35 changes: 35 additions & 0 deletions web_poet/page_inputs/response.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
from typing import Optional, Union

import attrs

from web_poet.mixins import SelectableMixin, UrlShortcutsMixin
from web_poet.page_inputs.browser import BrowserResponse
from web_poet.page_inputs.http import HttpResponse
from web_poet.page_inputs.url import ResponseUrl


@attrs.define
class AnyResponse(SelectableMixin, UrlShortcutsMixin):
    """A wrapper around either a :class:`~.BrowserResponse` or an :class:`~.HttpResponse`.

    The wrapped object is available as ``response``; ``url``, ``text`` and
    ``status`` are read from it so that callers do not need to check which
    of the two response types they were given.
    """

    # The wrapped response object.
    response: Union[BrowserResponse, HttpResponse]

    @property
    def url(self) -> ResponseUrl:
        """URL of the wrapped response."""
        wrapped = self.response
        return wrapped.url

    @property
    def text(self) -> str:
        """Text or HTML contents of the wrapped response.

        For a :class:`~.BrowserResponse` this is its ``html`` attribute;
        otherwise it is the ``text`` attribute of the wrapped response.
        """
        wrapped = self.response
        return wrapped.html if isinstance(wrapped, BrowserResponse) else wrapped.text

    @property
    def status(self) -> Optional[int]:
        """Integer status code of the wrapped response, or ``None`` if it has none."""
        wrapped = self.response
        return wrapped.status

    def _selector_input(self) -> str:
        """Return the string used as selector input: the same value as ``text``."""
        return self.text

0 comments on commit b7c6d51

Please sign in to comment.