Skip to content

Commit

Permalink
chg: Improve strict typing
Browse files Browse the repository at this point in the history
  • Loading branch information
Rafiot committed Jan 26, 2024
1 parent 2def9db commit 1e3bb24
Show file tree
Hide file tree
Showing 2 changed files with 30 additions and 53 deletions.
81 changes: 29 additions & 52 deletions playwrightcapture/capture.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@
from charset_normalizer import from_bytes
from playwright.async_api import async_playwright, Frame, Error, Page, Download
from playwright.async_api import TimeoutError as PlaywrightTimeoutError
from playwright_stealth import stealth_async # type: ignore
from playwright_stealth import stealth_async # type: ignore[import]
from w3lib.html import strip_html5_whitespace
from w3lib.url import canonicalize_url, safe_url_string

Expand All @@ -46,8 +46,8 @@
BROWSER = Literal['chromium', 'firefox', 'webkit']

try:
import pydub # type: ignore
from speech_recognition import Recognizer, AudioFile # type: ignore
import pydub # type: ignore[import]
from speech_recognition import Recognizer, AudioFile # type: ignore[import]
CAN_SOLVE_CAPTCHA = True
except ImportError:
CAN_SOLVE_CAPTCHA = False
Expand Down Expand Up @@ -122,9 +122,9 @@ def __init__(self, browser: BROWSER | None=None, device_name: str | None=None,
self._user_agent: str = ''
self._timezone_id: str = ''
self._locale: str = ''
self._color_scheme: str = ''
self._color_scheme: Literal['dark', 'light', 'no-preference', 'null'] | None = None

async def __aenter__(self) -> 'Capture':
async def __aenter__(self) -> Capture:
'''Launch the browser'''
self._temp_harfile = NamedTemporaryFile(delete=False)

Expand All @@ -138,10 +138,9 @@ async def __aenter__(self) -> 'Capture':
elif self.browser_name not in self._browsers:
raise UnknownPlaywrightBrowser(f'Incorrect browser name {self.browser_name}, must be in {", ".join(self._browsers)}')

launch_settings = {}
if self.proxy:
launch_settings['proxy'] = self.proxy
self.browser = await self.playwright[self.browser_name].launch(**launch_settings) # type: ignore
self.browser = await self.playwright[self.browser_name].launch(
proxy=self.proxy if self.proxy else None
)
return self

async def __aexit__(self, exc_type: Any, exc: Any, tb: Any) -> None:
Expand Down Expand Up @@ -329,59 +328,37 @@ def user_agent(self, user_agent: str | None) -> None:
self._user_agent = user_agent

@property
def color_scheme(self) -> str:
def color_scheme(self) -> Literal['dark', 'light', 'no-preference', 'null'] | None:
return self._color_scheme

@color_scheme.setter
def color_scheme(self, color_scheme: str | None) -> None:
def color_scheme(self, color_scheme: Literal['dark', 'light', 'no-preference', 'null'] | None) -> None:
if not color_scheme:
return
schemes = ['light', 'dark', 'no-preference']
if color_scheme in ['light', 'dark', 'no-preference']:
schemes = ['light', 'dark', 'no-preference', 'null']
if color_scheme in schemes:
self._color_scheme = color_scheme
else:
raise InvalidPlaywrightParameter(f'Invalid color scheme ({color_scheme}), must be in {", ".join(schemes)}.')

async def initialize_context(self) -> None:
default_context_settings = {
'record_har_path': self._temp_harfile.name,
'ignore_https_errors': True
}

device_context_settings = {}
if self.device_name:
default_context_settings.update(self.playwright.devices[self.device_name])

if self.http_credentials:
default_context_settings['http_credentials'] = self.http_credentials

if self.user_agent:
# User defined UA, can overwrite device UA
default_context_settings['user_agent'] = self.user_agent

if self.locale:
default_context_settings['locale'] = self.locale

if self.timezone_id:
default_context_settings['timezone_id'] = self.timezone_id

if self.color_scheme:
default_context_settings['color_scheme'] = self.color_scheme

if self.viewport:
# User defined viewport, can overwrite device viewport
default_context_settings['viewport'] = self.viewport
elif 'viewport' not in default_context_settings:
# No viewport given, fallback to default
default_context_settings['viewport'] = self._default_viewport

if self.browser_name == 'firefox' and default_context_settings.get('is_mobile'):
# NOTE: Not supported, see https://github.com/microsoft/playwright-python/issues/1509
default_context_settings.pop('is_mobile')

# FIXME: video for debug
# default_context_settings['record_video_dir'] = './videos/'

self.context = await self.browser.new_context(**default_context_settings) # type: ignore
device_context_settings = self.playwright.devices[self.device_name]

self.context = await self.browser.new_context(
record_har_path=self._temp_harfile.name,
ignore_https_errors=True,
http_credentials=self.http_credentials if self.http_credentials else None,
user_agent=self.user_agent if self.user_agent else device_context_settings.pop('user_agent', None),
locale=self.locale if self.locale else None,
timezone_id=self.timezone_id if self.timezone_id else None,
color_scheme=self.color_scheme if self.color_scheme else None,
viewport=self.viewport if self.viewport else device_context_settings.pop('viewport', self._default_viewport),
# For debug only
# record_video_dir='./videos/',
**device_context_settings
)
self.context.set_default_timeout(self._capture_timeout * 1000)

if self.cookies:
Expand Down Expand Up @@ -650,7 +627,7 @@ async def handle_download(download: Download) -> None:
rendered_hostname_only=rendered_hostname_only,
max_depth_capture_time=max_capture_time),
timeout=max_capture_time + 1) # just adding a bit of padding so playwright has the chance to raise the exception first
to_return['children'].append(child_capture) # type: ignore
to_return['children'].append(child_capture) # type: ignore[union-attr]
except (TimeoutError, asyncio.exceptions.TimeoutError):
self.logger.info(f'Timeout error, took more than {max_capture_time}s. Unable to capture {url}.')
except Exception as e:
Expand Down
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[tool.poetry]
name = "PlaywrightCapture"
version = "1.22.7"
version = "1.22.8"
description = "A simple library to capture websites using playwright"
authors = ["Raphaël Vinot <[email protected]>"]
license = "BSD-3-Clause"
Expand Down

0 comments on commit 1e3bb24

Please sign in to comment.