diff --git a/mypy.ini b/mypy.ini index 6e76e80..5d2d137 100644 --- a/mypy.ini +++ b/mypy.ini @@ -3,6 +3,7 @@ strict = True warn_return_any = False show_error_context = True pretty = True +follow_untyped_imports = True [mypy-docs.source.*] ignore_errors = True diff --git a/playwrightcapture/capture.py b/playwrightcapture/capture.py index b1fa744..66b9ec0 100644 --- a/playwrightcapture/capture.py +++ b/playwrightcapture/capture.py @@ -13,12 +13,11 @@ import time from base64 import b64decode -from dataclasses import dataclass from io import BytesIO from logging import LoggerAdapter, Logger from tempfile import NamedTemporaryFile from typing import Any, TypedDict, Literal, TYPE_CHECKING -from collections.abc import MutableMapping, Iterator +from collections.abc import MutableMapping from urllib.parse import urlparse, unquote, urljoin, urlsplit, urlunsplit from zipfile import ZipFile @@ -31,7 +30,7 @@ from playwright._impl._errors import TargetClosedError from playwright.async_api import async_playwright, Frame, Error, Page, Download, Request from playwright.async_api import TimeoutError as PlaywrightTimeoutError -from playwright_stealth import stealth_async, StealthConfig # type: ignore[import-untyped] +from playwright_stealth import Stealth # type: ignore[attr-defined] from puremagic import PureError, from_string from w3lib.html import strip_html5_whitespace from w3lib.url import canonicalize_url, safe_url_string @@ -54,8 +53,8 @@ BROWSER = Literal['chromium', 'firefox', 'webkit'] try: - import pydub # type: ignore[import-untyped] - from speech_recognition import Recognizer, AudioFile # type: ignore[import-untyped] + import pydub + from speech_recognition import Recognizer, AudioFile CAN_SOLVE_CAPTCHA = True except ImportError: CAN_SOLVE_CAPTCHA = False @@ -94,39 +93,6 @@ def process(self, msg: str, kwargs: MutableMapping[str, Any]) -> tuple[str, Muta # https://bot.incolumitas.com/ # https://fingerprintjs.github.io/BotD/main/ -@dataclass -class PCStealthConfig(StealthConfig): # type: ignore[misc] - - @property - def enabled_scripts(self) -> Iterator[str]: - self.chrome_app = True - self.chrome_csi = True - self.chrome_runtime = True - self.chrome_load_times = True - self.navigator_plugins = True - self.hairline = True - self.iframe_content_window = True - self.media_codecs = True - - # permissions are handled directly in playwright - self.navigator_permissions = False - # Platform is correct now - self.navigator_platform = False - # probably useless, but it will fallback to 4 regardless - self.navigator_hardware_concurrency = 4 - # Webgl vendor is correct now - self.webgl_vendor = False - # Set by the viewport - self.outerdimensions = False - - # Not working with Playwright 1.45+ - self.navigator_languages = False # Causes issue - self.navigator_user_agent = False # Causes issues - self.navigator_vendor = False # Causes issues - - yield from super().enabled_scripts - - class Capture(): _browsers: list[BROWSER] = ['chromium', 'firefox', 'webkit'] @@ -460,6 +426,14 @@ async def initialize_context(self) -> None: # record_video_dir='./videos/', **device_context_settings ) + + stealth = Stealth( + navigator_languages_override=(self.locale, self.locale.split('-')[0]) if self.locale else ("en-US", "en"), + navigator_user_agent_override=ua, + init_scripts_only=True + ) + await stealth.apply_stealth_async(self.context) + self.context.set_default_timeout(self._capture_timeout * 1000) if self.cookies: @@ -849,8 +823,6 @@ async def store_request(request: Request) -> None: await self.__dialog_clickthrough(page) await self.__dialog_tarteaucitron_clickthrough(page) - await stealth_async(page, PCStealthConfig()) - page.set_default_timeout((self._capture_timeout - 2) * 1000) # trigger a callback on each request to store it in a dict indexed by URL to get it back from the favicon fetcher page.on("requestfinished", store_request) @@ -1356,12 +1328,12 @@ async def _recaptcha_solver(self, page: Page) -> bool: mp3_content = await response.read() with NamedTemporaryFile() as mp3_file, NamedTemporaryFile() as wav_file: mp3_file.write(mp3_content) - pydub.AudioSegment.from_mp3(mp3_file.name).export(wav_file.name, format="wav") - recognizer = Recognizer() - recaptcha_audio = AudioFile(wav_file.name) + pydub.AudioSegment.from_mp3(mp3_file.name).export(wav_file.name, format="wav") # type: ignore[attr-defined,no-untyped-call] + recognizer = Recognizer() # type: ignore[no-untyped-call] + recaptcha_audio = AudioFile(wav_file.name) # type: ignore[no-untyped-call] with recaptcha_audio as source: - audio = recognizer.record(source) - text = recognizer.recognize_google(audio) + audio = recognizer.record(source) # type: ignore[no-untyped-call] + text = recognizer.recognize_google(audio) # type: ignore[attr-defined] await main_frame.get_by_role("textbox", name="Enter what you hear").fill(text) await main_frame.get_by_role("button", name="Verify").click() await self._safe_wait(page, 5) diff --git a/poetry.lock b/poetry.lock index 2c4c52d..36a5c8f 100644 --- a/poetry.lock +++ b/poetry.lock @@ -1175,20 +1175,25 @@ pyee = "12.0.0" [[package]] name = "playwright-stealth" -version = "1.0.6" +version = "2.0.0rc4" description = "playwright stealth" optional = false -python-versions = ">=3, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*" -files = [ - {file = "playwright-stealth-1.0.6.tar.gz", hash = "sha256:b504d951d00fac755c7d13665a29611d415180510bd7d23f14ebc89439ba2043"}, - {file = "playwright_stealth-1.0.6-py3-none-any.whl", hash = "sha256:b1b2bcf58eb6859aa53d42c49b91c4e27b74a6d13fc3d0c85eea513dd55efda3"}, -] +python-versions = ">=3.8" +files = [] +develop = false [package.dependencies] playwright = "*" [package.extras] -test = ["pytest"] +lint = ["black (>=24.8.0,<24.9.0)"] +test = ["pytest", "pytest-asyncio"] + +[package.source] +type = "git" +url = "https://github.com/Mattwmaster58/playwright_stealth" +reference = "HEAD" +resolved_reference = "11a09ea492f1a43a5c1fee8ee79605b7f05fca43" [[package]] name = "pluggy" @@ -1935,4 +1940,4 @@ recaptcha = ["SpeechRecognition", "pydub"] [metadata] lock-version = "2.0" python-versions = "^3.9" -content-hash = "13bb80b2b378fe61ae3d2722ace042194affd67cb4236492a441a3731bfa2913" +content-hash = "36a692950f71bce521461e68c6a0397816b8ec178734eb4df15aa9b93f58bd9e" diff --git a/pyproject.toml b/pyproject.toml index 6964fd6..ef27e06 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -26,12 +26,12 @@ w3lib = "^2.2.1" pydub = {version = "^0.25.1", optional = true} SpeechRecognition = {version = ">=3.12.0", optional = true} tzdata = "^2024.2" -playwright-stealth = "^1.0.6" setuptools = "^75.6.0" puremagic = "^1.28" async-timeout = {version = "^4.0.3", python = "<3.11"} aiohttp = {version = "^3.11.11", extras = ["speedups"]} aiohttp-socks = "^0.10" +playwright-stealth = {git = "https://github.com/Mattwmaster58/playwright_stealth"} [tool.poetry.extras] recaptcha = ["pydub", "SpeechRecognition"]