From bfa9e875c839ac60aa4c06fe3c508a5bb498a9de Mon Sep 17 00:00:00 2001 From: Grzegorz Milka Date: Mon, 9 Sep 2024 09:45:37 +0200 Subject: [PATCH] refactor(coop): refresh Coop implementation --- fetcher/coop_supercard.py | 34 +++++++++++++++++++++++++- fetcher/playwrightutils.py | 24 ++++++++++++++++-- fetcher/tool.py | 50 +++++++++++++++----------------------- 3 files changed, 74 insertions(+), 34 deletions(-) diff --git a/fetcher/coop_supercard.py b/fetcher/coop_supercard.py index c14999b..f8c3000 100644 --- a/fetcher/coop_supercard.py +++ b/fetcher/coop_supercard.py @@ -9,9 +9,15 @@ import playwright.async_api import requests -from . import op +from . import op, playwrightutils from .fileutils import atomic_write +__all__ = [ + 'Credentials', + 'fetch_credentials', + 'fetch_and_save_receipts', +] + class Credentials(NamedTuple): id: str @@ -26,6 +32,32 @@ async def fetch_credentials(client: op.OpSdkClient) -> Credentials: return Credentials(id=username, pwd=password) +async def fetch_and_save_receipts( + last_bc_path: pathlib.Path, + download_directory: pathlib.Path, + creds: Credentials, + page: playwright.async_api.Page, + browser_context: playwright.async_api.BrowserContext, + log: Callable[[str], None], +): + """Fetches and saves Coop receipts from supercard.ch.""" + last_bc = load_last_bc(last_bc_path) + log(f'Last pulled BC is {last_bc}.') + reverse_chronological_receipt_urls = await fetch_receipt_urls(page, creds) + # Wait 5 seconds to make sure that all background scripts have done + # their work. 
+ await asyncio.sleep(5) + cookies = playwrightutils.playwright_cookie_jar_to_requests_cookies( + await browser_context.cookies()) + chronological_unprocessed_receipt_urls = get_chronological_unprocessed_urls( + reverse_chronological_receipt_urls, last_bc) + for coop_receipt in fetch_receipts( + chronological_unprocessed_receipt_urls, + lambda url: fetch_receipt(url, cookies)): + log(f'Saving a receipt with BC={coop_receipt.bc}.') + save_receipt(download_directory, last_bc_path, receipt=coop_receipt) + + async def login(page: playwright.async_api.Page, creds: Credentials) -> None: """Logs in to supercard.ch. diff --git a/fetcher/playwrightutils.py b/fetcher/playwrightutils.py index 5a3c184..10a44e8 100644 --- a/fetcher/playwrightutils.py +++ b/fetcher/playwrightutils.py @@ -141,11 +141,13 @@ async def intercept_download( @contextlib.asynccontextmanager -async def new_page( +async def new_stack( browser_type: Browser, headless: bool = False, downloads_path: Optional[pathlib.Path] = None -) -> typing.AsyncIterator[playwright.async_api.Page]: +) -> typing.AsyncIterator[ + tuple[playwright.async_api.Playwright, playwright.async_api.Browser, + playwright.async_api.BrowserContext, playwright.async_api.Page]]: """Opens a new page in a new context. :param browser playwright.async_api.BrowserType: The browser to use. @@ -161,4 +163,22 @@ async def new_page( async_closing(await browser.new_context(no_viewport=not headless)) as context, async_closing(await context.new_page()) as page): + yield (pw, browser, context, page) + + +@contextlib.asynccontextmanager +async def new_page( + browser_type: Browser, + headless: bool = False, + downloads_path: Optional[pathlib.Path] = None +) -> typing.AsyncIterator[playwright.async_api.Page]: + """Opens a new page in a new context. + + :param browser playwright.async_api.BrowserType: The browser to use. 
+ :param headless bool: Whether to run a fixed-viewport headless browser or a + responsive one. Defaults to False. + :param downloads_path Optional[pathlib.Path]: The path used for downloads. + """ + async with new_stack(browser_type, headless, + downloads_path) as (_, _, _, page): yield page diff --git a/fetcher/tool.py b/fetcher/tool.py index 5ee48b6..393c46b 100755 --- a/fetcher/tool.py +++ b/fetcher/tool.py @@ -150,7 +150,10 @@ async def run(): def coop_supercard_pull(ctx, headless: bool, verbose: bool) -> None: """Fetches Coop receipt PDFs from supercard.ch. - This command saves the PDFs in the download directory. + This command: + + * Saves the PDFs in the download directory as "Coop BC.pdf".\n + * Writes the last receipt’s BC (an identifier) to the last BC file. supercard.ch occasionally asks for a captcha. When this happens, human intervention is required. @@ -158,38 +161,23 @@ def coop_supercard_pull(ctx, headless: bool, verbose: bool) -> None: config = ctx.obj['config'] download_directory = Path(config['download_directory']) last_bc_path = Path(config['supercard_last_bc_file']) - last_bc = coop_supercard.load_last_bc(last_bc_path) - if verbose: - print(f'Last pulled BC is {last_bc}.') + + def print_if_verbose(msg: str) -> None: + return print(msg) if verbose else None async def run() -> None: - creds: coop_supercard.Credentials = await coop_supercard.fetch_credentials( - await connect_op(config)) - async with async_playwright() as pw: - # Use Chromium. In July 2024, Firefox stopped working: the login - # page was loading indefinitely. - browser = await pw.chromium.launch(headless=headless) - context = await browser.new_context(no_viewport=not headless) - page = await context.new_page() - reverse_chronological_receipt_urls = await coop_supercard.fetch_receipt_urls( - page, creds) - # Wait 5 seconds to make sure that all background scripts have done their work.
- await asyncio.sleep(5) - cookies = playwrightutils.playwright_cookie_jar_to_requests_cookies( - await context.cookies()) - chronological_unprocessed_receipt_urls = coop_supercard.get_chronological_unprocessed_urls( - reverse_chronological_receipt_urls, last_bc) - for coop_receipt in coop_supercard.fetch_receipts( - chronological_unprocessed_receipt_urls, - lambda url: coop_supercard.fetch_receipt(url, cookies)): - if verbose: - print(f'Saving a receipt with BC={coop_receipt.bc}.') - coop_supercard.save_receipt(download_directory, - last_bc_path, - receipt=coop_receipt) - await page.close() - await context.close() - await browser.close() + creds: coop_supercard.Credentials = (await + coop_supercard.fetch_credentials( + await connect_op(config))) + # Use Chromium. In July 2024, Firefox stopped working: the login + # page was loading indefinitely. + async with playwrightutils.new_stack( + browser_type=Browser.CHROMIUM, + headless=headless) as (pw, browser, browser_context, page): + + await coop_supercard.fetch_and_save_receipts( + last_bc_path, download_directory, creds, page, browser_context, + print_if_verbose) asyncio.run(run())