Skip to content

Commit

Permalink
refactor(coop): refresh Coop implementation
Browse files Browse the repository at this point in the history
  • Loading branch information
gregorias committed Sep 9, 2024
1 parent 8b226d8 commit bfa9e87
Show file tree
Hide file tree
Showing 3 changed files with 74 additions and 34 deletions.
34 changes: 33 additions & 1 deletion fetcher/coop_supercard.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,9 +9,15 @@
import playwright.async_api
import requests

from . import op
from . import op, playwrightutils
from .fileutils import atomic_write

__all__ = [
'Credentials',
'fetch_credentials',
'fetch_and_save_receipts',
]


class Credentials(NamedTuple):
id: str
Expand All @@ -26,6 +32,32 @@ async def fetch_credentials(client: op.OpSdkClient) -> Credentials:
return Credentials(id=username, pwd=password)


async def fetch_and_save_receipts(
    last_bc_path: pathlib.Path,
    download_directory: pathlib.Path,
    creds: Credentials,
    page: playwright.async_api.Page,
    browser_context: playwright.async_api.BrowserContext,
    log: Callable[[str], None],
):
    """Pulls new Coop receipts from supercard.ch and stores them on disk.

    The receipt URLs are collected through the Playwright page; the receipts
    themselves are then fetched with the cookies taken from the browser
    context.

    :param last_bc_path: File from which the last pulled BC is loaded. It is
        also handed to save_receipt for every saved receipt.
    :param download_directory: Directory handed to save_receipt as the
        destination for receipts.
    :param creds: Credentials used while fetching the receipt URLs.
    :param page: Playwright page used to fetch the receipt URLs.
    :param browser_context: Browser context whose cookies are reused for
        fetching the receipts.
    :param log: Callback that receives progress messages.
    """
    previous_bc = load_last_bc(last_bc_path)
    log(f'Last pulled BC is {previous_bc}.')

    newest_first_urls = await fetch_receipt_urls(page, creds)

    # Pause for 5 seconds so that the page's background scripts can finish
    # their work before the cookies are read.
    await asyncio.sleep(5)
    playwright_cookies = await browser_context.cookies()
    session_cookies = playwrightutils.playwright_cookie_jar_to_requests_cookies(
        playwright_cookies)

    pending_urls = get_chronological_unprocessed_urls(newest_first_urls,
                                                      previous_bc)

    def download(url):
        """Fetches a single receipt using the browser session's cookies."""
        return fetch_receipt(url, session_cookies)

    for receipt in fetch_receipts(pending_urls, download):
        log(f'Saving a receipt with BC={receipt.bc}.')
        save_receipt(download_directory, last_bc_path, receipt=receipt)


async def login(page: playwright.async_api.Page, creds: Credentials) -> None:
"""Logs in to supercard.ch.
Expand Down
24 changes: 22 additions & 2 deletions fetcher/playwrightutils.py
Original file line number Diff line number Diff line change
Expand Up @@ -141,11 +141,13 @@ async def intercept_download(


@contextlib.asynccontextmanager
async def new_page(
async def new_stack(
browser_type: Browser,
headless: bool = False,
downloads_path: Optional[pathlib.Path] = None
) -> typing.AsyncIterator[playwright.async_api.Page]:
) -> typing.AsyncIterator[
tuple[playwright.async_api.Playwright, playwright.async_api.Browser,
playwright.async_api.BrowserContext, playwright.async_api.Page]]:
"""Opens a new page in a new context.
:param browser playwright.async_api.BrowserType: The browser to use.
Expand All @@ -161,4 +163,22 @@ async def new_page(
async_closing(await
browser.new_context(no_viewport=not headless)) as
context, async_closing(await context.new_page()) as page):
yield (pw, browser, context, page)


@contextlib.asynccontextmanager
async def new_page(
    browser_type: Browser,
    headless: bool = False,
    downloads_path: Optional[pathlib.Path] = None
) -> typing.AsyncIterator[playwright.async_api.Page]:
    """Opens a new page in a new context.

    This is a thin wrapper over new_stack that yields only the page and
    discards the other three elements of the yielded tuple.

    :param browser_type Browser: The browser to use.
    :param headless bool: Whether to run a fixed-viewport headless browser or a
        responsive one. Defaults to False.
    :param downloads_path Optional[pathlib.Path]: The path used for downloads.
    """
    async with new_stack(browser_type, headless,
                         downloads_path) as (_, _, _, page):
        yield page
50 changes: 19 additions & 31 deletions fetcher/tool.py
Original file line number Diff line number Diff line change
Expand Up @@ -150,46 +150,34 @@ async def run():
def coop_supercard_pull(ctx, headless: bool, verbose: bool) -> None:
"""Fetches Coop receipt PDFs from supercard.ch.
This command saves the PDFs in the download directory.
This command:
* Saves the PDFs in the download directory as "Coop BC.pdf".\n
* Writes the last receipts’ BC (an identifier) to the last BC file.
supercard.ch occasionally asks for a captcha. When this happens, human
intervention is required.
"""
config = ctx.obj['config']
download_directory = Path(config['download_directory'])
last_bc_path = Path(config['supercard_last_bc_file'])
last_bc = coop_supercard.load_last_bc(last_bc_path)
if verbose:
print(f'Last pulled BC is {last_bc}.')

def print_if_verbose(msg: str) -> None:
    """Prints msg only when the enclosing command's verbose flag is set."""
    # A plain statement keeps the return value None on both paths. The
    # previous conditional expression parsed as
    # `print(msg) if verbose else (lambda _: None)` and so returned a
    # throwaway lambda whenever verbose was off.
    if verbose:
        print(msg)

async def run() -> None:
creds: coop_supercard.Credentials = await coop_supercard.fetch_credentials(
await connect_op(config))
async with async_playwright() as pw:
# Use Chromium. In July 2024, Firefox stopped working: the login
# page was loading indefinitely.
browser = await pw.chromium.launch(headless=headless)
context = await browser.new_context(no_viewport=not headless)
page = await context.new_page()
reverse_chronological_receipt_urls = await coop_supercard.fetch_receipt_urls(
page, creds)
# Wait 5 seconds to make sure that all background scripts have done their work.
await asyncio.sleep(5)
cookies = playwrightutils.playwright_cookie_jar_to_requests_cookies(
await context.cookies())
chronological_unprocessed_receipt_urls = coop_supercard.get_chronological_unprocessed_urls(
reverse_chronological_receipt_urls, last_bc)
for coop_receipt in coop_supercard.fetch_receipts(
chronological_unprocessed_receipt_urls,
lambda url: coop_supercard.fetch_receipt(url, cookies)):
if verbose:
print(f'Saving a receipt with BC={coop_receipt.bc}.')
coop_supercard.save_receipt(download_directory,
last_bc_path,
receipt=coop_receipt)
await page.close()
await context.close()
await browser.close()
creds: coop_supercard.Credentials = (await
coop_supercard.fetch_credentials(
await connect_op(config)))
# Use Chromium. In July 2024, Firefox stopped working: the login
# page was loading indefinitely.
async with playwrightutils.new_stack(
browser_type=Browser.CHROMIUM,
headless=headless) as (pw, browser, browser_context, page):

await coop_supercard.fetch_and_save_receipts(
last_bc_path, download_directory, creds, page, browser_context,
print_if_verbose)

asyncio.run(run())

Expand Down

0 comments on commit bfa9e87

Please sign in to comment.