diff --git a/jobfunnel/__init__.py b/jobfunnel/__init__.py index cb498eae..fc053504 100644 --- a/jobfunnel/__init__.py +++ b/jobfunnel/__init__.py @@ -1,3 +1,3 @@ """JobFunnel base package init, we keep module version here. """ -__version__ = '3.0.1' +__version__ = '3.0.2' diff --git a/jobfunnel/backend/scrapers/base.py b/jobfunnel/backend/scrapers/base.py index ed473661..6b446da9 100644 --- a/jobfunnel/backend/scrapers/base.py +++ b/jobfunnel/backend/scrapers/base.py @@ -343,6 +343,9 @@ def scrape_job(self, job_soup: BeautifulSoup, delay: float, if job and not invalid_job: try: job.validate() + # Prefix the id with the scraper name to avoid key conflicts + new_key_id = job.provider + '_' + job.key_id + job.key_id = new_key_id except Exception as err: # Bad job scrapes can't take down execution! # NOTE: desc too short etc, usually indicates that the job diff --git a/readme.md b/readme.md index 0c26a1e8..594f015f 100644 --- a/readme.md +++ b/readme.md @@ -97,6 +97,11 @@ Open the master CSV file and update the per-job `status`: ``` funnel inline -h ``` + +# CAPTCHA + JobFunnel does not solve CAPTCHAs. If, while scraping, you receive an + `Unable to extract jobs from initial search result page:\` error, + open that URL in your browser and solve the CAPTCHA manually. [requirements]:requirements.txt