Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Exceptions are now caught when calling scrape; this should fix our CI problems #130

Merged
merged 1 commit into from
Jan 20, 2021
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 6 additions & 2 deletions jobfunnel/backend/jobfunnel.py
Original file line number Diff line number Diff line change
Expand Up @@ -112,11 +112,11 @@ def run(self) -> None:

# Scrape new jobs from all our configured providers and cache them
scraped_jobs_dict = self.scrape()
self.write_cache(scraped_jobs_dict)

# Filter out any jobs we have rejected, archived or block-listed
# NOTE: we do not remove duplicates here as these may trigger updates
if scraped_jobs_dict:
self.write_cache(scraped_jobs_dict)
scraped_jobs_dict = self.job_filter.filter(
scraped_jobs_dict, remove_existing_duplicate_keys=False
)
Expand Down Expand Up @@ -230,10 +230,14 @@ def scrape(self) -> Dict[str, Job]:

# Iterate thru scrapers and run their scrape.
jobs = {} # type: Dict[str, Job]
incoming_jobs_dict = {}
for scraper_cls in self.config.scrapers:
start = time()
scraper = scraper_cls(self.session, self.config, self.job_filter)
incoming_jobs_dict = scraper.scrape()
try:
incoming_jobs_dict = scraper.scrape()
except Exception as e:
self.logger.error(f"Failed to scrape jobs for {scraper_cls.__name__}")

# Ensure we have no duplicates between our scrapers by key-id
# (since we are updating the jobs dict with results)
Expand Down