diff --git a/scraper/src/config/config_loader.py b/scraper/src/config/config_loader.py
index 9c954de3..b88506e1 100644
--- a/scraper/src/config/config_loader.py
+++ b/scraper/src/config/config_loader.py
@@ -49,6 +49,7 @@ class ConfigLoader(object):
     strategy = 'default'
     strict_redirect = True
     strip_chars = u".,;:§¶"
+    update_nb_hits = None
     use_anchors = False
     user_agent = 'Algolia DocSearch Crawler'
     only_content_level = False
@@ -112,6 +113,9 @@ def _parse(self):
         # Parse Env
         self.app_id = os.environ.get('APPLICATION_ID', None)
         self.api_key = os.environ.get('API_KEY', None)
+        self.update_nb_hits = os.environ.get('UPDATE_NB_HITS', None)
+        if self.update_nb_hits is not None:
+            self.update_nb_hits = self._str_to_bool(self.update_nb_hits)
 
         # Parse config
         self.selectors = SelectorsParser().parse(self.selectors)
@@ -124,7 +128,22 @@ def _parse(self):
         self.allowed_domains = UrlsParser.build_allowed_domains(
             self.start_urls, self.stop_urls)
 
-    def update_nb_hits(self, nb_hits):
+    @staticmethod
+    def _str_to_bool(value):
+        """Parse a boolean string such as 'true', '0' or 'yes'.
+
+        Raises ValueError when the value is not a recognised truth value.
+        """
+        normalized = value.strip().lower()
+        if normalized in ('y', 'yes', 't', 'true', 'on', '1'):
+            return True
+        if normalized in ('n', 'no', 'f', 'false', 'off', '0'):
+            return False
+        raise ValueError(
+            'invalid truth value for UPDATE_NB_HITS: %r' % (value,))
+
+    def update_nb_hits_value(self, nb_hits):
+        """Persist nb_hits to the config file, honouring UPDATE_NB_HITS."""
         if self.config_file is not None:
             # config loaded from file
             previous_nb_hits = None if 'nb_hits' not in self.config_content else \
@@ -132,7 +151,7 @@ def update_nb_hits(self, nb_hits):
             nb_hit_updater = NbHitsUpdater(self.config_file,
                                            self.config_content,
                                            previous_nb_hits, nb_hits)
-            nb_hit_updater.update()
+            nb_hit_updater.update(self.update_nb_hits)
 
     def get_extra_facets(self):
         return UrlsParser.get_extra_facets(self.start_urls)
diff --git a/scraper/src/config/nb_hits_updater.py b/scraper/src/config/nb_hits_updater.py
index a9a38f6b..808bf41f 100644
--- a/scraper/src/config/nb_hits_updater.py
+++ b/scraper/src/config/nb_hits_updater.py
@@ -1,6 +1,7 @@
 from ..helpers import confirm
 import json
 import copy
+import sys
 
 
 class NbHitsUpdater(object):
@@ -16,12 +17,24 @@ def __init__(self, config_file, config_content, previous_nb_hits,
         self.new_nb_hit = new_nb_hit
         self.previous_nb_hits = previous_nb_hits
 
-    def update(self):
+    def update(self, perform_update=None):
+        """Write the new nb_hits to the config file if an update is needed.
+
+        perform_update: True or False forces the decision; None asks the
+        user when stdout is a terminal and updates automatically otherwise.
+        """
         if self._update_needed():
             print("previous nb_hits: " + str(self.previous_nb_hits) + "\n")
 
-            if confirm(
-                    'Do you want to update the nb_hits in ' + self.config_file + ' ?'):
+            if perform_update is None:
+                if sys.stdout.isatty():
+                    perform_update = confirm(
+                        'Do you want to update the nb_hits in ' +
+                        self.config_file + ' ?')
+                else:
+                    perform_update = True
+
+            if perform_update:
                 try:
                     self._update_config()
                     print("\n[OK] " + self.config_file + " has been updated")
diff --git a/scraper/src/index.py b/scraper/src/index.py
index cf4cf674..ea4036b7 100644
--- a/scraper/src/index.py
+++ b/scraper/src/index.py
@@ -84,7 +84,7 @@ def run_config(config):
     if DocumentationSpider.NB_INDEXED > 0:
         algolia_helper.commit_tmp_index()
         print('Nb hits: ' + str(DocumentationSpider.NB_INDEXED))
-        config.update_nb_hits(DocumentationSpider.NB_INDEXED)
+        config.update_nb_hits_value(DocumentationSpider.NB_INDEXED)
     else:
         print('Crawling issue: nbHits 0 for ' + config.index_name)
         algolia_helper.report_crawling_issue()