diff --git a/src/collectors/collectors/atom_collector.py b/src/collectors/collectors/atom_collector.py index ce3ab2c4f..7ed4832cb 100644 --- a/src/collectors/collectors/atom_collector.py +++ b/src/collectors/collectors/atom_collector.py @@ -30,7 +30,13 @@ class AtomCollector(BaseCollector): @BaseCollector.ignore_exceptions def collect(self, source): + """Collect data from Atom feed. + Arguments: + source -- Source object. + """ + + BaseCollector.update_last_attempt(source) feed_url = source.parameter_values['ATOM_FEED_URL'] user_agent = source.parameter_values['USER_AGENT'] interval = source.parameter_values['REFRESH_INTERVAL'] diff --git a/src/collectors/collectors/base_collector.py b/src/collectors/collectors/base_collector.py index b9b8f18e5..0d07a6095 100644 --- a/src/collectors/collectors/base_collector.py +++ b/src/collectors/collectors/base_collector.py @@ -35,6 +35,12 @@ def get_info(self): def collect(self, source): pass + @staticmethod + def update_last_attempt(source): + response, status_code = CoreApi.update_collector_last_attepmt(source.id) + if status_code != 200: + log_critical("Update last attempt: HTTP {}, response: {}".format(status_code, response)) + # wrap scheduled action with exception because scheduler fail plan next one @staticmethod def ignore_exceptions(func): diff --git a/src/collectors/collectors/email_collector.py b/src/collectors/collectors/email_collector.py index 40f751c96..34a7eecfe 100644 --- a/src/collectors/collectors/email_collector.py +++ b/src/collectors/collectors/email_collector.py @@ -33,9 +33,14 @@ class EmailCollector(BaseCollector): @BaseCollector.ignore_exceptions def collect(self, source): + """Collect data from email source. - news_items = [] + Arguments: + source -- Source object. + """ + BaseCollector.update_last_attempt(source) + news_items = [] email_server_type = source.parameter_values['EMAIL_SERVER_TYPE'] email_server_hostname = source.parameter_values['EMAIL_SERVER_HOSTNAME'] email_server_port = source.parameter_values['EMAIL_SERVER_PORT'] diff --git a/src/collectors/collectors/rss_collector.py b/src/collectors/collectors/rss_collector.py index 496251163..476a2d9c2 100644 --- a/src/collectors/collectors/rss_collector.py +++ b/src/collectors/collectors/rss_collector.py @@ -46,6 +46,8 @@ def collect(self, source): Arguments: source -- Source object. """ + + BaseCollector.update_last_attempt(source) feed_url = source.parameter_values["FEED_URL"] interval = source.parameter_values["REFRESH_INTERVAL"] links_limit = BaseCollector.read_int_parameter("LINKS_LIMIT", 0, source) diff --git a/src/collectors/collectors/scheduled_tasks_collector.py b/src/collectors/collectors/scheduled_tasks_collector.py index 58dd46ee8..61c87b3d0 100644 --- a/src/collectors/collectors/scheduled_tasks_collector.py +++ b/src/collectors/collectors/scheduled_tasks_collector.py @@ -25,7 +25,13 @@ class ScheduledTasksCollector(BaseCollector): @BaseCollector.ignore_exceptions def collect(self, source): + """Collect data from scheduled tasks. + Arguments: + source -- Source object. + """ + + BaseCollector.update_last_attempt(source) news_items = [] head, tail = os.path.split(source.parameter_values['TASK_COMMAND']) task_title = source.parameter_values['TASK_TITLE'] diff --git a/src/collectors/collectors/slack_collector.py b/src/collectors/collectors/slack_collector.py index 9241e822d..8ae4334fd 100644 --- a/src/collectors/collectors/slack_collector.py +++ b/src/collectors/collectors/slack_collector.py @@ -25,9 +25,14 @@ class SlackCollector(BaseCollector): @BaseCollector.ignore_exceptions def collect(self, source): + """Collect data from Slack source. - news_items = [] + Arguments: + source -- Source object. + """ + BaseCollector.update_last_attempt(source) + news_items = [] proxy_server = source.parameter_values['PROXY_SERVER'] if proxy_server: diff --git a/src/collectors/collectors/twitter_collector.py b/src/collectors/collectors/twitter_collector.py index 888083f69..a3b6ddd73 100644 --- a/src/collectors/collectors/twitter_collector.py +++ b/src/collectors/collectors/twitter_collector.py @@ -30,8 +30,14 @@ class TwitterCollector(BaseCollector): @BaseCollector.ignore_exceptions def collect(self, source): + """Collect data from X source. + + Arguments: + source -- Source object. + """ try: + BaseCollector.update_last_attempt(source) news_items = [] attributes = [] diff --git a/src/collectors/collectors/web_collector.py b/src/collectors/collectors/web_collector.py index 346bbb0bb..3e845d6ac 100644 --- a/src/collectors/collectors/web_collector.py +++ b/src/collectors/collectors/web_collector.py @@ -462,6 +462,7 @@ def __run_tor(self): def collect(self, source): """Collects news items from this source (main function)""" + BaseCollector.update_last_attempt(source) self.source = source log_manager.log_collector_activity('web', self.source.name, 'Starting collector') @@ -597,9 +598,11 @@ def __process_title_page_articles(self, browser, title_page_handle, index_url): for item in article_items: count += 1 # try: - # print(item.get_attribute('outerHTML'), flush=True) - # except Exception: + # print("H: {0} {1:.200}".format(count, item.get_attribute('outerHTML')), flush=True) + # except Exception as ex: # pass + # if first item works but next items have problems - it's because this: + # https://www.selenium.dev/documentation/webdriver/troubleshooting/errors/#stale-element-reference-exception link = None try: link = item.get_attribute('href') @@ -632,10 +635,9 @@ def __process_title_page_articles(self, browser, title_page_handle, index_url): try: news_item = self.__process_article_page(index_url, browser) if news_item: - log_manager.log_collector_activity('web', self.source.name, 'Successfully parsed an article') # log_manager.log_collector_activity('web', self.source.name, '... Title : {0}'.format(news_item.title)) - # log_manager.log_collector_activity('web', self.source.name, '... Review : {0:.100}'.format(news_item.review)) - # log_manager.log_collector_activity('web', self.source.name, '... Content : {0:.100}'.format(news_item.content)) + # log_manager.log_collector_activity('web', self.source.name, '... Review : {0:.100}'.format(news_item.review.replace("\r", "").replace("\n", " ").strip())) + # log_manager.log_collector_activity('web', self.source.name, '... Content : {0:.100}'.format(news_item.content.replace("\r", "").replace("\n", " ").strip())) # log_manager.log_collector_activity('web', self.source.name, '... Author : {0}'.format(news_item.author)) # log_manager.log_collector_activity('web', self.source.name, '... Published: {0}'.format(news_item.published)) self.news_items.append(news_item) diff --git a/src/collectors/managers/collectors_manager.py b/src/collectors/managers/collectors_manager.py index 716982f7a..c9d23a2f5 100644 --- a/src/collectors/managers/collectors_manager.py +++ b/src/collectors/managers/collectors_manager.py @@ -2,7 +2,7 @@ import threading import time -from managers.log_manager import log_debug, log_system_activity +from managers.log_manager import log_debug, log_system_activity, log_warning from collectors.atom_collector import AtomCollector from collectors.email_collector import EmailCollector from collectors.manual_collector import ManualCollector @@ -19,9 +19,10 @@ def reportStatus(): while True: log_debug("[{}] Sending status update...".format(__name__)) - response, code = CoreApi.update_collector_status() - log_debug("[{}] Core responded with: HTTP {}, {}".format(__name__, code, response)) - # for debuging scheduler tasks + response, status_code = CoreApi.update_collector_status() + if status_code != 200: + log_warning("[{}] Core status update response: HTTP {}, {}".format(__name__, status_code, response)) + # for debugging scheduler tasks # for key in collectors: # for source in collectors[key].osint_sources: # if hasattr(source, "scheduler_job"): diff --git a/src/collectors/remote/core_api.py b/src/collectors/remote/core_api.py index 03947369b..44cfde4d8 100755 --- a/src/collectors/remote/core_api.py +++ b/src/collectors/remote/core_api.py @@ -103,6 +103,20 @@ def update_collector_status(cls): logger.debug(ex) return ex, 400 + @classmethod + def update_collector_last_attepmt(cls, source_id): + """Update collector's "last attempted" record with current datetime. + + Returns: + tuple: A tuple containing the JSON response and the HTTP status code. + """ + try: + response = requests.get(cls.api_url + "/api/v1/collectors/osint-sources/" + urllib.parse.quote(source_id) + "/attempt", headers=cls.headers) + return response.json(), response.status_code + except Exception as ex: + logger.debug(ex) + return ex, 400 + @classmethod def add_news_items(cls, news_items): """Add news items to the collector. diff --git a/src/core/api/collectors.py b/src/core/api/collectors.py index c4018f134..c59737fb7 100644 --- a/src/core/api/collectors.py +++ b/src/core/api/collectors.py @@ -26,6 +26,16 @@ def get(self, collector_id): return osint_source.OSINTSource.get_all_for_collector_json(node, parameters.collector_type) +class OSINTSourceLastAttempt(Resource): + + @api_key_required + def get(self, osint_source_id): + source = osint_source.OSINTSource.get_by_id(osint_source_id) + if not source: + return {}, 404 + source.update_last_attempt(osint_source_id) + return {}, 200 + class AddNewsItems(Resource): @api_key_required @@ -68,5 +78,6 @@ def get(self, collector_id): def initialize(api): api.add_resource(OSINTSourcesForCollectors, "/api/v1/collectors//osint-sources") api.add_resource(OSINTSourceStatusUpdate, "/api/v1/collectors/osint-sources/") + api.add_resource(OSINTSourceLastAttempt, "/api/v1/collectors/osint-sources//attempt") api.add_resource(CollectorStatusUpdate, "/api/v1/collectors/") api.add_resource(AddNewsItems, "/api/v1/collectors/news-items") diff --git a/src/core/model/news_item.py b/src/core/model/news_item.py index 850f38944..21378a87e 100644 --- a/src/core/model/news_item.py +++ b/src/core/model/news_item.py @@ -555,6 +555,9 @@ def add_news_items(cls, news_items_data_list): db.session.commit() + for source_id in osint_source_ids: + OSINTSource.update_collected(source_id) + return osint_source_ids @classmethod diff --git a/src/core/model/osint_source.py b/src/core/model/osint_source.py index 02c4b1a7d..bf0fe3742 100644 --- a/src/core/model/osint_source.py +++ b/src/core/model/osint_source.py @@ -209,18 +209,27 @@ def update(cls, osint_source_id, data): return osint_source, default_group - def update_status(self, status_schema): - # if not collected, do not change last collected timestamp - if status_schema.last_collected: - self.last_collected = status_schema.last_collected + @classmethod + def update_collected(cls, osint_source_id): + """Update collector's "last collected" record with current datetime (only when some data is collected). - # if not attempted, do not change last collected timestamp - if status_schema.last_attempted: - self.last_attempted = status_schema.last_attempted + Args: + osint_source_id (int): Osint source Id. + """ + osint_source = cls.query.get(osint_source_id) + osint_source.last_collected = datetime.now() + db.session.commit() - self.last_error_message = status_schema.last_error_message - self.last_data = status_schema.last_data + @classmethod + def update_last_attempt(cls, osint_source_id): + """Update collector's "last attempted" record with current datetime. + Args: + osint_source_id (int): Osint source Id. + """ + osint_source = cls.query.get(osint_source_id) + osint_source.last_attempted = datetime.now() + db.session.commit() class OSINTSourceParameterValue(db.Model): osint_source_id = db.Column(db.String, db.ForeignKey('osint_source.id'), primary_key=True) diff --git a/src/gui/src/components/config/osint_sources/CardSource.vue b/src/gui/src/components/config/osint_sources/CardSource.vue index e1ae15732..96f08e2c1 100644 --- a/src/gui/src/components/config/osint_sources/CardSource.vue +++ b/src/gui/src/components/config/osint_sources/CardSource.vue @@ -24,11 +24,18 @@
{{$t('card_item.description')}}
{{card.description}}
+ +
{{$t('osint_source.last_attempt')}}
+
{{card.last_attempted}}
+
+ +
{{$t('osint_source.last_collected')}}
+
{{card.last_collected}}
+
{{$t('osint_source.type')}}
{{card.collector.type}}
- diff --git a/src/gui/src/i18n/cs/messages.js b/src/gui/src/i18n/cs/messages.js index 6e5f9ac25..fcf12b3dc 100644 --- a/src/gui/src/i18n/cs/messages.js +++ b/src/gui/src/i18n/cs/messages.js @@ -202,6 +202,8 @@ const messages_cs = { error: "Tento zdroj OSINT se nepodařilo uložit", name: "Název", description: "Popis", + last_attempt: "Poslední pokus", + last_collected: "Poslední sběr", successful: "Nový zdroj OSINT byl přidán", successful_edit: "Zdroj OSINT byl upraven", removed: "Zdroj OSINT byl úspěšně smazán", diff --git a/src/gui/src/i18n/en/messages.js b/src/gui/src/i18n/en/messages.js index 20b634b92..7525737f1 100644 --- a/src/gui/src/i18n/en/messages.js +++ b/src/gui/src/i18n/en/messages.js @@ -202,6 +202,8 @@ const messages_en = { error: "Could not save this OSINT source", name: "Name", description: "Description", + last_attempt: "Last attempt", + last_collected: "Last collected", successful: "New OSINT source was successfully added", successful_edit: "OSINT source was successfully updated", removed: "OSINT source was successfully deleted", diff --git a/src/gui/src/i18n/sk/messages.js b/src/gui/src/i18n/sk/messages.js index ee3df1014..f26fa17cd 100644 --- a/src/gui/src/i18n/sk/messages.js +++ b/src/gui/src/i18n/sk/messages.js @@ -84,6 +84,8 @@ const messages_sk = { error: "Nepodarilo sa vytvoriť zadaný zdroj.", name: "Meno", description: "Popis", + last_attempt: "Posledný pokus", + last_collected: "Posledný zber", successful: "Nový OSINT zdroj bol úspešne pridaný" }, diff --git a/src/shared/shared/schema/osint_source.py b/src/shared/shared/schema/osint_source.py index 0bfbc6063..d6ca76ec7 100644 --- a/src/shared/shared/schema/osint_source.py +++ b/src/shared/shared/schema/osint_source.py @@ -58,6 +58,8 @@ class OSINTSourceSchema(OSINTSourceSchemaBase): name = fields.Str() description = fields.Str() collector_id = fields.Str() + last_attempted = fields.DateTime("%d.%m.%Y - %H:%M:%S") + last_collected = fields.DateTime("%d.%m.%Y - %H:%M:%S") class OSINTSourceCollectorSchema(Schema):