Skip to content

Commit

Permalink
Merge pull request #316 from Progress1/last_collect
Browse files Browse the repository at this point in the history
Added "Last attempt" and  "Last collected" columns in OSINT sources
  • Loading branch information
Progress1 authored Jun 26, 2024
2 parents 99957d1 + 288d523 commit a38013d
Show file tree
Hide file tree
Showing 18 changed files with 112 additions and 21 deletions.
6 changes: 6 additions & 0 deletions src/collectors/collectors/atom_collector.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,13 @@ class AtomCollector(BaseCollector):

@BaseCollector.ignore_exceptions
def collect(self, source):
"""Collect data from Atom feed.
Arguments:
source -- Source object.
"""

BaseCollector.update_last_attempt(source)
feed_url = source.parameter_values['ATOM_FEED_URL']
user_agent = source.parameter_values['USER_AGENT']
interval = source.parameter_values['REFRESH_INTERVAL']
Expand Down
6 changes: 6 additions & 0 deletions src/collectors/collectors/base_collector.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,12 @@ def get_info(self):
def collect(self, source):
pass

@staticmethod
def update_last_attempt(source):
response, status_code = CoreApi.update_collector_last_attepmt(source.id)
if status_code != 200:
log_critical("Update last attempt: HTTP {}, response: {}".format(status_code, response))

# wrap scheduled action with exception because scheduler fail plan next one
@staticmethod
def ignore_exceptions(func):
Expand Down
7 changes: 6 additions & 1 deletion src/collectors/collectors/email_collector.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,9 +33,14 @@ class EmailCollector(BaseCollector):

@BaseCollector.ignore_exceptions
def collect(self, source):
"""Collect data from email source.
news_items = []
Arguments:
source -- Source object.
"""

BaseCollector.update_last_attempt(source)
news_items = []
email_server_type = source.parameter_values['EMAIL_SERVER_TYPE']
email_server_hostname = source.parameter_values['EMAIL_SERVER_HOSTNAME']
email_server_port = source.parameter_values['EMAIL_SERVER_PORT']
Expand Down
2 changes: 2 additions & 0 deletions src/collectors/collectors/rss_collector.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,8 @@ def collect(self, source):
Arguments:
source -- Source object.
"""

BaseCollector.update_last_attempt(source)
feed_url = source.parameter_values["FEED_URL"]
interval = source.parameter_values["REFRESH_INTERVAL"]
links_limit = BaseCollector.read_int_parameter("LINKS_LIMIT", 0, source)
Expand Down
6 changes: 6 additions & 0 deletions src/collectors/collectors/scheduled_tasks_collector.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,13 @@ class ScheduledTasksCollector(BaseCollector):

@BaseCollector.ignore_exceptions
def collect(self, source):
"""Collect data from scheduled tasks.
Arguments:
source -- Source object.
"""

BaseCollector.update_last_attempt(source)
news_items = []
head, tail = os.path.split(source.parameter_values['TASK_COMMAND'])
task_title = source.parameter_values['TASK_TITLE']
Expand Down
7 changes: 6 additions & 1 deletion src/collectors/collectors/slack_collector.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,9 +25,14 @@ class SlackCollector(BaseCollector):

@BaseCollector.ignore_exceptions
def collect(self, source):
"""Collect data from Slack source.
news_items = []
Arguments:
source -- Source object.
"""

BaseCollector.update_last_attempt(source)
news_items = []
proxy_server = source.parameter_values['PROXY_SERVER']

if proxy_server:
Expand Down
6 changes: 6 additions & 0 deletions src/collectors/collectors/twitter_collector.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,8 +30,14 @@ class TwitterCollector(BaseCollector):

@BaseCollector.ignore_exceptions
def collect(self, source):
"""Collect data from X source.
Arguments:
source -- Source object.
"""

try:
BaseCollector.update_last_attempt(source)
news_items = []
attributes = []

Expand Down
12 changes: 7 additions & 5 deletions src/collectors/collectors/web_collector.py
Original file line number Diff line number Diff line change
Expand Up @@ -462,6 +462,7 @@ def __run_tor(self):
def collect(self, source):
"""Collects news items from this source (main function)"""

BaseCollector.update_last_attempt(source)
self.source = source
log_manager.log_collector_activity('web', self.source.name, 'Starting collector')

Expand Down Expand Up @@ -597,9 +598,11 @@ def __process_title_page_articles(self, browser, title_page_handle, index_url):
for item in article_items:
count += 1
# try:
# print(item.get_attribute('outerHTML'), flush=True)
# except Exception:
# print("H: {0} {1:.200}".format(count, item.get_attribute('outerHTML')), flush=True)
# except Exception as ex:
# pass
# if first item works but next items have problems - it's because this:
# https://www.selenium.dev/documentation/webdriver/troubleshooting/errors/#stale-element-reference-exception
link = None
try:
link = item.get_attribute('href')
Expand Down Expand Up @@ -632,10 +635,9 @@ def __process_title_page_articles(self, browser, title_page_handle, index_url):
try:
news_item = self.__process_article_page(index_url, browser)
if news_item:
log_manager.log_collector_activity('web', self.source.name, 'Successfully parsed an article')
# log_manager.log_collector_activity('web', self.source.name, '... Title : {0}'.format(news_item.title))
# log_manager.log_collector_activity('web', self.source.name, '... Review : {0:.100}'.format(news_item.review))
# log_manager.log_collector_activity('web', self.source.name, '... Content : {0:.100}'.format(news_item.content))
# log_manager.log_collector_activity('web', self.source.name, '... Review : {0:.100}'.format(news_item.review.replace("\r", "").replace("\n", " ").strip()))
# log_manager.log_collector_activity('web', self.source.name, '... Content : {0:.100}'.format(news_item.content.replace("\r", "").replace("\n", " ").strip()))
# log_manager.log_collector_activity('web', self.source.name, '... Author : {0}'.format(news_item.author))
# log_manager.log_collector_activity('web', self.source.name, '... Published: {0}'.format(news_item.published))
self.news_items.append(news_item)
Expand Down
9 changes: 5 additions & 4 deletions src/collectors/managers/collectors_manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
import threading
import time

from managers.log_manager import log_debug, log_system_activity
from managers.log_manager import log_debug, log_system_activity, log_warning
from collectors.atom_collector import AtomCollector
from collectors.email_collector import EmailCollector
from collectors.manual_collector import ManualCollector
Expand All @@ -19,9 +19,10 @@
def reportStatus():
while True:
log_debug("[{}] Sending status update...".format(__name__))
response, code = CoreApi.update_collector_status()
log_debug("[{}] Core responded with: HTTP {}, {}".format(__name__, code, response))
# for debuging scheduler tasks
response, status_code = CoreApi.update_collector_status()
if status_code != 200:
log_warning("[{}] Core status update response: HTTP {}, {}".format(__name__, status_code, response))
# for debugging scheduler tasks
# for key in collectors:
# for source in collectors[key].osint_sources:
# if hasattr(source, "scheduler_job"):
Expand Down
14 changes: 14 additions & 0 deletions src/collectors/remote/core_api.py
Original file line number Diff line number Diff line change
Expand Up @@ -103,6 +103,20 @@ def update_collector_status(cls):
logger.debug(ex)
return ex, 400

@classmethod
def update_collector_last_attepmt(cls, source_id):
"""Update collector's "last attempted" record with current datetime.
Returns:
tuple: A tuple containing the JSON response and the HTTP status code.
"""
try:
response = requests.get(cls.api_url + "/api/v1/collectors/osint-sources/" + urllib.parse.quote(source_id) + "/attempt", headers=cls.headers)
return response.json(), response.status_code
except Exception as ex:
logger.debug(ex)
return ex, 400

@classmethod
def add_news_items(cls, news_items):
"""Add news items to the collector.
Expand Down
11 changes: 11 additions & 0 deletions src/core/api/collectors.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,16 @@ def get(self, collector_id):
return osint_source.OSINTSource.get_all_for_collector_json(node, parameters.collector_type)


class OSINTSourceLastAttempt(Resource):

@api_key_required
def get(self, osint_source_id):
source = osint_source.OSINTSource.get_by_id(osint_source_id)
if not source:
return {}, 404
source.update_last_attempt(osint_source_id)
return {}, 200

class AddNewsItems(Resource):

@api_key_required
Expand Down Expand Up @@ -68,5 +78,6 @@ def get(self, collector_id):
def initialize(api):
api.add_resource(OSINTSourcesForCollectors, "/api/v1/collectors/<string:collector_id>/osint-sources")
api.add_resource(OSINTSourceStatusUpdate, "/api/v1/collectors/osint-sources/<string:osint_source_id>")
api.add_resource(OSINTSourceLastAttempt, "/api/v1/collectors/osint-sources/<string:osint_source_id>/attempt")
api.add_resource(CollectorStatusUpdate, "/api/v1/collectors/<string:collector_id>")
api.add_resource(AddNewsItems, "/api/v1/collectors/news-items")
3 changes: 3 additions & 0 deletions src/core/model/news_item.py
Original file line number Diff line number Diff line change
Expand Up @@ -555,6 +555,9 @@ def add_news_items(cls, news_items_data_list):

db.session.commit()

for source_id in osint_source_ids:
OSINTSource.update_collected(source_id)

return osint_source_ids

@classmethod
Expand Down
27 changes: 18 additions & 9 deletions src/core/model/osint_source.py
Original file line number Diff line number Diff line change
Expand Up @@ -209,18 +209,27 @@ def update(cls, osint_source_id, data):

return osint_source, default_group

def update_status(self, status_schema):
# if not collected, do not change last collected timestamp
if status_schema.last_collected:
self.last_collected = status_schema.last_collected
@classmethod
def update_collected(cls, osint_source_id):
"""Update collector's "last collected" record with current datetime (only when some data is collected).
# if not attempted, do not change last collected timestamp
if status_schema.last_attempted:
self.last_attempted = status_schema.last_attempted
Args:
osint_source_id (int): Osint source Id.
"""
osint_source = cls.query.get(osint_source_id)
osint_source.last_collected = datetime.now()
db.session.commit()

self.last_error_message = status_schema.last_error_message
self.last_data = status_schema.last_data
@classmethod
def update_last_attempt(cls, osint_source_id):
"""Update collector's "last attempted" record with current datetime.
Args:
osint_source_id (int): Osint source Id.
"""
osint_source = cls.query.get(osint_source_id)
osint_source.last_attempted = datetime.now()
db.session.commit()

class OSINTSourceParameterValue(db.Model):
osint_source_id = db.Column(db.String, db.ForeignKey('osint_source.id'), primary_key=True)
Expand Down
9 changes: 8 additions & 1 deletion src/gui/src/components/config/osint_sources/CardSource.vue
Original file line number Diff line number Diff line change
Expand Up @@ -24,11 +24,18 @@
<div class="grey--text">{{$t('card_item.description')}}</div>
<div>{{card.description}}</div>
</v-col>
<v-col>
<div class="grey--text">{{$t('osint_source.last_attempt')}}</div>
<div>{{card.last_attempted}}</div>
</v-col>
<v-col>
<div class="grey--text">{{$t('osint_source.last_collected')}}</div>
<div>{{card.last_collected}}</div>
</v-col>
<v-col>
<div class="grey--text">{{$t('osint_source.type')}}</div>
<div>{{card.collector.type}}</div>
</v-col>

<!--HOVER TOOLBAR-->
<v-col :style="UI.STYLE.card_hover_toolbar">
<v-row v-if="hover" v-bind="UI.CARD.TOOLBAR.COMPACT" :style="UI.STYLE.card_toolbar">
Expand Down
2 changes: 2 additions & 0 deletions src/gui/src/i18n/cs/messages.js
Original file line number Diff line number Diff line change
Expand Up @@ -202,6 +202,8 @@ const messages_cs = {
error: "Tento zdroj OSINT se nepodařilo uložit",
name: "Název",
description: "Popis",
last_attempt: "Poslední pokus",
last_collected: "Poslední sběr",
successful: "Nový zdroj OSINT byl přidán",
successful_edit: "Zdroj OSINT byl upraven",
removed: "Zdroj OSINT byl úspěšně smazán",
Expand Down
2 changes: 2 additions & 0 deletions src/gui/src/i18n/en/messages.js
Original file line number Diff line number Diff line change
Expand Up @@ -202,6 +202,8 @@ const messages_en = {
error: "Could not save this OSINT source",
name: "Name",
description: "Description",
last_attempt: "Last attempt",
last_collected: "Last collected",
successful: "New OSINT source was successfully added",
successful_edit: "OSINT source was successfully updated",
removed: "OSINT source was successfully deleted",
Expand Down
2 changes: 2 additions & 0 deletions src/gui/src/i18n/sk/messages.js
Original file line number Diff line number Diff line change
Expand Up @@ -84,6 +84,8 @@ const messages_sk = {
error: "Nepodarilo sa vytvoriť zadaný zdroj.",
name: "Meno",
description: "Popis",
last_attempt: "Posledný pokus",
last_collected: "Posledný zber",
successful: "Nový OSINT zdroj bol úspešne pridaný"
},

Expand Down
2 changes: 2 additions & 0 deletions src/shared/shared/schema/osint_source.py
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,8 @@ class OSINTSourceSchema(OSINTSourceSchemaBase):
name = fields.Str()
description = fields.Str()
collector_id = fields.Str()
last_attempted = fields.DateTime("%d.%m.%Y - %H:%M:%S")
last_collected = fields.DateTime("%d.%m.%Y - %H:%M:%S")


class OSINTSourceCollectorSchema(Schema):
Expand Down

0 comments on commit a38013d

Please sign in to comment.