diff --git a/domainbigdata_service/CHANGELOG b/domainbigdata_service/CHANGELOG
new file mode 100644
index 00000000..9dbaf0f5
--- /dev/null
+++ b/domainbigdata_service/CHANGELOG
@@ -0,0 +1,4 @@
+Version 0.0.1
+-------------
+
+This is a first attempt at integrating a CRITs service that runs DomainBigData (domainbigdata.com) lookups on email/domain Indicators and on Domain objects.
diff --git a/domainbigdata_service/DEPENDENCIES b/domainbigdata_service/DEPENDENCIES
new file mode 100644
index 00000000..13154420
--- /dev/null
+++ b/domainbigdata_service/DEPENDENCIES
@@ -0,0 +1,2 @@
+bs4
+requests
diff --git a/domainbigdata_service/LICENSE b/domainbigdata_service/LICENSE
new file mode 100644
index 00000000..8866226d
--- /dev/null
+++ b/domainbigdata_service/LICENSE
@@ -0,0 +1,21 @@
+The MIT License (MIT)
+
+Copyright (c) 2017, Lionel PRAT. All rights reserved.
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
diff --git a/domainbigdata_service/README b/domainbigdata_service/README
new file mode 100644
index 00000000..dd8b8bd5
--- /dev/null
+++ b/domainbigdata_service/README
@@ -0,0 +1,6 @@
+Version 0.0.1
+-------------
+
+This is a first attempt at integrating a CRITs service that runs DomainBigData (domainbigdata.com) lookups on email/domain Indicators and on Domain objects.
+
+
diff --git a/domainbigdata_service/__init__.py b/domainbigdata_service/__init__.py
new file mode 100644
index 00000000..68538d19
--- /dev/null
+++ b/domainbigdata_service/__init__.py
@@ -0,0 +1,114 @@
+# (c) 2017, Lionel PRAT
+# DomainBigData lookup on email & domain indicators (https://domainbigdata.com)
+# Based on the preview service by Adam Polkosnik.
+# Uses modified source code from Roberto Sponchioni - https://github.com/Ptr32Void/OSTrICa/blob/master/ostrica/Plugins/DomainBigData/__init__.py
+# All rights reserved.
+
+import logging
+import os
+import io
+import StringIO
+
+import IndicatorTypes
+
+# for adding the extracted files
+from django.conf import settings
+from django.template.loader import render_to_string
+from crits.services.core import Service, ServiceConfigError
+
+from . import domainbigdata
+
+logger = logging.getLogger(__name__)
+
+
+class DomainBigDataService(Service):
+
+    name = "DomainBigData"
+    version = '0.0.1'
+    supported_types = ['Domain', 'Indicator']
+    description = "DomainBigData lookup on email & domain indicators"
+
+    @staticmethod
+    def valid_for(obj):
+        # Only run on Indicators of type Email Address, Email Address From,
+        # Email Reply-To or Domain (email/domain lookup),
+        # or on Domain objects (domain lookup).
+        if obj._meta['crits_type'] == 'Indicator' and obj['ind_type'] in ('Email Address', 'Email Address From', 'Email Reply-To', 'Domain'):
+            return True
+        elif obj._meta['crits_type'] == 'Domain':
+            return True
+        return False
+
+    def run(self, obj, config):
+        self.config = config
+        self.obj = obj
+        user = self.current_task.user
+        # create the DomainBigData client
+        self._info('RUN DomainBigData lookup')
+        dbd = domainbigdata.DomainBigData()
+        result = None
+        value_obj = None
+        if (obj._meta['crits_type'] == 'Indicator' and obj['ind_type'] == 'Domain') or obj._meta['crits_type'] == 'Domain':
+            if obj._meta['crits_type'] == 'Domain':
+                value_obj = obj.domain
+            else:
+                value_obj = obj['value']
+            # domain lookup
+            try:
+                self._info('Send request type domain on DomainBigData')
+                result = dbd.domain_information(value_obj, self)
+            except Exception as e:
+                self._error('Query DomainBigData error: ' + str(e))
+        elif obj._meta['crits_type'] == 'Indicator' and obj['ind_type'] in ('Email Address', 'Email Address From', 'Email Reply-To'):
+            value_obj = obj['value']
+            # email lookup
+            try:
+                self._info('Send request type email on DomainBigData')
+                result = dbd.email_information(value_obj, self)
+            except Exception as e:
+                self._error('Query DomainBigData error: ' + str(e))
+        else:
+            self._error('This object type cannot use the DomainBigData lookup service.')
+            return False
+        if not result:
+            self._info('Result is empty')
+            return
+        self._info('Processing results: ' + str(result))
+        # add results: the first pass adds top-level scalar and list values,
+        # the second pass expands nested dictionaries
+        if type(result) is dict:
+            for k, v in result.iteritems():
+                if not type(v) is dict:
+                    if type(v) is list:
+                        count = 1
+                        for item in v:
+                            if type(item) is dict:
+                                self._add_result('Result of DomainBigData on ' + value_obj, k + ' -> ' + str(count), item)
+                            elif type(item) is list:
+                                self._add_result('Result of DomainBigData on ' + value_obj, k + ' -> ' + str(count), {'value': str(item)})
+                            else:
+                                self._add_result('Result of DomainBigData on ' + value_obj, k + ' -> ' + str(count), {'value': item})
+                            count += 1
+                    else:
+                        self._add_result('Result of DomainBigData on ' + value_obj, k, {'value': v})
+            for k, v in result.iteritems():
+                if type(v) is dict:
+                    for kx, vx in v.iteritems():
+                        if type(vx) is dict:
+                            self._add_result('Result of DomainBigData on ' + value_obj + ' -- Result: ' + k, kx, {'value': vx})
+                        elif type(vx) is list:
+                            count = 1
+                            for item in vx:
+                                if type(item) is dict:
+                                    self._add_result('Result of DomainBigData on ' + value_obj + ' -- Result: ' + k, kx + ' -> ' + str(count), item)
+                                elif type(item) is list:
+                                    self._add_result('Result of DomainBigData on ' + value_obj + ' -- Result: ' + k, kx + ' -> ' + str(count), {'value': str(item)})
+                                else:
+                                    self._add_result('Result of DomainBigData on ' + value_obj + ' -- Result: ' + k, kx + ' -> ' + str(count), {'value': item})
+                                count += 1
+                        else:
+                            self._add_result('Result of DomainBigData on ' + value_obj + ' -- Result: ' + k, kx, {'value': vx})
+        self._info('END')
diff --git a/domainbigdata_service/bootstrap b/domainbigdata_service/bootstrap
new file mode 100755
index 00000000..b7b70a8d
--- /dev/null
+++ b/domainbigdata_service/bootstrap
@@ -0,0 +1,147 @@
+#!/bin/sh
+# (c) 2016, The MITRE Corporation. All rights reserved.
+# Source code distributed pursuant to license agreement.
+#
+# Usage: bootstrap
+# This script is designed to install all of the necessary dependencies for the
+# service.
+
+. ../funcs.sh
+
+ubuntu_install()
+{
+    printf "${HEAD}Installing dependencies with apt-get${END}\n"
+    #sudo apt-add-repository universe
+    #sudo apt-get update
+    #sudo apt-get install -y --fix-missing phantomjs
+    if [ $? -eq 0 ]
+    then
+        printf "${PASS}Ubuntu Install Complete${END}\n"
+    else
+        printf "${FAIL}Ubuntu Install Failed${END}\n"
+    fi
+    sudo ldconfig
+}
+
+debian_install()
+{
+    printf "${HEAD}Installing dependencies with apt-get${END}\n"
+    #sudo apt-add-repository universe
+    #sudo apt-get update
+    #sudo apt-get install -y --fix-missing phantomjs
+    if [ $? -eq 0 ]
+    then
+        printf "${PASS}Debian Install Complete${END}\n"
+    else
+        printf "${FAIL}Debian Install Failed${END}\n"
+    fi
+    sudo ldconfig
+}
+
+darwin_install()
+{
+    command -v brew >/dev/null 2>&1 || {
+        printf "${HEAD}Installation for OSX requires Homebrew. Please visit http://brew.sh/.${END}\n"
+        exit
+    }
+    #brew install chmlib clamav wireshark upx
+    if [ $? -eq 0 ]
+    then
+        printf "${PASS}Homebrew Install Complete${END}\n"
+    else
+        printf "${FAIL}Homebrew Install Failed${END}\n"
+    fi
+}
+
+freebsd_install()
+{
+    #printf "${HEAD}Installing Ports${END}\n"
+    #sudo pkg install phantomjs
+    if [ $? -eq 0 ]
+    then
+        printf "${PASS}Ports Install Complete${END}\n"
+    else
+        printf "${FAIL}Ports Install Failed${END}\n"
+    fi
+}
+
+red_hat_install()
+{
+    #printf "${HEAD}Installing Yum Packages${END}\n"
+    #sudo yum install phantomjs
+    if [ $? -eq 0 ]
+    then
+        printf "${PASS}Yum Install Complete${END}\n"
+    else
+        printf "${FAIL}Yum Install Failed${END}\n"
+    fi
+}
+
+centos_install()
+{
+    #printf "${HEAD}Installing Yum Packages${END}\n"
+    #sudo yum install phantomjs
+    if [ $? -eq 0 ]
+    then
+        printf "${PASS}Yum Install Complete${END}\n"
+    else
+        printf "${FAIL}Yum Install Failed${END}\n"
+    fi
+}
+
+#===============================================================
+# This is the Beginning of the Script
+#===============================================================
+# See if an argument was supplied
+if [ -z "$1" ]
+then
+    STEP=1
+else
+    STEP=$1
+fi
+
+while [ $STEP -lt 2 ]
+do
+    case $STEP in
+        1)
+            verify
+            if [ "$OS" = 'ubuntu' ]
+            then
+                #printf "${PASS}Ubuntu is Supported!${END}\n"
+                ubuntu_install || exit_restart $STEP
+                depend_crits || exit_restart $STEP
+            elif [ "$OS" = 'debian' ]
+            then
+                #printf "${PASS}Debian is Supported!${END}\n"
+                debian_install || exit_restart $STEP
+                depend_crits || exit_restart $STEP
+            elif [ "$OS" = 'darwin' ]
+            then
+                #printf "${PASS}OS X is Supported!${END}\n"
+                darwin_install || exit_restart $STEP
+                depend_crits || exit_restart $STEP
+            elif [ "$OS" = "centos" ]
+            then
+                #printf "${PASS}CentOS is Supported!${END}\n"
+                centos_install || exit_restart $STEP
+                depend_crits || exit_restart $STEP
+            elif [ "$OS" = "red hat" ]
+            then
+                #printf "${PASS}Red Hat is Supported!${END}\n"
+                red_hat_install || exit_restart $STEP
+                depend_crits || exit_restart $STEP
+            elif [ "$OS" = 'freebsd' ]
+            then
+                #printf "${PASS}FreeBSD is Supported${END}\n"
+                freebsd_install || exit_restart $STEP
+                depend_crits || exit_restart $STEP
+            else
+                printf "${FAIL}OS: $OS, need Ubuntu, Debian, Darwin (OS X), CentOS, Red Hat, or FreeBSD${END}\n"
+                exit
+            fi
+            ;;
+        *)
+            exit
+            ;;
+    esac
+    STEP=$((STEP+1))
+done
diff --git a/domainbigdata_service/domainbigdata.py b/domainbigdata_service/domainbigdata.py
new file mode 100644
index 00000000..8365e93d
--- /dev/null
+++ b/domainbigdata_service/domainbigdata.py
@@ -0,0 +1,261 @@
+###############################################################################
+# Author: Lionel PRAT - Original author: Roberto Sponchioni - @Ptr32Void
+# Modified: 12/10/2017
+# Modified source code: https://github.com/Ptr32Void/OSTrICa/blob/master/ostrica/Plugins/DomainBigData/__init__.py
+###############################################################################
+####################### HEAD OF ORIGINAL SOURCE CODE:
+#-------------------------------------------------------------------------------
+# Name:     OSTrICa - Open Source Threat Intelligence Collector - DomainBigData plugin
+# Purpose:  Collection and visualization of Threat Intelligence data
+#
+# Author:   Roberto Sponchioni - @Ptr32Void
+#
+# Created:  20/12/2015
+# Licence:  This file is part of OSTrICa.
+#
+# OSTrICa is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# OSTrICa is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with OSTrICa. If not, see <http://www.gnu.org/licenses/>.
+#------------------------------------------------------------------------------- +############################################################################### +import sys +import string +import re +from bs4 import BeautifulSoup + +import requests + +class DomainBigData: + + host = "domainbigdata.com" + + def __init__(self): + self.intelligence = {} + self.index_value = '' + self.intelligence_list = [] + pass + + def __del__(self): + self.intelligence = {} + + def email_information(self, email, log): + query = '/email/%s' % (email) + url = "http://%s%s"%(self.host,query) + headers = {'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/38.0.2125.122 Safari/537.36'} + r = requests.get(url, headers=headers) + if r.status_code == 200: + content = r.text.encode('utf8') + self.collect_email_intelligence(content) + return self.intelligence + else: + return {domain: r.status_code} + + def collect_email_intelligence(self, server_response): + soup = BeautifulSoup(server_response, 'html.parser') + associated_sites = soup.findAll('table', {'class':'t1'}) + if len(associated_sites) == 1: + self.extract_associated_sites(associated_sites[0].tbody) + name_soup = soup.findAll('tr', {'id':'trRegistrantName'}) + if len(name_soup) == 1: + email2name = self.extract_information_from_dd(name_soup[0]) + self.intelligence['Domain_For_Name'] = email2name + org_soup = soup.findAll('tr', {'id':'trRegistrantName'}) + if len(org_soup) == 1: + email2org = self.extract_information_from_dd(org_soup[0]) + self.intelligence['Domain_For_Org'] = email2org + + def extract_associated_sites(self, soup): + associated_sites = [] + idx = 0 + related_sites = soup.findAll('td') + for site in related_sites: + if idx == 0: + associated_site = site.get_text() + idx += 1 + continue + elif idx == 1: + creation_date = site.get_text() + idx += 1 + continue + elif idx == 2: + registrar = site.get_text() + idx = 0 + associated_sites.append({'associated_site':associated_site, 'creation_date':creation_date, 'registrar':registrar}) + continue + self.intelligence['associated_sites'] = associated_sites + + def name2dom_collect_information(self, url): + headers = {'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/38.0.2125.122 Safari/537.36'} + r = requests.get(url, headers=headers) + if r.status_code == 200: + content = r.text.encode('utf8') + return self.collect_email2dom_intelligence(content) + else: + return {domain: r.status_code} + + def collect_email2dom_intelligence(self, server_response): + soup = BeautifulSoup(server_response, 'html.parser') + associated_sites = soup.findAll('table', {'class':'t1'}) + if len(associated_sites) == 1: + return self.extract_associated_sites2nj(associated_sites[0].tbody) + + def extract_associated_sites2nj(self, soup): + associated_sites = [] + idx = 0 + related_sites = soup.findAll('td') + for site in related_sites: + if idx == 0: + associated_site = site.get_text() + idx += 1 + continue + elif idx == 1: + creation_date = site.get_text() + idx += 1 + continue + elif idx == 2: + registrar = site.get_text() + idx = 0 + associated_sites.append({'associated_site':associated_site, 'creation_date':creation_date, 'registrar':registrar}) + continue + return associated_sites + + def domain_information(self, domain, log): + query = '/%s' % (domain) + url = "http://%s%s"%(self.host,query) + headers = {'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_1) AppleWebKit/537.36 (KHTML, like Gecko) 
Chrome/38.0.2125.122 Safari/537.36'} + r = requests.get(url, headers=headers) + if r.status_code == 200: + content = r.text.encode('utf8') + self.collect_domain_intelligence(content) + return self.intelligence + else: + return {domain: r.status_code} + + def collect_domain_intelligence(self, server_response): + soup = BeautifulSoup(server_response, 'html.parser') + records = soup.findAll('div', {'id':'divDNSRecords'}) + + if len(records) == 1: + dns_records = records[0].findAll('table', {'class':'t1'}) + self.extract_associated_records(dns_records) + + records = soup.findAll('div', {'id':'divListOtherTLD'}) + if len(records) == 1: + tdls = [] + other_tdls = records[0].findAll('a') + for tdl in other_tdls: + tdls.append(tdl.string) + self.intelligence['other_tdls'] = tdls + + records = soup.findAll('div', {'id':'MainMaster_divRegistrantIDCard'}) + if len(records) == 1: + self.collect_registrant_information(records[0]) + + def collect_registrant_information(self, soup): + registrant_organization = '' + registrant_email = '' + registrant_name = '' + registrant_city = '' + registrant_country = '' + registrant_phone = '' + + organization_soup = soup.findAll('tr', {'id':'MainMaster_trRegistrantOrganization'}) + email_soup = soup.findAll('tr', {'id':'trRegistrantEmail'}) + name_soup = soup.findAll('tr', {'id':'trRegistrantName'}) + city_soup = soup.findAll('tr', {'id':'trRegistrantCity'}) + country_soup = soup.findAll('tr', {'id':'trRegistrantCountry'}) + phone_soup = soup.findAll('tr', {'id':'trRegistrantTel'}) + + if len(organization_soup) == 1: + registrant_organization = self.extract_information_from_registrant(organization_soup[0]) + orgdom = self.extract_information_from_dd(organization_soup[0]) + self.intelligence['Domain_For_Org'] = orgdom + + if len(email_soup) == 1: + registrant_email = self.extract_information_from_registrant(email_soup[0]) + emaildom = self.extract_information_from_dd(email_soup[0]) + self.intelligence['Domain_For_Email'] = emaildom + + if len(name_soup) == 1: + registrant_name = self.extract_information_from_registrant(name_soup[0]) + namedom = self.extract_information_from_dd(name_soup[0]) + self.intelligence['Domain_For_Name'] = namedom + + if len(city_soup) == 1: + registrant_city = self.extract_information_from_registrant(city_soup[0]) + + if len(country_soup) == 1: + registrant_country = self.extract_information_from_registrant(country_soup[0]) + + if len(phone_soup) == 1: + registrant_phone = self.extract_information_from_registrant(phone_soup[0]) + + self.intelligence['organization'] = registrant_organization + self.intelligence['email'] = registrant_email + self.intelligence['registrant_name'] = registrant_name + self.intelligence['registrant_city'] = registrant_city + self.intelligence['registrant_country'] = registrant_country + self.intelligence['registrant_phone'] = registrant_phone + + def extract_information_from_dd(self, soup): + soup = soup.findAll('td') + link = None + link = soup[1].find('a').get('href') + name = '' + if len(soup) == 3: + soup_img = soup[1].findAll('img') + if len(soup_img) == 1: + name = soup[1].contents[1] + else: + name = soup[1].string + elif len(soup) == 2: + name = soup[1].string + #get dom + if link: + domains = self.name2dom_collect_information("http://%s%s"%(self.host,link)) + return {name: domains} + + def extract_information_from_registrant(self, soup): + soup = soup.findAll('td') + if len(soup) == 3: + soup_img = soup[1].findAll('img') + if len(soup_img) == 1: + return soup[1].contents[1] + else: + return soup[1].string 
+        elif len(soup) == 2:
+            return soup[1].string
+        return ''
+
+    def extract_associated_records(self, soups):
+        for soup in soups:
+            all_trs = soup.findAll('tr')
+            self.extract_trs(all_trs)
+            self.intelligence[self.index_value] = self.intelligence_list
+            self.intelligence_list = []
+
+    def extract_trs(self, soup):
+        for tr in soup:
+            self.extract_tds(tr)
+
+    def extract_tds(self, soup):
+        # the first cell of the first row holds the record type, which
+        # becomes the key the table is stored under in the intelligence dict
+        idx = True
+        record_list = []
+        for td in soup:
+            if idx:
+                if td.get_text() not in self.intelligence:
+                    self.index_value = td.get_text()
+                    self.intelligence[self.index_value] = ''
+                idx = False
+            record_list.append(td.get_text())
+        self.intelligence_list.append(record_list)
diff --git a/domainbigdata_service/requirements.txt b/domainbigdata_service/requirements.txt
new file mode 100644
index 00000000..13154420
--- /dev/null
+++ b/domainbigdata_service/requirements.txt
@@ -0,0 +1,2 @@
+bs4
+requests
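
Note: below is a minimal, hypothetical smoke test for the helper module, not part of the patch above. It assumes Python 2 with bs4 and requests installed, run from inside domainbigdata_service/; the second argument of the lookup methods is only a logging hook the module never uses, so None is passed here.

# Hypothetical standalone check of domainbigdata.py, outside CRITs.
import pprint

import domainbigdata

dbd = domainbigdata.DomainBigData()

# Domain lookup: on HTTP 200 returns a dict of DNS records, 'other_tlds'
# and registrant details; on any other status returns {domain: status_code}.
pprint.pprint(dbd.domain_information('example.com', None))

# Email lookup: returns associated sites plus the domains registered to the
# registrant's name/organization, or {email: status_code} on failure.
pprint.pprint(dbd.email_information('admin@example.com', None))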