diff --git a/.gitignore b/.gitignore
index f562212..6db848c 100644
--- a/.gitignore
+++ b/.gitignore
@@ -5,4 +5,4 @@ Gemfile.lock
 /.env
 /tmp
 /cache
-node_modules
+node_modules
\ No newline at end of file
diff --git a/.rubocop_todo.yml b/.rubocop_todo.yml
index 6c5ca21..4509efd 100644
--- a/.rubocop_todo.yml
+++ b/.rubocop_todo.yml
@@ -1,6 +1,6 @@
 # This configuration was generated by
 # `rubocop --auto-gen-config`
-# on 2020-11-13 00:44:58 UTC using RuboCop version 1.3.0.
+# on 2020-11-13 02:59:59 UTC using RuboCop version 1.3.0.
 # The point is for the user to remove these configuration records
 # one by one as the offenses are removed from the code base.
 # Note that changes in the inspected code, or installation of new
@@ -52,7 +52,7 @@ RSpec/EmptyExampleGroup:
   Exclude:
     - 'spec/checks/site_inspector_endpoint_accessibility_spec.rb'
 
-# Offense count: 23
+# Offense count: 24
 # Configuration parameters: Max.
 RSpec/ExampleLength:
   Exclude:
@@ -60,12 +60,13 @@ RSpec/ExampleLength:
     - 'spec/checks/site_inspector_endpoint_hsts_spec.rb'
     - 'spec/checks/site_inspector_endpoint_https_spec.rb'
     - 'spec/checks/site_inspector_endpoint_sniffer_spec.rb'
+    - 'spec/checks/site_inspector_endpoint_wappalyzer_spec.rb'
     - 'spec/site_inspector_disk_cache_spec.rb'
     - 'spec/site_inspector_domain_spec.rb'
     - 'spec/site_inspector_endpoint_spec.rb'
     - 'spec/site_inspector_spec.rb'
 
-# Offense count: 14
+# Offense count: 15
 # Configuration parameters: Include, CustomTransform, IgnoreMethods, SpecSuffixOnly.
 # Include: **/*_spec*rb*, **/spec/**/*
 RSpec/FilePath:
@@ -79,6 +80,7 @@ RSpec/FilePath:
     - 'spec/checks/site_inspector_endpoint_hsts_spec.rb'
     - 'spec/checks/site_inspector_endpoint_https_spec.rb'
     - 'spec/checks/site_inspector_endpoint_sniffer_spec.rb'
+    - 'spec/checks/site_inspector_endpoint_wappalyzer_spec.rb'
     - 'spec/checks/site_inspector_endpoint_whois_spec.rb'
     - 'spec/site_inspector_cache_spec.rb'
     - 'spec/site_inspector_disk_cache_spec.rb'
@@ -89,7 +91,7 @@ RSpec/FilePath:
 RSpec/MultipleExpectations:
   Max: 5
 
-# Offense count: 238
+# Offense count: 240
 # Configuration parameters: IgnoreSharedExamples.
 RSpec/NamedSubject:
   Exclude:
@@ -102,6 +104,7 @@ RSpec/NamedSubject:
     - 'spec/checks/site_inspector_endpoint_hsts_spec.rb'
     - 'spec/checks/site_inspector_endpoint_https_spec.rb'
     - 'spec/checks/site_inspector_endpoint_sniffer_spec.rb'
+    - 'spec/checks/site_inspector_endpoint_wappalyzer_spec.rb'
     - 'spec/checks/site_inspector_endpoint_whois_spec.rb'
     - 'spec/site_inspector_cache_spec.rb'
     - 'spec/site_inspector_disk_cache_spec.rb'
diff --git a/lib/site-inspector.rb b/lib/site-inspector.rb
index 7da97e2..4634a38 100644
--- a/lib/site-inspector.rb
+++ b/lib/site-inspector.rb
@@ -9,6 +9,7 @@
 require 'whois'
 require 'cgi'
 require 'resolv'
+require 'dotenv/load'
 
 require_relative 'site-inspector/cache'
 require_relative 'site-inspector/disk_cache'
@@ -24,6 +25,7 @@
 require_relative 'site-inspector/checks/sniffer'
 require_relative 'site-inspector/checks/cookies'
 require_relative 'site-inspector/checks/whois'
+require_relative 'site-inspector/checks/wappalyzer'
 require_relative 'site-inspector/endpoint'
 require_relative 'site-inspector/version'
 require_relative 'cliver/dependency_ext'
diff --git a/lib/site-inspector/checks/wappalyzer.rb b/lib/site-inspector/checks/wappalyzer.rb
new file mode 100644
index 0000000..395aeab
--- /dev/null
+++ b/lib/site-inspector/checks/wappalyzer.rb
@@ -0,0 +1,79 @@
+# frozen_string_literal: true
+
+class SiteInspector
+  class Endpoint
+    # Identifies the technologies behind a site via the Wappalyzer lookup API.
+    #
+    # The check is skipped (and reports an empty Hash) unless the
+    # WAPPALYZER_API_KEY environment variable is set.
+    class Wappalyzer < Check
+      ENDPOINT = 'https://api.wappalyzer.com/lookup/v2/'
+
+      # Groups the detected technology names by the name of their first
+      # category; technologies without a category are filed under "Other".
+      #
+      # Returns a Hash of category name => Array of technology names, or an
+      # empty Hash when no technologies were returned.
+      def to_h
+        technologies = data['technologies']
+        return {} unless technologies.is_a?(Array)
+
+        @to_h ||= technologies.each_with_object({}) do |technology, grouped|
+          # `categories` may be missing or empty for some technologies.
+          category = Array(technology['categories']).first
+          name = category ? category['name'] : 'Other'
+          (grouped[name] ||= []) << technology['name']
+        end
+      end
+
+      private
+
+      # Memoized GET request to the lookup API, authenticated via x-api-key.
+      def request
+        @request ||= begin
+          options = SiteInspector.typhoeus_defaults
+          headers = options[:headers].merge({ "x-api-key": api_key })
+          options = options.merge(method: :get, headers: headers)
+          Typhoeus::Request.new(url, options)
+        end
+      end
+
+      # Runs the lookup (memoized) and returns the first result as a Hash.
+      # Returns an empty Hash when no API key is configured, the request
+      # fails, or the body is not the expected JSON.
+      def data
+        return {} if api_key.to_s.empty?
+
+        @data ||= begin
+          SiteInspector.hydra.queue(request)
+          SiteInspector.hydra.run
+
+          response = request.response
+          response.success? ? parse_lookup(response.body) : {}
+        end
+      end
+
+      # Extracts the first lookup result from a response body. Anything other
+      # than a JSON object (or an array starting with one) yields an empty Hash
+      # so callers never have to deal with nil or parse errors.
+      def parse_lookup(body)
+        result = JSON.parse(body)
+        result = result.first if result.is_a?(Array)
+        result.is_a?(Hash) ? result : {}
+      rescue JSON::ParserError
+        {}
+      end
+
+      # Lookup URL with the endpoint's URI passed as the `urls` query value.
+      def url
+        url = Addressable::URI.parse(ENDPOINT)
+        url.query_values = { urls: endpoint.uri }
+        url
+      end
+
+      def api_key
+        @api_key ||= ENV['WAPPALYZER_API_KEY']
+      end
+    end
+  end
+end
diff --git a/site-inspector.gemspec b/site-inspector.gemspec
index ca54907..bfff8d4 100644
--- a/site-inspector.gemspec
+++ b/site-inspector.gemspec
@@ -20,6 +20,7 @@ Gem::Specification.new do |s|
   s.add_dependency('cliver', '~> 0.0')
   s.add_dependency('colorator', '~> 1.1')
   s.add_dependency('dnsruby', '~> 1.0')
+  s.add_dependency('dotenv', '~> 2.0')
   s.add_dependency('gman', '~> 7.0', '>= 7.0.4')
   s.add_dependency('mercenary', '~> 0.0')
   s.add_dependency('nokogiri', '~> 1.0')
@@ -28,6 +29,7 @@ Gem::Specification.new do |s|
   s.add_dependency('public_suffix', '~> 4.0')
   s.add_dependency('sniffles', '~> 0.0')
   s.add_dependency('typhoeus', '~> 1.0')
+  s.add_dependency('urlscan', '~> 0.6')
   s.add_dependency('whois', '~> 5.0')
 
   s.add_development_dependency('pry', '~> 0.0')
diff --git a/spec/checks/site_inspector_endpoint_wappalyzer_spec.rb b/spec/checks/site_inspector_endpoint_wappalyzer_spec.rb
new file mode 100644
index 0000000..a661d6d
--- /dev/null
+++ b/spec/checks/site_inspector_endpoint_wappalyzer_spec.rb
@@ -0,0 +1,39 @@
+# frozen_string_literal: true
+
+require 'spec_helper'
+
+describe SiteInspector::Endpoint::Wappalyzer do
+  subject { described_class.new(endpoint) }
+
+  let(:domain) { 'http://ben.balter.com.com' }
+  let(:endpoint) { SiteInspector::Endpoint.new(domain) }
+  let(:url) { "https://api.wappalyzer.com/lookup/v2/?urls=#{domain}/" }
+
+  before do
+    # The check is a no-op without an API key; stub one so the spec does not
+    # depend on a developer's local .env file.
+    allow(ENV).to receive(:[]).and_call_original
+    allow(ENV).to receive(:[]).with('WAPPALYZER_API_KEY').and_return('dummy')
+
+    path = File.expand_path '../fixtures/wappalyzer.json', __dir__
+    body = File.read path
+    stub_request(:get, url).to_return(status: 200, body: body)
+  end
+
+  it 'returns the API response' do
+    expected = {
+      'Analytics' => ['Google Analytics'],
+      'CDN' => %w[Cloudflare Fastly],
+      'Caching' => ['Varnish'],
+      'Other' => %w[Disqus Jekyll],
+      'PaaS' => ['GitHub Pages'],
+      'Web frameworks' => ['Ruby on Rails']
+    }
+    expect(subject.to_h).to eql(expected)
+  end
+
+  it 'fails gracefully' do
+    stub_request(:get, url).to_return(status: 400, body: '')
+    expect(subject.to_h).to eql({})
+  end
+end
diff --git a/spec/fixtures/wappalyzer.json b/spec/fixtures/wappalyzer.json
new file mode 100644
index 0000000..20a796b
--- /dev/null
+++ b/spec/fixtures/wappalyzer.json
@@ -0,0 +1,125 @@
+[
+  {
+    "url":"https://ben.balter.com",
+    "technologies":[
+      {
+        "slug":"cloudflare",
+        "name":"Cloudflare",
+        "versions":[
+
+        ],
+        "trafficRank":11,
+        "categories":[
+          {
+            "id":31,
+            "slug":"cdn",
+            "name":"CDN"
+          }
+        ]
+      },
+      {
+        "slug":"varnish",
+        "name":"Varnish",
+        "versions":[
+
+        ],
+        "trafficRank":11,
+        "categories":[
+          {
+            "id":23,
+            "slug":"caching",
+            "name":"Caching"
+          }
+        ]
+      },
+      {
+        "slug":"disqus",
+        "name":"Disqus",
+        "versions":[
+
+        ],
+        "trafficRank":11,
+        "categories":[
+
+        ]
+      },
+      {
+        "slug":"google-analytics",
+        "name":"Google Analytics",
+        "versions":[
+
+        ],
+        "trafficRank":11,
+        "categories":[
+          {
+            "id":10,
+            "slug":"analytics",
+            "name":"Analytics"
+          },
+          {
+            "id":61,
+            "slug":"saas",
+            "name":"SaaS"
+          }
+        ]
+      },
+      {
+        "slug":"jekyll",
+        "name":"Jekyll",
+        "versions":[
+          "v3.9.0"
+        ],
+        "trafficRank":11,
+        "categories":[
+
+        ]
+      },
+      {
+        "slug":"ruby-on-rails",
+        "name":"Ruby on Rails",
+        "versions":[
+
+        ],
+        "trafficRank":11,
+        "categories":[
+          {
+            "id":18,
+            "slug":"web-frameworks",
+            "name":"Web frameworks"
+          }
+        ]
+      },
+      {
+        "slug":"fastly",
+        "name":"Fastly",
+        "versions":[
+
+        ],
+        "trafficRank":11,
+        "categories":[
+          {
+            "id":31,
+            "slug":"cdn",
+            "name":"CDN"
+          }
+        ]
+      },
+      {
+        "slug":"github-pages",
+        "name":"GitHub Pages",
+        "versions":[
+
+        ],
+        "trafficRank":11,
+        "categories":[
+          {
+            "id":62,
+            "slug":"paas",
+            "name":"PaaS"
+          }
+        ]
+      }
+    ],
+    "crawl":true
+  }
+]
\ No newline at end of file