Skip to content

Commit

Permalink
wappalyzer
Browse files Browse the repository at this point in the history
  • Loading branch information
benbalter authored and balterbot committed Nov 13, 2020
1 parent 66e3633 commit 366188f
Show file tree
Hide file tree
Showing 7 changed files with 233 additions and 5 deletions.
2 changes: 1 addition & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -5,4 +5,4 @@ Gemfile.lock
/.env
/tmp
/cache
node_modules
node_modules
11 changes: 7 additions & 4 deletions .rubocop_todo.yml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
# This configuration was generated by
# `rubocop --auto-gen-config`
# on 2020-11-13 00:44:58 UTC using RuboCop version 1.3.0.
# on 2020-11-13 02:59:59 UTC using RuboCop version 1.3.0.
# The point is for the user to remove these configuration records
# one by one as the offenses are removed from the code base.
# Note that changes in the inspected code, or installation of new
Expand Down Expand Up @@ -52,20 +52,21 @@ RSpec/EmptyExampleGroup:
Exclude:
- 'spec/checks/site_inspector_endpoint_accessibility_spec.rb'

# Offense count: 23
# Offense count: 24
# Configuration parameters: Max.
RSpec/ExampleLength:
Exclude:
- 'spec/checks/site_inspector_endpoint_dns_spec.rb'
- 'spec/checks/site_inspector_endpoint_hsts_spec.rb'
- 'spec/checks/site_inspector_endpoint_https_spec.rb'
- 'spec/checks/site_inspector_endpoint_sniffer_spec.rb'
- 'spec/checks/site_inspector_endpoint_wappalyzer_spec.rb'
- 'spec/site_inspector_disk_cache_spec.rb'
- 'spec/site_inspector_domain_spec.rb'
- 'spec/site_inspector_endpoint_spec.rb'
- 'spec/site_inspector_spec.rb'

# Offense count: 14
# Offense count: 15
# Configuration parameters: Include, CustomTransform, IgnoreMethods, SpecSuffixOnly.
# Include: **/*_spec*rb*, **/spec/**/*
RSpec/FilePath:
Expand All @@ -79,6 +80,7 @@ RSpec/FilePath:
- 'spec/checks/site_inspector_endpoint_hsts_spec.rb'
- 'spec/checks/site_inspector_endpoint_https_spec.rb'
- 'spec/checks/site_inspector_endpoint_sniffer_spec.rb'
- 'spec/checks/site_inspector_endpoint_wappalyzer_spec.rb'
- 'spec/checks/site_inspector_endpoint_whois_spec.rb'
- 'spec/site_inspector_cache_spec.rb'
- 'spec/site_inspector_disk_cache_spec.rb'
Expand All @@ -89,7 +91,7 @@ RSpec/FilePath:
RSpec/MultipleExpectations:
Max: 5

# Offense count: 238
# Offense count: 240
# Configuration parameters: IgnoreSharedExamples.
RSpec/NamedSubject:
Exclude:
Expand All @@ -102,6 +104,7 @@ RSpec/NamedSubject:
- 'spec/checks/site_inspector_endpoint_hsts_spec.rb'
- 'spec/checks/site_inspector_endpoint_https_spec.rb'
- 'spec/checks/site_inspector_endpoint_sniffer_spec.rb'
- 'spec/checks/site_inspector_endpoint_wappalyzer_spec.rb'
- 'spec/checks/site_inspector_endpoint_whois_spec.rb'
- 'spec/site_inspector_cache_spec.rb'
- 'spec/site_inspector_disk_cache_spec.rb'
Expand Down
2 changes: 2 additions & 0 deletions lib/site-inspector.rb
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
require 'whois'
require 'cgi'
require 'resolv'
require 'dotenv/load'

require_relative 'site-inspector/cache'
require_relative 'site-inspector/disk_cache'
Expand All @@ -24,6 +25,7 @@
require_relative 'site-inspector/checks/sniffer'
require_relative 'site-inspector/checks/cookies'
require_relative 'site-inspector/checks/whois'
require_relative 'site-inspector/checks/wappalyzer'
require_relative 'site-inspector/endpoint'
require_relative 'site-inspector/version'
require_relative 'cliver/dependency_ext'
Expand Down
62 changes: 62 additions & 0 deletions lib/site-inspector/checks/wappalyzer.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,62 @@
# frozen_string_literal: true

class SiteInspector
  class Endpoint
    # Check that queries the Wappalyzer lookup API for the endpoint's URL and
    # groups the reported technology names by category.
    #
    # The API key is read from the WAPPALYZER_API_KEY environment variable.
    # When the key is missing or blank no request is made and the check
    # reports an empty Hash.
    class Wappalyzer < Check
      ENDPOINT = 'https://api.wappalyzer.com/lookup/v2/'

      # Technologies detected for the endpoint, grouped by category name.
      #
      # Only the first category of each technology is used; technologies with
      # no category are filed under 'Other'.
      #
      # @return [Hash{String => Array<String>}] category name => technology
      #   names, or an empty Hash when no usable API response is available
      def to_h
        technologies = data['technologies']
        return {} unless technologies.is_a?(Array)

        @to_h ||= technologies.each_with_object({}) do |technology, grouped|
          # `dig` tolerates a missing, null or empty "categories" entry.
          category = technology.dig('categories', 0, 'name') || 'Other'
          (grouped[category] ||= []) << technology['name']
        end
      end

      private

      # Memoized GET request against the lookup API, built from the shared
      # Typhoeus defaults plus the API key header.
      def request
        @request ||= begin
          options = SiteInspector.typhoeus_defaults
          # Guard against defaults that carry no :headers entry.
          headers = (options[:headers] || {}).merge('x-api-key' => api_key)
          options = options.merge(method: :get, headers: headers)
          Typhoeus::Request.new(url, options)
        end
      end

      # Parsed API result for this endpoint.
      #
      # @return [Hash] the first lookup result, or an empty Hash when there is
      #   no API key, the request was unsuccessful, or the body is unusable
      def data
        return {} if api_key.to_s.empty?

        @data ||= begin
          SiteInspector.hydra.queue(request)
          SiteInspector.hydra.run

          response = request.response
          response&.success? ? parse_lookup(response.body) : {}
        end
      end

      # Extracts the first result object from a lookup response body.
      #
      # The fixture for this API is a JSON array with one object per looked-up
      # URL; anything else (invalid JSON, empty array, non-object entries) is
      # treated as "no data" rather than raised.
      #
      # @param body [String, nil] raw response body
      # @return [Hash]
      def parse_lookup(body)
        parsed = JSON.parse(body)
        parsed = parsed.first if parsed.is_a?(Array)
        parsed.is_a?(Hash) ? parsed : {}
      rescue JSON::ParserError, TypeError
        {}
      end

      # Lookup URL with the endpoint's URI passed as the `urls` query value.
      def url
        url = Addressable::URI.parse(ENDPOINT)
        url.query_values = { urls: endpoint.uri }
        url
      end

      # API key taken from the environment (nil when unset).
      def api_key
        @api_key ||= ENV['WAPPALYZER_API_KEY']
      end
    end
  end
end
2 changes: 2 additions & 0 deletions site-inspector.gemspec
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ Gem::Specification.new do |s|
s.add_dependency('cliver', '~> 0.0')
s.add_dependency('colorator', '~> 1.1')
s.add_dependency('dnsruby', '~> 1.0')
s.add_dependency('dotenv', '~> 2.0')
s.add_dependency('gman', '~> 7.0', '>= 7.0.4')
s.add_dependency('mercenary', '~> 0.0')
s.add_dependency('nokogiri', '~> 1.0')
Expand All @@ -28,6 +29,7 @@ Gem::Specification.new do |s|
s.add_dependency('public_suffix', '~> 4.0')
s.add_dependency('sniffles', '~> 0.0')
s.add_dependency('typhoeus', '~> 1.0')
s.add_dependency('urlscan', '~> 0.6')
s.add_dependency('whois', '~> 5.0')

s.add_development_dependency('pry', '~> 0.0')
Expand Down
34 changes: 34 additions & 0 deletions spec/checks/site_inspector_endpoint_wappalyzer_spec.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
# frozen_string_literal: true

require 'spec_helper'

describe SiteInspector::Endpoint::Wappalyzer do
  subject { described_class.new(endpoint) }

  let(:domain) { 'http://ben.balter.com.com' }
  let(:endpoint) { SiteInspector::Endpoint.new(domain) }
  let(:url) { "https://api.wappalyzer.com/lookup/v2/?urls=#{domain}/" }
  let(:api_key) { 'test-api-key' }

  # The check short-circuits to {} when WAPPALYZER_API_KEY is blank, so pin the
  # variable for each example instead of relying on the developer's
  # environment, and restore the previous value afterwards.
  around do |example|
    original_key = ENV['WAPPALYZER_API_KEY']
    begin
      # Assigning nil removes the variable.
      ENV['WAPPALYZER_API_KEY'] = api_key
      example.run
    ensure
      ENV['WAPPALYZER_API_KEY'] = original_key
    end
  end

  before do
    path = File.expand_path '../fixtures/wappalyzer.json', __dir__
    body = File.read path
    stub_request(:get, url).to_return(status: 200, body: body)
  end

  it 'returns the API response' do
    expected = {
      'Analytics' => ['Google Analytics'],
      'CDN' => %w[Cloudflare Fastly],
      'Caching' => ['Varnish'],
      'Other' => %w[Disqus Jekyll],
      'PaaS' => ['GitHub Pages'],
      'Web frameworks' => ['Ruby on Rails']
    }
    expect(subject.to_h).to eql(expected)
  end

  it 'fails gracefully' do
    stub_request(:get, url).to_return(status: 400, body: '')
    expect(subject.to_h).to eql({})
  end

  context 'without an API key' do
    let(:api_key) { nil }

    it 'returns an empty hash' do
      expect(subject.to_h).to eql({})
    end
  end
end
125 changes: 125 additions & 0 deletions spec/fixtures/wappalyzer.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,125 @@
[
{
"url":"https://ben.balter.com",
"technologies":[
{
"slug":"cloudflare",
"name":"Cloudflare",
"versions":[

],
"trafficRank":11,
"categories":[
{
"id":31,
"slug":"cdn",
"name":"CDN"
}
]
},
{
"slug":"varnish",
"name":"Varnish",
"versions":[

],
"trafficRank":11,
"categories":[
{
"id":23,
"slug":"caching",
"name":"Caching"
}
]
},
{
"slug":"disqus",
"name":"Disqus",
"versions":[

],
"trafficRank":11,
"categories":[

]
},
{
"slug":"google-analytics",
"name":"Google Analytics",
"versions":[

],
"trafficRank":11,
"categories":[
{
"id":10,
"slug":"analytics",
"name":"Analytics"
},
{
"id":61,
"slug":"saas",
"name":"SaaS"
}
]
},
{
"slug":"jekyll",
"name":"Jekyll",
"versions":[
"v3.9.0"
],
"trafficRank":11,
"categories":[

]
},
{
"slug":"ruby-on-rails",
"name":"Ruby on Rails",
"versions":[

],
"trafficRank":11,
"categories":[
{
"id":18,
"slug":"web-frameworks",
"name":"Web frameworks"
}
]
},
{
"slug":"fastly",
"name":"Fastly",
"versions":[

],
"trafficRank":11,
"categories":[
{
"id":31,
"slug":"cdn",
"name":"CDN"
}
]
},
{
"slug":"github-pages",
"name":"GitHub Pages",
"versions":[

],
"trafficRank":11,
"categories":[
{
"id":62,
"slug":"paas",
"name":"PaaS"
}
]
}
],
"crawl":true
}
]

0 comments on commit 366188f

Please sign in to comment.