diff --git a/Gemfile b/Gemfile index c724b7c..6fc4688 100644 --- a/Gemfile +++ b/Gemfile @@ -1,3 +1,5 @@ +# frozen_string_literal: true + source 'https://rubygems.org' # Specify your gem's dependencies in statement.gemspec diff --git a/Rakefile b/Rakefile old mode 100644 new mode 100755 index 08849ba..cf0790b --- a/Rakefile +++ b/Rakefile @@ -1,6 +1,8 @@ #!/usr/bin/env rake +# frozen_string_literal: true + require 'bundler' -require "bundler/gem_tasks" +require 'bundler/gem_tasks' Bundler::GemHelper.install_tasks require 'rake/testtask' @@ -10,4 +12,4 @@ Rake::TestTask.new(:test) do |test| test.verbose = true end -task :default => :test +task default: :test diff --git a/benchmark/rfeedfinder_benchmark.rb b/benchmark/rfeedfinder_benchmark.rb index 62f9930..221fe03 100644 --- a/benchmark/rfeedfinder_benchmark.rb +++ b/benchmark/rfeedfinder_benchmark.rb @@ -1,30 +1,30 @@ -require "benchmark" -require "rubygems" +# frozen_string_literal: true + +require 'benchmark' +require 'rubygems' sites = [ - "log.damog.net", - "http://cnn.com", - "scripting.com", - "mx.planetalinux.org", - "http://feedproxy.google.com/UniversoPlanetaLinux", + 'log.damog.net', + 'http://cnn.com', + 'scripting.com', + 'mx.planetalinux.org', + 'http://feedproxy.google.com/UniversoPlanetaLinux' ] Benchmark.bm do |x| - sites.each do |site| - puts "#{site}:" - - puts " feedbag" - x.report { - require 'feedbag' - Feedbag.find(site) - } + sites.each do |site| + puts "#{site}:" - puts " rfeedfinder" - x.report { - require 'rfeedfinder' - Rfeedfinder.feed(site) - } + puts ' feedbag' + x.report do + require 'feedbag' + Feedbag.find(site) + end - end + puts ' rfeedfinder' + x.report do + require 'rfeedfinder' + Rfeedfinder.feed(site) + end + end end - diff --git a/bin/console b/bin/console index 75ec769..ef1351f 100755 --- a/bin/console +++ b/bin/console @@ -1,9 +1,9 @@ #!/usr/bin/env ruby # frozen_string_literal: true -require "bundler/setup" -require "feedbag" -require "byebug" +require 'bundler/setup' +require 'feedbag' +require 'byebug' # You can add fixtures and/or initialization code here to make experimenting # with your gem easier. You can also use a different console, if you like. @@ -11,5 +11,5 @@ require "byebug" # require "pry" # Pry.start -require "irb" +require 'irb' IRB.start(__FILE__) diff --git a/bin/feedbag b/bin/feedbag index c8e6135..dc1b270 100755 --- a/bin/feedbag +++ b/bin/feedbag @@ -1,12 +1,13 @@ #!/usr/bin/env ruby +# frozen_string_literal: true -require "rubygems" -require "feedbag" +require 'rubygems' +require 'feedbag' def usage - %Q{ - #{$0} [ ... ] - } + %( + #{$PROGRAM_NAME} [ ... ] + ) end if ARGV.empty? @@ -18,11 +19,10 @@ ARGV.each do |url| puts "== #{url}:" feeds = Feedbag.find url if feeds.empty? - puts " no feeds found!" + puts ' no feeds found!' else feeds.each do |f| puts " - #{f}" end end end - diff --git a/feedbag.gemspec b/feedbag.gemspec index 078593c..89465f0 100644 --- a/feedbag.gemspec +++ b/feedbag.gemspec @@ -1,31 +1,31 @@ -# -*- encoding: utf-8 -*- +# frozen_string_literal: true -require_relative "lib/feedbag" +require_relative 'lib/feedbag' Gem::Specification.new do |s| - s.name = %q{feedbag} + s.name = 'feedbag' s.version = Feedbag::VERSION - s.homepage = "http://github.com/damog/feedbag" - s.licenses = ["MIT"] - s.authors = ["David Moreno"] - s.description = %q{Ruby's favorite feed auto-discovery tool} - s.email = %q{damog@damog.net} + s.homepage = 'http://github.com/damog/feedbag' + s.licenses = ['MIT'] + s.authors = ['David Moreno'] + s.description = "Ruby's favorite feed auto-discovery tool" + s.email = 'damog@damog.net' - s.extra_rdoc_files = ["README.markdown", "COPYING"] - s.files = ["lib/feedbag.rb", "benchmark/rfeedfinder_benchmark.rb", "bin/feedbag"] + s.extra_rdoc_files = ['README.markdown', 'COPYING'] + s.files = ['lib/feedbag.rb', 'benchmark/rfeedfinder_benchmark.rb', 'bin/feedbag'] # s.has_rdoc = true - s.rdoc_options = ["--main", "README.markdown"] - s.summary = %q{RSS/Atom feed auto-discovery tool} + s.rdoc_options = ['--main', 'README.markdown'] + s.summary = 'RSS/Atom feed auto-discovery tool' s.add_runtime_dependency 'nokogiri', '~> 1.8', '>= 1.8.2' - s.add_development_dependency 'shoulda', '~> 3' - s.add_development_dependency 'mocha', '~> 0.12', '>= 0.12.0' - s.add_development_dependency 'webmock', '~> 3' s.add_development_dependency 'byebug', '~> 11' + s.add_development_dependency 'mocha', '~> 0.12', '>= 0.12.0' s.add_development_dependency 'rake', '~> 12' + s.add_development_dependency 'shoulda', '~> 3' s.add_development_dependency 'test-unit', '~> 3' + s.add_development_dependency 'webmock', '~> 3' s.bindir = 'bin' - s.executables = ["feedbag"] + s.executables = ['feedbag'] end diff --git a/lib/feedbag.rb b/lib/feedbag.rb index f32396b..4dff4b7 100755 --- a/lib/feedbag.rb +++ b/lib/feedbag.rb @@ -1,24 +1,22 @@ #!/usr/bin/ruby +# frozen_string_literal: true # See COPYING before using this software. -require "rubygems" -require "nokogiri" -require "open-uri" -require "net/http" +require 'rubygems' +require 'nokogiri' +require 'open-uri' +require 'net/http' class Feedbag VERSION = '1.0.0' - CONTENT_TYPES = [ - 'application/x.atom+xml', - 'application/atom+xml', - 'application/xml', - 'text/xml', - 'application/rss+xml', - 'application/rdf+xml', - 'application/json', - 'application/feed+json' - ].freeze + CONTENT_TYPES = %w[application/x.atom+xml + application/atom+xml + application/xml text/xml + application/rss+xml + application/rdf+xml + application/json + application/feed+json].freeze def self.feed?(url) new.feed?(url) @@ -31,7 +29,7 @@ def self.find(url, options = {}) def initialize(options: nil) @feeds = [] @options = options || {} - @options["User-Agent"] ||= "Feedbag/#{VERSION}" + @options['User-Agent'] ||= "Feedbag/#{VERSION}" end def feed?(url) @@ -40,120 +38,104 @@ def feed?(url) url = "#{url_uri.scheme or 'http'}://#{url_uri.host}#{url_uri.path}" url << "?#{url_uri.query}" if url_uri.query - # hack: - url.sub!(/^feed:\/\//, 'http://') + # hack: + url.sub!(%r{^feed://}, 'http://') res = Feedbag.find(url) - if res.size == 1 and res.first == url - return true - else - return false - end + res.size == 1 and res.first == url end - def find(url, options = {}) + def find(url, _options = {}) url_uri = URI.parse(url) url = nil if url_uri.scheme.nil? - url = "http://#{url_uri.to_s}" - elsif url_uri.scheme == "feed" - return self.add_feed(url_uri.to_s.sub(/^feed:\/\//, 'http://'), nil) + url = "http://#{url_uri}" + elsif url_uri.scheme == 'feed' + return add_feed(url_uri.to_s.sub(%r{^feed://}, 'http://'), nil) else url = url_uri.to_s end - #url = "#{url_uri.scheme or 'http'}://#{url_uri.host}#{url_uri.path}" + # url = "#{url_uri.scheme or 'http'}://#{url_uri.host}#{url_uri.path}" # check if feed_valid is avail begin - require "feed_validator" + require 'feed_validator' v = W3C::FeedValidator.new v.validate_url(url) - return self.add_feed(url, nil) if v.valid? + return add_feed(url, nil) if v.valid? rescue LoadError # scoo rescue REXML::ParseException # usually indicates timeout # TODO: actually find out timeout. use Terminator? # $stderr.puts "Feed looked like feed but might not have passed validation or timed out" - rescue => ex - $stderr.puts "#{ex.class} error occurred with: `#{url}': #{ex.message}" + rescue StandardError => e + warn "#{e.class} error occurred with: `#{url}': #{e.message}" end begin html = URI.open(url, **@options) do |f| content_type = f.content_type.downcase - if content_type == "application/octet-stream" # open failed - content_type = f.meta["content-type"].gsub(/;.*$/, '') - end - if CONTENT_TYPES.include?(content_type) - return self.add_feed(url, nil) + if content_type == 'application/octet-stream' # open failed + content_type = f.meta['content-type'].gsub(/;.*$/, '') end + return add_feed(url, nil) if CONTENT_TYPES.include?(content_type) doc = Nokogiri::HTML(f.read) - if doc.at("base") and doc.at("base")["href"] - @base_uri = doc.at("base")["href"] - else - @base_uri = nil - end + @base_uri = (doc.at('base')['href'] if doc.at('base') && doc.at('base')['href']) # first with links - (doc/"atom:link").each do |l| - next unless l["rel"] && l["href"].present? - if l["type"] and CONTENT_TYPES.include?(l["type"].downcase.strip) and l["rel"].downcase == "self" - self.add_feed(l["href"], url, @base_uri) + (doc / 'atom:link').each do |l| + next unless l['rel'] && l['href'].present? + + if l['type'] && CONTENT_TYPES.include?(l['type'].downcase.strip) && l['rel'].casecmp('self').zero? + add_feed(l['href'], url, @base_uri) end end doc.xpath("//link[@rel='alternate' or @rel='service.feed'][@href][@type]").each do |l| - if CONTENT_TYPES.include?(l['type'].downcase.strip) - self.add_feed(l["href"], url, @base_uri) - end + add_feed(l['href'], url, @base_uri) if CONTENT_TYPES.include?(l['type'].downcase.strip) end doc.xpath("//link[@rel='alternate' and @type='application/json'][@href]").each do |e| - self.add_feed(e['href'], url, @base_uri) if self.looks_like_feed?(e['href']) + add_feed(e['href'], url, @base_uri) if looks_like_feed?(e['href']) end - (doc/"a").each do |a| - next unless a["href"] - if self.looks_like_feed?(a["href"]) and (a["href"] =~ /\// or a["href"] =~ /#{url_uri.host}/) - self.add_feed(a["href"], url, @base_uri) + (doc / 'a').each do |a| + next unless a['href'] + + if looks_like_feed?(a['href']) && (a['href'] =~ (%r{/}) || a['href'] =~ (/#{url_uri.host}/)) + add_feed(a['href'], url, @base_uri) end end - (doc/"a").each do |a| - next unless a["href"] - if self.looks_like_feed?(a["href"]) - self.add_feed(a["href"], url, @base_uri) - end + (doc / 'a').each do |a| + next unless a['href'] + + add_feed(a['href'], url, @base_uri) if looks_like_feed?(a['href']) end # Added support for feeds like http://tabtimes.com/tbfeed/mashable/full.xml - if url.match(/.xml$/) and doc.root and doc.root["xml:base"] and doc.root["xml:base"].strip == url.strip - self.add_feed(url, nil) + if url.match(/.xml$/) && doc.root && doc.root['xml:base'] && (doc.root['xml:base'].strip == url.strip) + add_feed(url, nil) end end - rescue Timeout::Error => err - $stderr.puts "Timeout error occurred with `#{url}: #{err}'" - rescue OpenURI::HTTPError => the_error - $stderr.puts "Error occurred with `#{url}': #{the_error}" - rescue SocketError => err - $stderr.puts "Socket error occurred with: `#{url}': #{err}" - rescue => ex - $stderr.puts "#{ex.class} error occurred with: `#{url}': #{ex.message}" + rescue Timeout::Error => e + warn "Timeout error occurred with `#{url}: #{e}'" + rescue OpenURI::HTTPError => e + warn "Error occurred with `#{url}': #{e}" + rescue SocketError => e + warn "Socket error occurred with: `#{url}': #{e}" + rescue StandardError => e + warn "#{e.class} error occurred with: `#{url}': #{e.message}" ensure return @feeds end - end def looks_like_feed?(url) - if url =~ /(\.(rdf|xml|rss)(\?([\w'\-%]?(=[\w'\-%.]*)?(&|#|\+|\;)?)+)?(:[\w'\-%]+)?$|feed=(rss|atom)|(atom|feed)\/?$)/i - true - else - false - end + %r{(\.(rdf|xml|rss)(\?([\w'\-%]?(=[\w'\-%.]*)?(&|#|\+|;)?)+)?(:[\w'\-%]+)?$|feed=(rss|atom)|(atom|feed)/?$)}i.match?(url) end def add_feed(feed_url, orig_url, base_uri = nil) @@ -167,7 +149,7 @@ def add_feed(feed_url, orig_url, base_uri = nil) begin uri = URI.parse(url) - rescue + rescue StandardError puts "Error with `#{url}'" exit 1 end @@ -177,7 +159,7 @@ def add_feed(feed_url, orig_url, base_uri = nil) end # verify url is really valid - @feeds.push(url) unless @feeds.include?(url)# if self._is_http_valid(URI.parse(url), orig_url) + @feeds.push(url) unless @feeds.include?(url) # if self._is_http_valid(URI.parse(url), orig_url) end # not used. yet. @@ -185,16 +167,16 @@ def _is_http_valid(uri, orig_url) req = Net::HTTP.get_response(uri) orig_uri = URI.parse(orig_url) case req - when Net::HTTPSuccess then - return true + when Net::HTTPSuccess + true else - return false + false end end end -if __FILE__ == $0 - if ARGV.size == 0 +if __FILE__ == $PROGRAM_NAME + if ARGV.size.zero? puts 'usage: feedbag url' else puts Feedbag.find ARGV.first diff --git a/rails/init.rb b/rails/init.rb index 025da82..50faff5 100644 --- a/rails/init.rb +++ b/rails/init.rb @@ -1 +1,3 @@ -require File.join File.dirname(__FILE__), "..", "lib", "feedbag" +# frozen_string_literal: true + +require File.join File.dirname(__FILE__), '..', 'lib', 'feedbag' diff --git a/test/feedbag_test.rb b/test/feedbag_test.rb index b8d04f1..a0652b6 100644 --- a/test/feedbag_test.rb +++ b/test/feedbag_test.rb @@ -1,113 +1,122 @@ +# frozen_string_literal: true + require 'test_helper' class FeedbagTest < Test::Unit::TestCase - - context "Feedbag.feed? should know that an RSS url is a feed" do - setup do + context 'Feedbag.feed? should know that an RSS url is a feed' do + before do @rss_url = 'http://example.com/rss/' Feedbag.stubs(:find).with(@rss_url).returns([@rss_url]) end - should "return true" do + should 'return true' do assert Feedbag.feed?(@rss_url) end end - context "Feedbag.feed? should know that an RSS url with parameters is a feed" do - setup do - @rss_url = "http://example.com/data?format=rss" + context 'Feedbag.feed? should know that an RSS url with parameters is a feed' do + before do + @rss_url = 'http://example.com/data?format=rss' Feedbag.stubs(:find).with(@rss_url).returns([@rss_url]) end - should "return true" do + should 'return true' do assert Feedbag.feed?(@rss_url) end end - context "Feedbag find should discover feeds containing atom:link" do - setup do + context 'Feedbag find should discover feeds containing atom:link' do + before do @feeds = ['http://jenniferlynch.wordpress.com/feed', 'http://lurenbijdeburen.wordpress.com/feed'] end - should "find atom feed" do + should 'find atom feed' do @feeds.each do |url| assert_equal [url], Feedbag.find(url) end end end - context "Feedbag#looks_like_feed? should assume that url with proper extension is a feed" do - setup do + context 'Feedbag#looks_like_feed? should assume that url with proper extension is a feed' do + before do @feeds = ['http://feeds.bbci.co.uk/news/rss.xml', 'http://feeds.bbci.co.uk/news/rss.rdf', 'http://feeds.bbci.co.uk/news/rss.rss', 'http://feeds.bbci.co.uk/news/rss.xml?edition=int'] end - should "return true" do + should 'return true' do @feeds.each do |url| assert Feedbag.new.looks_like_feed?(url) end end end - context "Feedbag find should discover JSON Feeds" do - should "find json feed" do + context 'Feedbag find should discover JSON Feeds' do + should 'find json feed' do src = 'test/testcases/json1.html' - stub_request(:any, "example3.com").to_return(body: File.new(src), status: 200, headers: {"Content-Type" => 'text/html'}) + stub_request(:any, 'example3.com').to_return(body: File.new(src), status: 200, + headers: { 'Content-Type' => 'text/html' }) result = Feedbag.find('http://example3.com') - + assert result.include?('https://blog.booko.com.au/feed/json/') assert result.include?('https://blog.booko.com.au/feed/') assert result.include?('https://blog.booko.com.au/comments/feed/') end end - context "Feedbag should follow redirects" do - should "follow redirects" do + context 'Feedbag should follow redirects' do + should 'follow redirects' do src = 'test/testcases/json1.html' - stub_request(:any, "example1.com").to_return(status: 301, headers: {"Location" => "//example2.com", "Content-Type" => "text/html"}) - stub_request(:any, "example2.com").to_return(body: File.new(src), status: 200, headers: {"Content-Type" => 'text/html'}) + stub_request(:any, 'example1.com').to_return(status: 301, + headers: { + 'Location' => '//example2.com', 'Content-Type' => 'text/html' + }) + stub_request(:any, 'example2.com').to_return(body: File.new(src), status: 200, + headers: { 'Content-Type' => 'text/html' }) result = Feedbag.find('http://example1.com') assert result.include?('https://blog.booko.com.au/feed/json/') end end - context "Feedbag should send the correct User Agent" do - should "send correct user agent" do + context 'Feedbag should send the correct User Agent' do + should 'send correct user agent' do src = 'test/testcases/json1.html' default_user_agent = "Feedbag/#{Feedbag::VERSION}" - stub_request(:any, "example3.com").with(headers:{ 'User-Agent' => "Feedbag/#{Feedbag::VERSION}" }).to_return(body: File.new(src), status: 200, headers: {"Content-Type" => 'text/html'}) + stub_request(:any, 'example3.com').with(headers: { 'User-Agent' => "Feedbag/#{Feedbag::VERSION}" }).to_return( + body: File.new(src), status: 200, headers: { 'Content-Type' => 'text/html' } + ) # This request does match the stub with the default User-Agent and should return a result result = Feedbag.find('http://example3.com') assert result.include?('https://blog.booko.com.au/feed/json/') - # This request does not match the stub using the custom User-Agent - result = Feedbag.find('http://example3.com', 'User-Agent' => "My Personal Agent/1.0.1") + # This request does not match the stub using the custom User-Agent + result = Feedbag.find('http://example3.com', 'User-Agent' => 'My Personal Agent/1.0.1') assert result.empty? - stub_request(:any, "example4.com").with(headers:{ 'User-Agent' => "My Personal Agent/1.0.1" }).to_return(body: File.new(src), status: 200, headers: {"Content-Type" => 'text/html'}) + stub_request(:any, 'example4.com').with(headers: { 'User-Agent' => 'My Personal Agent/1.0.1' }).to_return( + body: File.new(src), status: 200, headers: { 'Content-Type' => 'text/html' } + ) # This request does not match the stub using the default User-Agent result = Feedbag.find('http://example4.com') assert result.empty? # This request does match the stub with a custom User-Agent and should return a result - result = Feedbag.find('http://example4.com', 'User-Agent' => "My Personal Agent/1.0.1") + result = Feedbag.find('http://example4.com', 'User-Agent' => 'My Personal Agent/1.0.1') assert result.include?('https://blog.booko.com.au/feed/json/') end end - #context "Feedbag should pass other options to open-uri" do + # context "Feedbag should pass other options to open-uri" do # should "pass options to open-uri" do # end - #end + # end - context "Feedbag should be able to find URLs with ampersands and plus signs" do - setup do + context 'Feedbag should be able to find URLs with ampersands and plus signs' do + before do @feed = 'https://link.springer.com/search.rss?facet-content-type=Article&facet-journal-id=41116&channel-name=Living+Reviews+in+Solar+Physics' end - should "return true" do + should 'return true' do assert Feedbag.new.looks_like_feed?(@feed) end end - end diff --git a/test/test_helper.rb b/test/test_helper.rb index d0edfd6..bcc3f2e 100644 --- a/test/test_helper.rb +++ b/test/test_helper.rb @@ -1,3 +1,5 @@ +# frozen_string_literal: true + require 'rubygems' require 'test/unit' require 'shoulda'