diff --git a/lib/string_tools/html.rb b/lib/string_tools/html.rb index 4cabc40..aa32676 100644 --- a/lib/string_tools/html.rb +++ b/lib/string_tools/html.rb @@ -56,6 +56,7 @@ def self.remove_links(html, options = {}) class LinksRemoveScrubber def initialize(options) @whitelist = options.fetch(:whitelist) + @remove_without_host = options.fetch(:remove_without_host, true) @is_have_done_changes = false end @@ -67,11 +68,11 @@ def call(node) href = node['href'] return if href.blank? uri = Addressable::URI.parse(href).normalize - return unless uri.host - replace_with_contetn node unless whitelisted? SimpleIDN.to_unicode(uri.host) - rescue - # в любой непонятной ситуации просто удаляем ссылку - replace_with_content node + if !uri.host + replace_with_content node if @remove_without_host + elsif !whitelisted?(SimpleIDN.to_unicode(uri.host)) + replace_with_content node + end end def whitelisted?(domain) diff --git a/spec/html_spec.rb b/spec/html_spec.rb index aca1e07..8c03121 100644 --- a/spec/html_spec.rb +++ b/spec/html_spec.rb @@ -105,19 +105,45 @@ MARKUP end end + end - context 'content with relative links' do - let(:html) do + context 'content with links without host' do + let(:html) do <<-MARKUP - google - yandex + relative + absolute + MARKUP + end + + context ':remove_without_host not set' do + subject { StringTools::HTML.remove_links(html, whitelist: ['yandex.ru']) } + + it 'should remove' do + is_expected.to eq(<<-MARKUP) + relative + absolute MARKUP end + end + + context ':remove_without_host set to false' do + subject { StringTools::HTML.remove_links(html, whitelist: ['yandex.ru'], remove_without_host: false) } - it 'should keep relative links' do + it 'should keep' do is_expected.to eq(<<-MARKUP) - google - yandex + relative + absolute + MARKUP + end + end + + context ':remove_without_host set to true' do + subject { StringTools::HTML.remove_links(html, whitelist: ['yandex.ru'], remove_without_host: true) } + + it 'should remove' do + is_expected.to eq(<<-MARKUP) + relative + absolute MARKUP end end @@ -133,7 +159,7 @@ MARKUP end - it 'should keep relative links' do + it 'should keep only whitelisted links' do is_expected.to eq(<<-MARKUP) www.фермаежей.рф www.мояфермаежей.рф