diff --git a/lib/string_tools/html.rb b/lib/string_tools/html.rb
index 4cabc40..aa32676 100644
--- a/lib/string_tools/html.rb
+++ b/lib/string_tools/html.rb
@@ -56,6 +56,7 @@ def self.remove_links(html, options = {})
class LinksRemoveScrubber
def initialize(options)
@whitelist = options.fetch(:whitelist)
+ @remove_without_host = options.fetch(:remove_without_host, true)
@is_have_done_changes = false
end
@@ -67,11 +68,15 @@ def call(node)
href = node['href']
return if href.blank?
uri = Addressable::URI.parse(href).normalize
- return unless uri.host
- replace_with_contetn node unless whitelisted? SimpleIDN.to_unicode(uri.host)
- rescue
- # в любой непонятной ситуации просто удаляем ссылку
- replace_with_content node
+ # Host-less links are dropped only when :remove_without_host is enabled;
+ # links with a host are dropped unless the host is whitelisted.
+ if !uri.host
+ replace_with_content node if @remove_without_host
+ elsif !whitelisted?(SimpleIDN.to_unicode(uri.host))
+ replace_with_content node
+ end
+ rescue Addressable::URI::InvalidURIError
+ # A malformed href must not abort scrubbing of the whole document: drop the link.
+ replace_with_content node
end
def whitelisted?(domain)
diff --git a/spec/html_spec.rb b/spec/html_spec.rb
index aca1e07..8c03121 100644
--- a/spec/html_spec.rb
+++ b/spec/html_spec.rb
@@ -105,19 +105,45 @@
MARKUP
end
end
+ end
- context 'content with relative links' do
- let(:html) do
+ context 'content with links without host' do
+ let(:html) do
<<-MARKUP
- google
- yandex
+ relative
+ absolute
+ MARKUP
+ end
+
+ context ':remove_without_host not set' do
+ subject { StringTools::HTML.remove_links(html, whitelist: ['yandex.ru']) }
+
+ it 'should remove' do
+ is_expected.to eq(<<-MARKUP)
+ relative
+ absolute
MARKUP
end
+ end
+
+ context ':remove_without_host set to false' do
+ subject { StringTools::HTML.remove_links(html, whitelist: ['yandex.ru'], remove_without_host: false) }
- it 'should keep relative links' do
+ it 'should keep' do
is_expected.to eq(<<-MARKUP)
- google
- yandex
+ relative
+ absolute
+ MARKUP
+ end
+ end
+
+ context ':remove_without_host set to true' do
+ subject { StringTools::HTML.remove_links(html, whitelist: ['yandex.ru'], remove_without_host: true) }
+
+ it 'should remove' do
+ is_expected.to eq(<<-MARKUP)
+ relative
+ absolute
MARKUP
end
end
@@ -133,7 +159,7 @@
MARKUP
end
- it 'should keep relative links' do
+ it 'should keep only whitelisted links' do
is_expected.to eq(<<-MARKUP)
www.фермаежей.рф
www.мояфермаежей.рф