Skip to content

Commit

Permalink
feature(html): удаление ссылок без хоста по умолчанию
Browse files Browse the repository at this point in the history
  • Loading branch information
DmitryBochkarev committed Oct 26, 2015
1 parent 8fe4384 commit 62ce841
Show file tree
Hide file tree
Showing 2 changed files with 40 additions and 13 deletions.
11 changes: 6 additions & 5 deletions lib/string_tools/html.rb
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,7 @@ def self.remove_links(html, options = {})
class LinksRemoveScrubber
# Sets up the scrubber state from an options hash.
#
# @param options [Hash] scrubber settings
# @option options [Object] :whitelist required; a missing key raises
#   KeyError (presumably the hosts whose links are kept -- confirm against
#   `whitelisted?`)
# @option options [Boolean] :remove_without_host (true) stored as-is; `call`
#   consults it when a link's URI has no host
# @raise [KeyError] when :whitelist is absent
def initialize(options)
  allowed = options.fetch(:whitelist)
  drop_hostless = options.fetch(:remove_without_host) { true }

  @whitelist = allowed
  @remove_without_host = drop_hostless
  # Starts out false for every new scrubber.
  @is_have_done_changes = false
end

Expand All @@ -67,11 +68,11 @@ def call(node)
href = node['href']
return if href.blank?
uri = Addressable::URI.parse(href).normalize
return unless uri.host
replace_with_contetn node unless whitelisted? SimpleIDN.to_unicode(uri.host)
rescue
# в любой непонятной ситуации просто удаляем ссылку
replace_with_content node
if !uri.host
replace_with_content node if @remove_without_host
elsif !whitelisted?(SimpleIDN.to_unicode(uri.host))
replace_with_content node
end
end

def whitelisted?(domain)
Expand Down
42 changes: 34 additions & 8 deletions spec/html_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -105,19 +105,45 @@
MARKUP
end
end
end

context 'content with relative links' do
let(:html) do
context 'content with links without host' do
let(:html) do
<<-MARKUP
<a href="https://google.com"><span>goo</span><span>gle</span></a>
<a href="yandex.ru"><span>yan</span><span>dex</span></a>
<a href="yandex.ru">relative</a>
<a href="/yandex.ru">absolute</a>
MARKUP
end

context ':remove_without_host not set' do
subject { StringTools::HTML.remove_links(html, whitelist: ['yandex.ru']) }

it 'should remove' do
is_expected.to eq(<<-MARKUP)
relative
absolute
MARKUP
end
end

context ':remove_without_host set to false' do
subject { StringTools::HTML.remove_links(html, whitelist: ['yandex.ru'], remove_without_host: false) }

it 'should keep relative links' do
it 'should keep' do
is_expected.to eq(<<-MARKUP)
<span>goo</span><span>gle</span>
<a href="yandex.ru"><span>yan</span><span>dex</span></a>
<a href="yandex.ru">relative</a>
<a href="/yandex.ru">absolute</a>
MARKUP
end
end

context ':remove_without_host set to true' do
subject { StringTools::HTML.remove_links(html, whitelist: ['yandex.ru'], remove_without_host: true) }

it 'should remove' do
is_expected.to eq(<<-MARKUP)
relative
absolute
MARKUP
end
end
Expand All @@ -133,7 +159,7 @@
MARKUP
end

it 'should keep relative links' do
it 'should keep only whitelisted links' do
is_expected.to eq(<<-MARKUP)
<a href="https://www.фермаежей.рф">www.фермаежей.рф</a>
www.мояфермаежей.рф
Expand Down

0 comments on commit 62ce841

Please sign in to comment.