Skip to content

Commit

Permalink
feature: перенос методов очистки от html-разметки
Browse files Browse the repository at this point in the history
  • Loading branch information
Le6ow5k1 committed Nov 12, 2015
1 parent a3ca238 commit f665c33
Show file tree
Hide file tree
Showing 2 changed files with 66 additions and 0 deletions.
34 changes: 34 additions & 0 deletions lib/string_tools.rb
Original file line number Diff line number Diff line change
Expand Up @@ -105,6 +105,40 @@ def sanitize(text, options = {})
def clear_control_characters(string)
  # Drops every character in the U+0000..U+001F range (this range also
  # covers tab, line feed and carriage return) and returns a new String.
  string.delete("\u0000-\u001f")
end

# Public: Removes all HTML tags and whitespace characters.
#
# Before the text is handed to Sanitize, the following are deleted from it:
# the decimal character references &#0; through &#13;, the &nbsp; entity,
# literal non-breaking spaces (U+00A0) and every character matched by \s.
#
# string - The String to clean.
#
# Examples
#
#   strip_all_tags_and_entities("<a>ссылка с&nbsp;пробелом</a><p>параграф&#9;с\tтабуляцией</p>")
#   # => "ссылкаспробелом параграфстабуляцией "
#
# Returns the cleaned String.
def strip_all_tags_and_entities(string)
  without_whitespace = string.gsub(/&#([0-9]|10|11|12|13);|&nbsp;|\xc2\xa0|\s/, '')
  Sanitize.fragment(without_whitespace)
end

# Public: Strips HTML tags but keeps line breaks.
#
# Sanitize is asked to keep only <p>, <ul>, <li>, <br> and <blockquote>.
# The result is then rewritten:
#
#   * opening <p>, <li> and <blockquote> tags are removed;
#   * "<br />", opening <ul> tags and every closing tag become "<br />";
#   * whitespace and non-breaking spaces right after a "<br />" are removed.
#
# A break spelled exactly "<br>" is not matched by these patterns and is left
# as is (see the example below).
#
# string - The String to clean.
#
# Examples
#
#   strip_tags_leave_br("<a></a><ul><li>элемент списка</li></ul><p>параграф</p>просто перенос<br>")
#   # => "<br />элемент списка<br /><br />параграф<br />просто перенос<br>"
#
# Returns the cleaned String.
def strip_tags_leave_br(string)
  # :remove_contents takes element names. The original list had 'javascript',
  # which is not an HTML element, so <script> bodies were not covered by it.
  # 'script' is added; 'javascript' is kept only for backward compatibility.
  sanitized = Sanitize.fragment(
    string,
    remove_contents: %w(style script javascript),
    elements: %w(p ul li br blockquote)
  )

  sanitized
    .gsub(/<(p|li|blockquote)[^>]*>/, '')
    .gsub(%r{<(br /|ul[^>]*|/[^>]*)>}, '<br />')
    .gsub(/<br \/>(\s|\302\240)+/, '<br />')
end
end
extend Sanitizing

Expand Down
32 changes: 32 additions & 0 deletions spec/string_tools_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -32,4 +32,36 @@
expect(sanitized_string).to eq '<a href="http://www.xn--80ajbaetq5a8a.xn--p1ai/">www.фермаежей.рф</a>'
end
end

# Exercises described_class.strip_all_tags_and_entities with markup and with
# whitespace given both literally and as a character reference.
describe '#strip_all_tags_and_entities' do
  subject(:result) { described_class.strip_all_tags_and_entities(input) }

  # Tags are removed; note the spaces kept in the expected value.
  context 'string with html tags' do
    let(:input) { '<a>foo</a><div>bar</div>' }

    it { expect(result).to eq('foo bar ') }
  end

  # The &#9; reference, the literal tab and the space are all removed.
  context 'string with whitespaces and tabs' do
    let(:input) { "foo&#9;bar\t foo" }

    it { expect(result).to eq('foobarfoo') }
  end
end

# Exercises described_class.strip_tags_leave_br on list and paragraph markup.
describe '#strip_tags_leave_br' do
  subject(:result) { described_class.strip_tags_leave_br(input) }

  # <ul>, </li> and </ul> each become a break; the opening <li> is dropped.
  context 'string with html list' do
    let(:input) { '<ul><li>foo</li></ul>' }

    it { expect(result).to eq('<br />foo<br /><br />') }
  end

  # The opening <p> is dropped and the closing </p> becomes a break.
  context 'string with html paragraph' do
    let(:input) { '<p>bar</p>' }

    it { expect(result).to eq('bar<br />') }
  end
end
end

0 comments on commit f665c33

Please sign in to comment.