diff --git a/lib/string_tools.rb b/lib/string_tools.rb index 9301404..f8f903e 100644 --- a/lib/string_tools.rb +++ b/lib/string_tools.rb @@ -105,6 +105,40 @@ def sanitize(text, options = {}) def clear_control_characters(string) string.tr("\u0000-\u001f", '') end + + # Public: strips all HTML tags and whitespace characters + # + # string - String to clean up + # + # Examples + # + # strip_all_tags_and_entities("ссылка с пробелом

параграф с\tтабуляцией

") + # # => "ссылкаспробелом параграфстабуляцией " + # + # Returns String + def strip_all_tags_and_entities(string) + Sanitize.fragment(string.gsub(/&#([0-9]|10|11|12|13);| |\xc2\xa0|\s/, '')) + end + + # Public: strips HTML tags except line breaks + # + # string - String to clean up + # + # Examples + # + # strip_tags_leave_br("

параграф

просто перенос
") + # # => "
элемент списка

параграф
просто перенос
" + # + # Returns String + def strip_tags_leave_br(string) + sanitized = Sanitize.fragment(string, remove_contents: %w(style javascript), elements: %w(p ul li br blockquote)) + + sanitized.gsub!(/<(p|li|blockquote)[^>]*>/, '') + sanitized.gsub!(%r{<(br /|ul[^>]*|/[^>]*)>}, '
') + sanitized.gsub!(/
(\s|\302\240)+/, '
') + + sanitized + end end extend Sanitizing diff --git a/spec/string_tools_spec.rb b/spec/string_tools_spec.rb index 10b7a4d..1b737d5 100644 --- a/spec/string_tools_spec.rb +++ b/spec/string_tools_spec.rb @@ -32,4 +32,36 @@ expect(sanitized_string).to eq 'www.фермаежей.рф' end end + + describe '#strip_all_tags_and_entities' do + subject(:strip_all_tags_and_entities) { described_class.strip_all_tags_and_entities(string) } + + context 'string with html tags' do + let(:string) { 'foo
bar
' } + + it { expect(strip_all_tags_and_entities).to eq('foo bar ') } + end + + context 'string with whitespaces and tabs' do + let(:string) { "foo bar\t foo" } + + it { expect(strip_all_tags_and_entities).to eq('foobarfoo') } + end + end + + describe '#strip_tags_leave_br' do + subject(:strip_tags_leave_br) { described_class.strip_tags_leave_br(string) } + + context 'string with html list' do + let(:string) { '' } + + it { expect(strip_tags_leave_br).to eq('
foo

') } + end + + context 'string with html paragraph' do + let(:string) { '

bar

' } + + it { expect(strip_tags_leave_br).to eq('bar
') } + end + end end