From 784849167d19f7906be90af3fd0ab79f6f93d9b0 Mon Sep 17 00:00:00 2001 From: Terentev Aleksey Date: Fri, 11 Aug 2023 14:21:31 +0500 Subject: [PATCH 1/2] feat: add options for sanitize outer links in css https://jira.railsc.ru/browse/PC4-29908 --- lib/string_tools.rb | 12 ++++++++++-- spec/string_tools_spec.rb | 24 ++++++++++++++++++++++++ 2 files changed, 34 insertions(+), 2 deletions(-) diff --git a/lib/string_tools.rb b/lib/string_tools.rb index b124481..30291da 100644 --- a/lib/string_tools.rb +++ b/lib/string_tools.rb @@ -155,6 +155,11 @@ class Base TAGS_WITHOUT_ATTRIBUTES = %w(b strong i em sup sub ul ol li blockquote br tr u caption thead s).freeze + # Public: Sanitize a string + # str - String to sanitize + # attrs - Hash of additional tags and their allowed attributes (default: {}); it may also carry the option keys below, which are deleted from the passed Hash + # :remove_contents - Set of Strings, tag names passed to Sanitize as :remove_contents (default: style and script) + # :protocols - Array of Strings, protocols allowed in URLs inside CSS properties (default: empty Array) def sanitize(str, attrs = {}) # для корректного обрезания utf строчек режем через mb_chars # для защиты от перегрузки парсера пропускаем максимум 1 мегабайт текста @@ -162,6 +167,9 @@ def sanitize(str, attrs = {}) # длина по символам с перестраховкой, т.к. 
латинские символы(теги, например) занимают 1 байт str = str.mb_chars.slice(0..(2**19)).to_s + remove_contents = attrs.delete(:remove_contents) + protocols = attrs.delete(:protocols) || [] + # Мерджим добавочные теги и атрибуты attributes = TAGS_WITH_ATTRIBUTES.merge(attrs) elements = attributes.keys | TAGS_WITHOUT_ATTRIBUTES @@ -173,8 +181,8 @@ def sanitize(str, attrs = {}) str, :attributes => attributes, :elements => elements, - :css => {:properties => Sanitize::Config::RELAXED[:css][:properties]}, - :remove_contents => %w(style script), + :css => {:properties => Sanitize::Config::RELAXED[:css][:properties], protocols: protocols}, + :remove_contents => remove_contents || Set['style', 'script'], :allow_comments => false, :transformers => transformers ) diff --git a/spec/string_tools_spec.rb b/spec/string_tools_spec.rb index b99a907..1d6b7f2 100644 --- a/spec/string_tools_spec.rb +++ b/spec/string_tools_spec.rb @@ -56,6 +56,30 @@ to eq('') end + it 'removes outer link from css when protocols given' do + origin_str = '
' + sanitized_string = described_class.sanitize(origin_str) + expect(sanitized_string).to eq('
') + end + + it 'do not removes outer link from css when protocols given' do + origin_str = '
' + sanitized_string = described_class.sanitize(origin_str, protocols: %w[http https]) + expect(sanitized_string).to eq('
') + end + + it 'removes style content' do + origin_str = '' + sanitized_string = described_class.sanitize(origin_str) + expect(sanitized_string).to eq('') + end + + it 'do not removes style content' do + origin_str = '' + sanitized_string = described_class.sanitize(origin_str, 'style' => %w(type), remove_contents: Set['script']) + expect(sanitized_string).to eq('') + end + context 'multiple invocations of the method' do it 'does not mess up default config' do origin_str = '

' From bed389b5ec5c09c67475e093e23325ecefbfb3fd Mon Sep 17 00:00:00 2001 From: Terentev Aleksey Date: Tue, 29 Aug 2023 17:13:06 +0500 Subject: [PATCH 2/2] feat: sanitize links in alt of img tag https://jira.railsc.ru/browse/PC4-29908 --- lib/string_tools.rb | 9 +++++++++ spec/string_tools_spec.rb | 12 ++++++++++++ 2 files changed, 21 insertions(+) diff --git a/lib/string_tools.rb b/lib/string_tools.rb index 30291da..175d981 100644 --- a/lib/string_tools.rb +++ b/lib/string_tools.rb @@ -199,6 +199,7 @@ def call(env) normalize_link node, 'href' when 'img' normalize_link node, 'src' + remove_links node, 'alt' end end @@ -210,6 +211,14 @@ def normalize_link(node, attr_name) rescue Addressable::URI::InvalidURIError node.swap node.children end + + def remove_links(node, attr_name) + return unless node[attr_name] + + node[attr_name] = node[attr_name].gsub(URI::DEFAULT_PARSER.make_regexp, '').squish + + node.remove_attribute(attr_name) if node[attr_name].empty? + end end class IframeNormalizer diff --git a/spec/string_tools_spec.rb b/spec/string_tools_spec.rb index 1d6b7f2..ff239fc 100644 --- a/spec/string_tools_spec.rb +++ b/spec/string_tools_spec.rb @@ -80,6 +80,18 @@ expect(sanitized_string).to eq('') end + it 'removes links in alt attribute of img tag' do + origin_str = 'http://test.test test https://test.test alt' + sanitized_string = described_class.sanitize(origin_str, 'img' => %w(scr alt)) + expect(sanitized_string).to eq('test alt') + end + + it 'removes alt attribute of img tag if empty value' do + origin_str = 'http://test.test' + sanitized_string = described_class.sanitize(origin_str, 'img' => %w(scr alt)) + expect(sanitized_string).to eq('') + end + context 'multiple invocations of the method' do it 'does not mess up default config' do origin_str = '

'