Skip to content

Commit

Permalink
feat: add options for sanitize outer links in css
Browse files Browse the repository at this point in the history
  • Loading branch information
taleksei committed Aug 30, 2023
1 parent 79a34b2 commit 7848491
Show file tree
Hide file tree
Showing 2 changed files with 34 additions and 2 deletions.
12 changes: 10 additions & 2 deletions lib/string_tools.rb
Original file line number Diff line number Diff line change
Expand Up @@ -155,13 +155,21 @@ class Base

TAGS_WITHOUT_ATTRIBUTES = %w(b strong i em sup sub ul ol li blockquote br tr u caption thead s).freeze

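# Public: Sanitizes an HTML string.
#
# str   - String to sanitize.
# attrs - Hash of additional tags mapped to their allowed attributes (default: {}).
#         Two option keys are extracted (deleted) from this hash before it is merged
#         with the default tags:
#         :remove_contents - Set of String tag names to be removed
#                            (default: Set['style', 'script']).
#         :protocols       - Array of String protocols allowed in URLs of CSS
#                            properties (default: []).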
def sanitize(str, attrs = {})
# для корректного обрезания utf строчек режем через mb_chars
# для защиты от перегрузки парсера пропускаем максимум 1 мегабайт текста
# длина русского символа в utf-8 - 2 байта, 1Мб/2б = 524288 = 2**19 символов
# длина по символам с перестраховкой, т.к. латинские символы(теги, например) занимают 1 байт
str = str.mb_chars.slice(0..(2**19)).to_s

remove_contents = attrs.delete(:remove_contents)
protocols = attrs.delete(:protocols) || []

# Мерджим добавочные теги и атрибуты
attributes = TAGS_WITH_ATTRIBUTES.merge(attrs)
elements = attributes.keys | TAGS_WITHOUT_ATTRIBUTES
Expand All @@ -173,8 +181,8 @@ def sanitize(str, attrs = {})
str,
:attributes => attributes,
:elements => elements,
:css => {:properties => Sanitize::Config::RELAXED[:css][:properties]},
:remove_contents => %w(style script),
:css => {:properties => Sanitize::Config::RELAXED[:css][:properties], protocols: protocols},
:remove_contents => remove_contents || Set['style', 'script'],
:allow_comments => false,
:transformers => transformers
)
Expand Down
24 changes: 24 additions & 0 deletions spec/string_tools_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,30 @@
to eq('<iframe width="123" height="456" src="https://www.youtube.com/embed/abc" frameborder="0"></iframe>')
end

# No :protocols option is passed here, so sanitize falls back to an empty protocol list
# and the style attribute holding the external url(...) is expected to be dropped.
it 'removes outer link from css when protocols are not given' do
  origin_str = '<div style="background-image: url(http://i54.tinypic.com/4zuxif.jpg)"></div>'
  sanitized_string = described_class.sanitize(origin_str)
  expect(sanitized_string).to eq('<div></div>')
end

# Whitelisting http/https via :protocols must leave the external url(...) in the
# style attribute untouched.
it 'do not removes outer link from css when protocols given' do
  html = '<div style="background-image: url(http://i54.tinypic.com/4zuxif.jpg)"></div>'
  expected = '<div style="background-image: url(http://i54.tinypic.com/4zuxif.jpg)"></div>'

  result = described_class.sanitize(html, protocols: %w[http https])

  expect(result).to eq(expected)
end

# With default options a <style> element is expected to vanish together with its body.
it 'removes style content' do
  html = '<style type="text/css">body{color: red;}</style>'

  expect(described_class.sanitize(html)).to eq('')
end

# Allowing the style tag (with its type attribute) and limiting :remove_contents to
# script only must keep the <style> element and its body intact.
it 'do not removes style content' do
  html = '<style type="text/css">body{color: red;}</style>'
  expected = '<style type="text/css">body{color: red;}</style>'

  result = described_class.sanitize(html, 'style' => %w(type), remove_contents: Set['script'])

  expect(result).to eq(expected)
end

context 'multiple invocations of the method' do
it 'does not mess up default config' do
origin_str = '<p style="text-align: center;" title="foobar"></p>'
Expand Down

0 comments on commit 7848491

Please sign in to comment.