Skip to content

Commit

Permalink
fix(sanitizer): нормализация ссылок в юникоде
Browse files Browse the repository at this point in the history
  • Loading branch information
DmitryBochkarev committed Oct 23, 2015
1 parent 4017e3d commit 8fe4384
Show file tree
Hide file tree
Showing 2 changed files with 42 additions and 4 deletions.
32 changes: 28 additions & 4 deletions lib/string_tools.rb
Original file line number Diff line number Diff line change
Expand Up @@ -110,11 +110,10 @@ def clear_control_characters(string)

module Sanitizer
class Base

TAGS_WITH_ATTRIBUTES = {
'p' => %w(align style),
'div' => %w(align style),
'span' => %w(align style),
'span' => %w(align style),
'td' => %w(align width valign colspan rowspan style),
'th' => %w(align width valign colspan rowspan style),
'a' => %w(href target name style),
Expand All @@ -137,15 +136,40 @@ def sanitize(str, attr = {})
attributes.merge!(attr)
elements = attributes.keys | TAGS_WITHOUT_ATTRIBUTES

Sanitize.fragment(str,
Sanitize.fragment(
str,
:attributes => attributes,
:elements => elements,
:css => {:properties => Sanitize::Config::RELAXED[:css][:properties]},
:remove_contents => %w(style javascript),
:allow_comments => false
:allow_comments => false,
:transformers => [LINK_NORMALIZER]
)
end
end

# Rewrites link attributes into their normalized, standards-compliant form
# without mangling them, e.g.
# http://www.фермаежей.рф => http://www.xn--80ajbaetq5a8a.xn--p1ai
#
# Instances respond to #call(env), so they can be passed in the
# :transformers list given to Sanitize.fragment.
class LinkNormalizer
  # Normalizes the URL-carrying attribute of the node found in env[:node]:
  # `href` for <a> and `src` for <img>. Any other node is left untouched.
  #
  # @param env [Hash] transformer environment; only env[:node] is read
  # @return [String, nil] the normalized URL when one was written, else nil
  def call(env)
    node = env[:node]
    case node.name
    when 'a'.freeze
      normalize_link node, 'href'.freeze
    when 'img'.freeze
      normalize_link node, 'src'.freeze
    end
  end

  private

  # Replaces node[attr_name] with its normalized form, if the attribute is set.
  #
  # @param node [#[], #[]=] element whose attribute is rewritten in place
  # @param attr_name [String] name of the attribute holding the URL
  # @return [String, nil] the normalized URL, or nil when nothing was changed
  def normalize_link(node, attr_name)
    return unless node[attr_name]
    node[attr_name] = Addressable::URI.parse(node[attr_name]).normalize.to_s
  rescue Addressable::URI::InvalidURIError
    # The markup being sanitized is untrusted, so a malformed URL is an
    # expected input. Keep the attribute as it was instead of letting one
    # bad link abort the whole sanitize call.
    nil
  end
end

LINK_NORMALIZER = LinkNormalizer.new
end

module SumInWords
Expand Down
14 changes: 14 additions & 0 deletions spec/string_tools_spec.rb
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
# coding: utf-8

require 'spec_helper'

describe StringTools do
Expand All @@ -17,5 +19,17 @@
sanitized_string = described_class.sanitize(origin_str)
expect(sanitized_string).to eq origin_str
end

it 'normalize unicode urls in img src attribute' do
  # The internationalized host inside img[src] must come out in its xn-- form.
  html = '<img src="http://www.фермаежей.рф/images/foo.png">'
  expected = '<img src="http://www.xn--80ajbaetq5a8a.xn--p1ai/images/foo.png">'

  expect(described_class.sanitize(html)).to eq expected
end

it 'normalize unicode urls in a href attribute' do
  # Only the href value is rewritten; the visible link text keeps its Unicode form.
  html = '<a href="http://www.фермаежей.рф/">www.фермаежей.рф</a>'
  expected = '<a href="http://www.xn--80ajbaetq5a8a.xn--p1ai/">www.фермаежей.рф</a>'

  expect(described_class.sanitize(html)).to eq expected
end
end
end

0 comments on commit 8fe4384

Please sign in to comment.