diff --git a/lib/string_tools/core_ext/string.rb b/lib/string_tools/core_ext/string.rb index f014a25..f772899 100644 --- a/lib/string_tools/core_ext/string.rb +++ b/lib/string_tools/core_ext/string.rb @@ -163,32 +163,21 @@ def detect_encoding e end - def to_utf8! - self.replace(self.to_utf8) - end - - def to_utf8 - # и так utf - return self if is_utf8? + def to_utf8(inplace = false) + return self if valid_encoding? && is_utf8? - enc = detect_encoding + source_enc = detect_encoding + return '' unless source_enc - # если utf или английские буквы, то тоже ок - return self if ['utf-8', 'ascii'].include?(enc) - - # если неизвестная каша, то возвращаем пустую строку - return '' if enc.nil? + encode_utf8(source_enc, inplace) + end - # иначе пытаемся перекодировать - encode 'utf-8', enc, :undef => :replace, :invalid => :replace - rescue - '' + def to_utf8! + to_utf8(true) end def to_cp1251 - encode 'cp1251', :undef => :replace, :invalid => :replace - rescue - '' + encode 'cp1251', undef: :replace, invalid: :replace, replace: '' end def to_cp1251! @@ -230,6 +219,12 @@ def remove_nonprintable! private + def encode_utf8(source_enc, inplace = false) + args = ['utf-8', source_enc, undef: :replace, invalid: :replace, replace: ''] + + inplace ? encode!(*args) : encode(*args) + end + def surround_with_ansi(ascii_seq) "#{ascii_seq}#{protect_escape_of(ascii_seq)}#{ANSI_CLEAR}" end diff --git a/spec/string_spec.rb b/spec/string_spec.rb index cd51aba..6ee0964 100644 --- a/spec/string_spec.rb +++ b/spec/string_spec.rb @@ -2,6 +2,30 @@ require 'spec_helper' describe String do + describe '#to_utf8' do + let(:invalid_str) { "кирилиц\xD0".dup } + + it do + expect(invalid_str).not_to be_valid_encoding + expect(invalid_str.to_utf8).to be_valid_encoding + expect(invalid_str.to_utf8.is_utf8?).to eq(true) + expect(invalid_str.to_utf8).to eq('кирилиц') + end + end + + describe '#to_utf8!' do + let(:invalid_str) { "кирилиц\xD0".dup } + + it do + expect(invalid_str).not_to be_valid_encoding + invalid_str.to_utf8! + + expect(invalid_str).to be_valid_encoding + expect(invalid_str.is_utf8?).to eq(true) + expect(invalid_str).to eq('кирилиц') + end + end + describe '#mb_downcase' do it { expect("Кириллица".mb_downcase).to eq("кириллица") } end