From 68a944fa8400a8fc6e94967a083b4bedb40cce04 Mon Sep 17 00:00:00 2001 From: Sorah Fukumori Date: Fri, 11 Aug 2023 04:48:18 +0900 Subject: [PATCH] Zlib.gunzip should not fail with utf-8 strings zstream_discard_input was encoding- and character-aware when the given input is user-provided, so it discarded `len` chars instead of `len` bytes. Also, Zlib.gunzip's rdoc explains that it is equivalent to the following code, but that code doesn't fail for a UTF-8 String. ```ruby string = %w[1f8b0800c28000000003cb48cdc9c9070086a6103605000000].pack("H*").force_encoding('UTF-8') sio = StringIO.new(string) gz = Zlib::GzipReader.new(sio, encoding: Encoding::ASCII_8BIT) p gz.read gz&.close ``` --- ext/zlib/zlib.c | 2 +- test/zlib/test_zlib.rb | 7 +++++++ 2 files changed, 8 insertions(+), 1 deletion(-) diff --git a/ext/zlib/zlib.c b/ext/zlib/zlib.c index aefdba0..ef6e892 100644 --- a/ext/zlib/zlib.c +++ b/ext/zlib/zlib.c @@ -923,7 +923,7 @@ zstream_discard_input(struct zstream *z, long len) z->input = Qnil; } else { - z->input = rb_str_substr(z->input, len, + z->input = rb_str_subseq(z->input, len, RSTRING_LEN(z->input) - len); } } diff --git a/test/zlib/test_zlib.rb b/test/zlib/test_zlib.rb index ccb8b38..464141f 100644 --- a/test/zlib/test_zlib.rb +++ b/test/zlib/test_zlib.rb @@ -1457,6 +1457,13 @@ def test_gunzip assert_raise(Zlib::GzipFile::Error){ Zlib.gunzip(src) } end + # Zlib.gunzip input is always considered a binary string, regardless of its String#encoding. + def test_gunzip_encoding + # vvvvvvvv = mtime, but valid UTF-8 string of U+0080 + src = %w[1f8b0800c28000000003cb48cdc9c9070086a6103605000000].pack("H*").force_encoding('UTF-8') + assert_equal 'hello', Zlib.gunzip(src.freeze) + end + def test_gunzip_no_memory_leak assert_no_memory_leak(%[-rzlib], "#{<<~"{#"}", "#{<<~'};'}") d = Zlib.gzip("data")