Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Avoid allocating intermediary strings when readpartial is passed an outbuf #61

Merged
merged 2 commits into from
May 14, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
110 changes: 66 additions & 44 deletions ext/zlib/zlib.c
Original file line number Diff line number Diff line change
Expand Up @@ -90,7 +90,7 @@ static void zstream_expand_buffer_into(struct zstream*, unsigned long);
static int zstream_expand_buffer_non_stream(struct zstream *z);
static void zstream_append_buffer(struct zstream*, const Bytef*, long);
static VALUE zstream_detach_buffer(struct zstream*);
static VALUE zstream_shift_buffer(struct zstream*, long);
static VALUE zstream_shift_buffer(struct zstream*, long, VALUE);
static void zstream_buffer_ungets(struct zstream*, const Bytef*, unsigned long);
static void zstream_buffer_ungetbyte(struct zstream*, int);
static void zstream_append_input(struct zstream*, const Bytef*, long);
Expand Down Expand Up @@ -170,8 +170,8 @@ static void gzfile_check_footer(struct gzfile*, VALUE outbuf);
static void gzfile_write(struct gzfile*, Bytef*, long);
static long gzfile_read_more(struct gzfile*, VALUE outbuf);
static void gzfile_calc_crc(struct gzfile*, VALUE);
static VALUE gzfile_read(struct gzfile*, long);
static VALUE gzfile_read_all(struct gzfile*);
static VALUE gzfile_read(struct gzfile*, long, VALUE);
static VALUE gzfile_read_all(struct gzfile*, VALUE);
static void gzfile_ungets(struct gzfile*, const Bytef*, long);
static void gzfile_ungetbyte(struct gzfile*, int);
static VALUE gzfile_writer_end_run(VALUE);
Expand Down Expand Up @@ -820,19 +820,31 @@ zstream_detach_buffer(struct zstream *z)
}

static VALUE
zstream_shift_buffer(struct zstream *z, long len)
zstream_shift_buffer(struct zstream *z, long len, VALUE dst)
{
VALUE dst;
char *bufptr;
long buflen = ZSTREAM_BUF_FILLED(z);

if (buflen <= len) {
return zstream_detach_buffer(z);
if (NIL_P(dst) || (!ZSTREAM_IS_FINISHED(z) && !ZSTREAM_IS_GZFILE(z) &&
rb_block_given_p())) {
return zstream_detach_buffer(z);
} else {
bufptr = RSTRING_PTR(z->buf);
rb_str_resize(dst, buflen);
memcpy(RSTRING_PTR(dst), bufptr, buflen);
}
buflen = 0;
} else {
bufptr = RSTRING_PTR(z->buf);
if (NIL_P(dst)) {
dst = rb_str_new(bufptr, len);
} else {
rb_str_resize(dst, len);
memcpy(RSTRING_PTR(dst), bufptr, len);
}
buflen -= len;
}

bufptr = RSTRING_PTR(z->buf);
dst = rb_str_new(bufptr, len);
buflen -= len;
memmove(bufptr, bufptr + len, buflen);
rb_str_set_len(z->buf, buflen);
z->stream.next_out = (Bytef*)RSTRING_END(z->buf);
Expand Down Expand Up @@ -2874,33 +2886,46 @@ gzfile_newstr(struct gzfile *gz, VALUE str)
}

static long
gzfile_fill(struct gzfile *gz, long len)
gzfile_fill(struct gzfile *gz, long len, VALUE outbuf)
{
if (len < 0)
rb_raise(rb_eArgError, "negative length %ld given", len);
if (len == 0)
return 0;
while (!ZSTREAM_IS_FINISHED(&gz->z) && ZSTREAM_BUF_FILLED(&gz->z) < len) {
gzfile_read_more(gz, Qnil);
gzfile_read_more(gz, outbuf);
}
if (GZFILE_IS_FINISHED(gz)) {
if (!(gz->z.flags & GZFILE_FLAG_FOOTER_FINISHED)) {
gzfile_check_footer(gz, Qnil);
gzfile_check_footer(gz, outbuf);
}
return -1;
}
return len < ZSTREAM_BUF_FILLED(&gz->z) ? len : ZSTREAM_BUF_FILLED(&gz->z);
}

static VALUE
gzfile_read(struct gzfile *gz, long len)
gzfile_read(struct gzfile *gz, long len, VALUE outbuf)
{
VALUE dst;

len = gzfile_fill(gz, len);
if (len == 0) return rb_str_new(0, 0);
if (len < 0) return Qnil;
dst = zstream_shift_buffer(&gz->z, len);
len = gzfile_fill(gz, len, outbuf);

if (len < 0) {
if (!NIL_P(outbuf))
rb_str_resize(outbuf, 0);
return Qnil;
}
if (len == 0) {
if (NIL_P(outbuf))
return rb_str_new(0, 0);
else {
rb_str_resize(outbuf, 0);
return outbuf;
}
}

dst = zstream_shift_buffer(&gz->z, len, outbuf);
if (!NIL_P(dst)) gzfile_calc_crc(gz, dst);
return dst;
}
Expand Down Expand Up @@ -2933,29 +2958,26 @@ gzfile_readpartial(struct gzfile *gz, long len, VALUE outbuf)
rb_raise(rb_eEOFError, "end of file reached");
}

dst = zstream_shift_buffer(&gz->z, len);
dst = zstream_shift_buffer(&gz->z, len, outbuf);
gzfile_calc_crc(gz, dst);

if (!NIL_P(outbuf)) {
rb_str_resize(outbuf, RSTRING_LEN(dst));
memcpy(RSTRING_PTR(outbuf), RSTRING_PTR(dst), RSTRING_LEN(dst));
dst = outbuf;
}
return dst;
}

static VALUE
gzfile_read_all(struct gzfile *gz)
gzfile_read_all(struct gzfile *gz, VALUE dst)
{
VALUE dst;

while (!ZSTREAM_IS_FINISHED(&gz->z)) {
gzfile_read_more(gz, Qnil);
gzfile_read_more(gz, dst);
}
if (GZFILE_IS_FINISHED(gz)) {
if (!(gz->z.flags & GZFILE_FLAG_FOOTER_FINISHED)) {
gzfile_check_footer(gz, Qnil);
gzfile_check_footer(gz, dst);
}
if (!NIL_P(dst)) {
rb_str_resize(dst, 0);
return dst;
}
return rb_str_new(0, 0);
}

Expand Down Expand Up @@ -2993,15 +3015,15 @@ gzfile_getc(struct gzfile *gz)
de = (unsigned char *)ds + GZFILE_CBUF_CAPA;
(void)rb_econv_convert(gz->ec, &sp, se, &dp, de, ECONV_PARTIAL_INPUT|ECONV_AFTER_OUTPUT);
rb_econv_check_error(gz->ec);
dst = zstream_shift_buffer(&gz->z, sp - ss);
dst = zstream_shift_buffer(&gz->z, sp - ss, Qnil);
gzfile_calc_crc(gz, dst);
rb_str_resize(cbuf, dp - ds);
return cbuf;
}
else {
buf = gz->z.buf;
len = rb_enc_mbclen(RSTRING_PTR(buf), RSTRING_END(buf), gz->enc);
dst = gzfile_read(gz, len);
dst = gzfile_read(gz, len, Qnil);
if (NIL_P(dst)) return dst;
return gzfile_newstr(gz, dst);
}
Expand Down Expand Up @@ -3909,7 +3931,7 @@ rb_gzreader_s_zcat(int argc, VALUE *argv, VALUE klass)
if (!buf) {
buf = rb_str_new(0, 0);
}
tmpbuf = gzfile_read_all(get_gzfile(obj));
tmpbuf = gzfile_read_all(get_gzfile(obj), Qnil);
rb_str_cat(buf, RSTRING_PTR(tmpbuf), RSTRING_LEN(tmpbuf));
}

Expand Down Expand Up @@ -4011,19 +4033,19 @@ static VALUE
rb_gzreader_read(int argc, VALUE *argv, VALUE obj)
{
struct gzfile *gz = get_gzfile(obj);
VALUE vlen;
VALUE vlen, outbuf;
long len;

rb_scan_args(argc, argv, "01", &vlen);
rb_scan_args(argc, argv, "02", &vlen, &outbuf);
if (NIL_P(vlen)) {
return gzfile_read_all(gz);
return gzfile_read_all(gz, outbuf);
}

len = NUM2INT(vlen);
if (len < 0) {
rb_raise(rb_eArgError, "negative length %ld given", len);
}
return gzfile_read(gz, len);
return gzfile_read(gz, len, outbuf);
}

/*
Expand Down Expand Up @@ -4096,7 +4118,7 @@ rb_gzreader_getbyte(VALUE obj)
struct gzfile *gz = get_gzfile(obj);
VALUE dst;

dst = gzfile_read(gz, 1);
dst = gzfile_read(gz, 1, Qnil);
if (!NIL_P(dst)) {
dst = INT2FIX((unsigned int)(RSTRING_PTR(dst)[0]) & 0xff);
}
Expand Down Expand Up @@ -4217,7 +4239,7 @@ gzreader_skip_linebreaks(struct gzfile *gz)
}
}

str = zstream_shift_buffer(&gz->z, n - 1);
str = zstream_shift_buffer(&gz->z, n - 1, Qnil);
gzfile_calc_crc(gz, str);
}

Expand All @@ -4238,7 +4260,7 @@ gzreader_charboundary(struct gzfile *gz, long n)
if (l < n) {
int n_bytes = rb_enc_precise_mbclen(p, e, gz->enc);
if (MBCLEN_NEEDMORE_P(n_bytes)) {
if ((l = gzfile_fill(gz, n + MBCLEN_NEEDMORE_LEN(n_bytes))) > 0) {
if ((l = gzfile_fill(gz, n + MBCLEN_NEEDMORE_LEN(n_bytes), Qnil)) > 0) {
return l;
}
}
Expand Down Expand Up @@ -4290,10 +4312,10 @@ gzreader_gets(int argc, VALUE *argv, VALUE obj)

if (NIL_P(rs)) {
if (limit < 0) {
dst = gzfile_read_all(gz);
dst = gzfile_read_all(gz, Qnil);
if (RSTRING_LEN(dst) == 0) return Qnil;
}
else if ((n = gzfile_fill(gz, limit)) <= 0) {
else if ((n = gzfile_fill(gz, limit, Qnil)) <= 0) {
return Qnil;
}
else {
Expand All @@ -4303,7 +4325,7 @@ gzreader_gets(int argc, VALUE *argv, VALUE obj)
else {
n = limit;
}
dst = zstream_shift_buffer(&gz->z, n);
dst = zstream_shift_buffer(&gz->z, n, Qnil);
if (NIL_P(dst)) return dst;
gzfile_calc_crc(gz, dst);
dst = gzfile_newstr(gz, dst);
Expand All @@ -4330,7 +4352,7 @@ gzreader_gets(int argc, VALUE *argv, VALUE obj)
while (ZSTREAM_BUF_FILLED(&gz->z) < rslen) {
if (ZSTREAM_IS_FINISHED(&gz->z)) {
if (ZSTREAM_BUF_FILLED(&gz->z) > 0) gz->lineno++;
return gzfile_read(gz, rslen);
return gzfile_read(gz, rslen, Qnil);
}
gzfile_read_more(gz, Qnil);
}
Expand Down Expand Up @@ -4367,7 +4389,7 @@ gzreader_gets(int argc, VALUE *argv, VALUE obj)
}

gz->lineno++;
dst = gzfile_read(gz, n);
dst = gzfile_read(gz, n, Qnil);
if (NIL_P(dst)) return dst;
if (rspara) {
gzreader_skip_linebreaks(gz);
Expand Down
25 changes: 24 additions & 1 deletion test/zlib/test_zlib.rb
Original file line number Diff line number Diff line change
Expand Up @@ -988,6 +988,25 @@ def test_read
assert_raise(ArgumentError) { f.read(-1) }
assert_equal(str, f.read)
end

Zlib::GzipReader.open(t.path) do |f|
s = "".b

assert_raise(ArgumentError) { f.read(-1, s) }

assert_same s, f.read(1, s)
assert_equal "\xE3".b, s

assert_same s, f.read(2, s)
assert_equal "\x81\x82".b, s

assert_same s, f.read(6, s)
assert_equal "\u3044\u3046".b, s

assert_nil f.read(1, s)
assert_equal "".b, s
assert_predicate f, :eof?
end
}
end

Expand All @@ -1002,10 +1021,14 @@ def test_readpartial

Zlib::GzipReader.open(t.path) do |f|
s = "".dup
f.readpartial(3, s)
assert_same s, f.readpartial(3, s)
assert("foo".start_with?(s))

assert_raise(ArgumentError) { f.readpartial(-1) }

assert_same s, f.readpartial(3, s)

assert_predicate f, :eof?
end
}
end
Expand Down