diff --git a/be/src/vec/columns/column_string.cpp b/be/src/vec/columns/column_string.cpp
index cb60a4c515175bd..3caa194551bf79f 100644
--- a/be/src/vec/columns/column_string.cpp
+++ b/be/src/vec/columns/column_string.cpp
@@ -90,12 +90,18 @@ void ColumnStr::shrink_padding_chars() {
     auto* offset = offsets.data();
     size_t size = offsets.size();
 
+    // Element 0 is trimmed in place and never moved; save its old end offset before overwriting it.
+    auto next_start = offset[0];
     offset[0] = strnlen(data, size_at(0));
     for (size_t i = 1; i < size; i++) {
-        auto length = strnlen(get_data_at(i).data, size_at(i));
-        memmove(data + offset[i-1], get_data_at(i).data, length);
-        offset[i] = length;
-    }
+        // Measure element i up to its first NUL (bounded by its old extent), then slide it left to follow element i-1's trimmed end.
+        auto length = strnlen(data + next_start, offset[i] - next_start);
+        memmove(data + offset[i - 1], data + next_start, length);
+        // offset[i] is about to be overwritten; keep its old value, which is where element i+1's original bytes begin.
+        next_start = offset[i];
+        offset[i] = offset[i - 1] + length;
+    }
+    chars.resize_fill(offsets.back()); // Drop the tail left over after compaction; the new size never exceeds the old one.
 }
 
 // This method is only called by MutableBlock::merge_ignore_overflow
diff --git a/be/src/vec/columns/column_string.h b/be/src/vec/columns/column_string.h
index c9dba1de96b267e..bafd623abf607d3 100644
--- a/be/src/vec/columns/column_string.h
+++ b/be/src/vec/columns/column_string.h
@@ -85,6 +85,7 @@ class ColumnStr final : public COWHelper> {
     /// For convenience, every string ends with terminating zero byte. Note that strings could contain zero bytes in the middle.
     Chars chars;
 
+    // Start position of the i-th element within chars (read from offsets[i - 1]).
     size_t ALWAYS_INLINE offset_at(ssize_t i) const { return offsets[i - 1]; }
 
     /// Size of i-th element, including terminating zero.
diff --git a/be/test/vec/columns/column_string_test.cpp b/be/test/vec/columns/column_string_test.cpp
new file mode 100644
index 000000000000000..07195afe1cfcac7
--- /dev/null
+++ b/be/test/vec/columns/column_string_test.cpp
@@ -0,0 +1,55 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "vec/columns/column_string.h"
+
+#include <gtest/gtest.h>
+
+#include "vec/common/string_ref.h"
+#include "vec/core/types.h"
+
+using namespace doris;
+using namespace doris::vectorized;
+
+// Checks that shrink_padding_chars() truncates each value at its first '\0'
+// and that the column still accepts inserts afterwards.
+TEST(ColumnStringTest, shrink_padding_chars) {
+    // Each payload lives in an array so the inserted length comes from the literal
+    // itself (sizeof minus the implicit terminator) and can never over-read it.
+    static constexpr char kSpacePadded[] = "123\0   "; // "123", NUL, three spaces
+    static constexpr char kTextPadded[] = "456\0xx"; // "456", NUL, "xx"
+    static constexpr char kUnpadded[] = "78"; // no embedded NUL, nothing to trim
+
+    ColumnString::MutablePtr col = ColumnString::create();
+    col->insert_data(kSpacePadded, sizeof(kSpacePadded) - 1);
+    col->insert_data(kTextPadded, sizeof(kTextPadded) - 1);
+    col->insert_data(kUnpadded, sizeof(kUnpadded) - 1);
+    col->shrink_padding_chars();
+
+    EXPECT_EQ(col->size(), 3);
+    EXPECT_EQ(col->get_data_at(0), StringRef("123"));
+    EXPECT_EQ(col->get_data_at(0).size, 3);
+    EXPECT_EQ(col->get_data_at(1), StringRef("456"));
+    EXPECT_EQ(col->get_data_at(1).size, 3);
+    EXPECT_EQ(col->get_data_at(2), StringRef("78"));
+    EXPECT_EQ(col->get_data_at(2).size, 2);
+
+    col->insert_data("xyz", 2); // only xy
+
+    EXPECT_EQ(col->size(), 4);
+    EXPECT_EQ(col->get_data_at(3), StringRef("xy"));
+}