Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[Refactor](block) Refactor interface of shrink column #44046

Merged
merged 1 commit into from
Nov 19, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 3 additions & 10 deletions be/src/vec/columns/column.h
Original file line number Diff line number Diff line change
Expand Up @@ -126,16 +126,9 @@ class IColumn : public COW<IColumn> {
return nullptr;
}

// shrink the end zeros for CHAR type or ARRAY<CHAR> type
virtual MutablePtr get_shrinked_column() {
throw doris::Exception(ErrorCode::NOT_IMPLEMENTED_ERROR,
"Method get_shrinked_column is not supported for " + get_name());
return nullptr;
}

// check the column whether could shrinked
// now support only in char type, or the nested type in complex type: array{char}, struct{char}, map{char}
virtual bool could_shrinked_column() { return false; }
// Strips trailing padding from string data in place; nothing is returned.
// This base implementation is a deliberate no-op, so any column type that does not override it
// is left untouched. Per the original author's note, only ColumnStr really shrinks itself;
// columns that nest other columns override this to forward the call to all of their nested columns.
virtual void shrink_padding_chars() {}
zclllyybb marked this conversation as resolved.
Show resolved Hide resolved

/// Some columns may require finalization before using of other operations.
virtual void finalize() {}
Expand Down
12 changes: 2 additions & 10 deletions be/src/vec/columns/column_array.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -79,16 +79,8 @@ ColumnArray::ColumnArray(MutableColumnPtr&& nested_column) : data(std::move(nest
offsets = ColumnOffsets::create();
}

bool ColumnArray::could_shrinked_column() {
return data->could_shrinked_column();
}

MutableColumnPtr ColumnArray::get_shrinked_column() {
if (could_shrinked_column()) {
return ColumnArray::create(data->get_shrinked_column(), offsets->assume_mutable());
} else {
return ColumnArray::create(data->assume_mutable(), offsets->assume_mutable());
}
// Forwards the in-place padding shrink to the nested element column (`data`).
// Nothing else is touched by this function: the array's own `offsets` member is not referenced.
void ColumnArray::shrink_padding_chars() {
data->shrink_padding_chars();
}

std::string ColumnArray::get_name() const {
Expand Down
3 changes: 1 addition & 2 deletions be/src/vec/columns/column_array.h
Original file line number Diff line number Diff line change
Expand Up @@ -112,8 +112,7 @@ class ColumnArray final : public COWHelper<IColumn, ColumnArray> {
return Base::create(std::forward<Args>(args)...);
}

MutableColumnPtr get_shrinked_column() override;
bool could_shrinked_column() override;
void shrink_padding_chars() override;

/** On the index i there is an offset to the beginning of the i + 1 -th element. */
using ColumnOffsets = ColumnVector<Offset64>;
Expand Down
24 changes: 3 additions & 21 deletions be/src/vec/columns/column_map.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -502,27 +502,9 @@ ColumnPtr ColumnMap::replicate(const Offsets& offsets) const {
return res;
}

bool ColumnMap::could_shrinked_column() {
return keys_column->could_shrinked_column() || values_column->could_shrinked_column();
}

MutableColumnPtr ColumnMap::get_shrinked_column() {
MutableColumns new_columns(2);

if (keys_column->could_shrinked_column()) {
new_columns[0] = keys_column->get_shrinked_column();
} else {
new_columns[0] = keys_column->get_ptr();
}

if (values_column->could_shrinked_column()) {
new_columns[1] = values_column->get_shrinked_column();
} else {
new_columns[1] = values_column->get_ptr();
}

return ColumnMap::create(new_columns[0]->assume_mutable(), new_columns[1]->assume_mutable(),
offsets_column->assume_mutable());
// Forwards the in-place padding shrink to both nested columns of the map:
// first the keys column, then the values column. The call is unconditional for each of them.
void ColumnMap::shrink_padding_chars() {
keys_column->shrink_padding_chars();
values_column->shrink_padding_chars();
}

void ColumnMap::reserve(size_t n) {
Expand Down
3 changes: 1 addition & 2 deletions be/src/vec/columns/column_map.h
Original file line number Diff line number Diff line change
Expand Up @@ -118,8 +118,7 @@ class ColumnMap final : public COWHelper<IColumn, ColumnMap> {
const char* deserialize_and_insert_from_arena(const char* pos) override;

void update_hash_with_value(size_t n, SipHash& hash) const override;
MutableColumnPtr get_shrinked_column() override;
bool could_shrinked_column() override;
void shrink_padding_chars() override;
ColumnPtr filter(const Filter& filt, ssize_t result_size_hint) const override;
size_t filter(const Filter& filter) override;
ColumnPtr permute(const Permutation& perm, size_t limit) const override;
Expand Down
13 changes: 2 additions & 11 deletions be/src/vec/columns/column_nullable.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -51,17 +51,8 @@ ColumnNullable::ColumnNullable(MutableColumnPtr&& nested_column_, MutableColumnP
_need_update_has_null = true;
}

bool ColumnNullable::could_shrinked_column() {
return get_nested_column_ptr()->could_shrinked_column();
}

MutableColumnPtr ColumnNullable::get_shrinked_column() {
if (could_shrinked_column()) {
return ColumnNullable::create(get_nested_column_ptr()->get_shrinked_column(),
get_null_map_column_ptr());
} else {
return ColumnNullable::create(get_nested_column_ptr(), get_null_map_column_ptr());
}
// Forwards the in-place padding shrink to the nested column obtained from
// get_nested_column_ptr(). The null map is not referenced here.
void ColumnNullable::shrink_padding_chars() {
get_nested_column_ptr()->shrink_padding_chars();
}

void ColumnNullable::update_xxHash_with_value(size_t start, size_t end, uint64_t& hash,
Expand Down
4 changes: 2 additions & 2 deletions be/src/vec/columns/column_nullable.h
Original file line number Diff line number Diff line change
Expand Up @@ -143,8 +143,8 @@ class ColumnNullable final : public COWHelper<IColumn, ColumnNullable>, public N
return Base::create(std::forward<Args>(args)...);
}

MutableColumnPtr get_shrinked_column() override;
bool could_shrinked_column() override;
void shrink_padding_chars() override;

bool is_variable_length() const override { return nested_column->is_variable_length(); }

std::string get_name() const override { return "Nullable(" + nested_column->get_name() + ")"; }
Expand Down
6 changes: 0 additions & 6 deletions be/src/vec/columns/column_object.h
Original file line number Diff line number Diff line change
Expand Up @@ -446,12 +446,6 @@ class ColumnObject final : public COWHelper<IColumn, ColumnObject> {
void update_crc_with_value(size_t start, size_t end, uint32_t& hash,
const uint8_t* __restrict null_data) const override;

// Not implemented
MutableColumnPtr get_shrinked_column() override {
throw doris::Exception(ErrorCode::NOT_IMPLEMENTED_ERROR,
"get_shrinked_column" + get_name());
}

Int64 get_int(size_t /*n*/) const override {
throw doris::Exception(ErrorCode::NOT_IMPLEMENTED_ERROR, "get_int" + get_name());
}
Expand Down
31 changes: 21 additions & 10 deletions be/src/vec/columns/column_string.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@

#include <algorithm>
#include <boost/iterator/iterator_facade.hpp>
#include <cstring>

#include "util/memcpy_inlined.h"
#include "util/simd/bits.h"
Expand Down Expand Up @@ -81,16 +82,26 @@ MutableColumnPtr ColumnStr<T>::clone_resized(size_t to_size) const {
}

template <typename T>
MutableColumnPtr ColumnStr<T>::get_shrinked_column() {
auto shrinked_column = ColumnStr<T>::create();
shrinked_column->get_offsets().reserve(offsets.size());
shrinked_column->get_chars().reserve(chars.size());
for (int i = 0; i < size(); i++) {
StringRef str = get_data_at(i);
reinterpret_cast<ColumnStr<T>*>(shrinked_column.get())
->insert_data(str.data, strnlen(str.data, str.size));
}
return shrinked_column;
// Compacts the column in place: every row is cut at its first NUL byte (or kept whole when it
// contains none), the surviving bytes are packed towards the front of `chars`, and `offsets`
// is rewritten to describe the new, tighter layout. An empty column is left untouched.
void ColumnStr<T>::shrink_padding_chars() {
    if (size() == 0) {
        return;
    }
    auto* offset_data = offsets.data();
    const size_t row_count = offsets.size();
    char* bytes = reinterpret_cast<char*>(chars.data());

    // Row 0 already sits at the front of the buffer, so no bytes move; only its end offset
    // changes. Its old end offset is captured first because that is where row 1 begins in the
    // not-yet-compacted layout.
    auto src_begin = offset_data[0];
    offset_data[0] = strnlen(bytes, size_at(0));

    for (size_t row = 1; row < row_count; ++row) {
        // The old end of this row is needed twice: to bound the scan below, and as the start
        // of the next row once offset_data[row] has been overwritten.
        const auto old_end = offset_data[row];
        const auto kept = strnlen(bytes + src_begin, old_end - src_begin);
        // Slide the surviving bytes left so they directly follow the previous (already
        // compacted) row. memmove is used because source and destination may overlap.
        memmove(bytes + offset_data[row - 1], bytes + src_begin, kept);
        offset_data[row] = offset_data[row - 1] + kept;
        src_begin = old_end;
    }

    // The new size can never exceed the old one, so this only trims the now-unused tail.
    chars.resize_fill(offsets.back());
}

// This method is only called by MutableBlock::merge_ignore_overflow
Expand Down
4 changes: 2 additions & 2 deletions be/src/vec/columns/column_string.h
Original file line number Diff line number Diff line change
Expand Up @@ -85,6 +85,7 @@ class ColumnStr final : public COWHelper<IColumn, ColumnStr<T>> {
/// For convenience, every string ends with terminating zero byte. Note that strings could contain zero bytes in the middle.
Chars chars;

// Start position (byte offset into `chars`) of the i-th element, read from offsets[i - 1].
// NOTE(review): for i == 0 this reads offsets[-1]; presumably the offsets container keeps a
// readable zero in front of its first element — confirm against the container's definition.
size_t ALWAYS_INLINE offset_at(ssize_t i) const { return offsets[i - 1]; }

/// Size of i-th element, including terminating zero.
Expand Down Expand Up @@ -117,8 +118,7 @@ class ColumnStr final : public COWHelper<IColumn, ColumnStr<T>> {

MutableColumnPtr clone_resized(size_t to_size) const override;

MutableColumnPtr get_shrinked_column() override;
bool could_shrinked_column() override { return true; }
void shrink_padding_chars() override;

Field operator[](size_t n) const override {
assert(n < size());
Expand Down
24 changes: 3 additions & 21 deletions be/src/vec/columns/column_struct.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -313,28 +313,10 @@ ColumnPtr ColumnStruct::replicate(const Offsets& offsets) const {
return ColumnStruct::create(new_columns);
}

bool ColumnStruct::could_shrinked_column() {
const size_t tuple_size = columns.size();
for (size_t i = 0; i < tuple_size; ++i) {
if (columns[i]->could_shrinked_column()) {
return true;
}
}
return false;
}

MutableColumnPtr ColumnStruct::get_shrinked_column() {
const size_t tuple_size = columns.size();
MutableColumns new_columns(tuple_size);

for (size_t i = 0; i < tuple_size; ++i) {
if (columns[i]->could_shrinked_column()) {
new_columns[i] = columns[i]->get_shrinked_column();
} else {
new_columns[i] = columns[i]->get_ptr();
}
// Forwards the in-place padding shrink to every member column of the struct, in order.
// The function returns nothing: each member column is modified through its own
// shrink_padding_chars(), so no replacement struct column is built here.
void ColumnStruct::shrink_padding_chars() {
    for (auto& column : columns) {
        column->shrink_padding_chars();
    }
}

void ColumnStruct::reserve(size_t n) {
Expand Down
3 changes: 1 addition & 2 deletions be/src/vec/columns/column_struct.h
Original file line number Diff line number Diff line change
Expand Up @@ -151,8 +151,7 @@ class ColumnStruct final : public COWHelper<IColumn, ColumnStruct> {

int compare_at(size_t n, size_t m, const IColumn& rhs_, int nan_direction_hint) const override;

MutableColumnPtr get_shrinked_column() override;
bool could_shrinked_column() override;
void shrink_padding_chars() override;

void reserve(size_t n) override;
void resize(size_t n) override;
Expand Down
2 changes: 1 addition & 1 deletion be/src/vec/core/block.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1215,7 +1215,7 @@ void Block::shrink_char_type_column_suffix_zero(const std::vector<size_t>& char_
for (auto idx : char_type_idx) {
if (idx < data.size()) {
auto& col_and_name = this->get_by_position(idx);
col_and_name.column = col_and_name.column->assume_mutable()->get_shrinked_column();
col_and_name.column->assume_mutable()->shrink_padding_chars();
}
}
}
Expand Down
48 changes: 48 additions & 0 deletions be/test/vec/columns/column_string_test.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.

#include "vec/columns/column_string.h"

#include <gmock/gmock-more-matchers.h>
#include <gtest/gtest.h>

#include "vec/common/string_ref.h"
#include "vec/core/types.h"

using namespace doris;
using namespace doris::vectorized;

// Verifies ColumnString::shrink_padding_chars(): every row is cut at its first NUL byte, rows
// without a NUL are kept whole, the byte buffer is trimmed to the compacted size, and the
// column can still be appended to afterwards.
TEST(ColumnStringTest, shrink_padding_chars) {
    // An empty column takes the early-return path and must stay empty.
    ColumnString::MutablePtr empty_col = ColumnString::create();
    empty_col->shrink_padding_chars();
    EXPECT_EQ(empty_col->size(), 0U);

    // Inputs with an embedded NUL followed by filler. The lengths passed to insert_data are
    // derived from the arrays themselves (minus the implicit terminator), so the byte count can
    // never run past the end of the literal.
    static constexpr char kSpacePadded[] = "123\0   ";
    static constexpr char kJunkPadded[] = "456\0xx";

    ColumnString::MutablePtr col = ColumnString::create();
    col->insert_data(kSpacePadded, sizeof(kSpacePadded) - 1);
    col->insert_data(kJunkPadded, sizeof(kJunkPadded) - 1);
    col->insert_data("78", 2);
    col->shrink_padding_chars();

    EXPECT_EQ(col->size(), 3U);
    EXPECT_EQ(col->get_data_at(0), StringRef("123"));
    EXPECT_EQ(col->get_data_at(0).size, 3U);
    EXPECT_EQ(col->get_data_at(1), StringRef("456"));
    EXPECT_EQ(col->get_data_at(1).size, 3U);
    EXPECT_EQ(col->get_data_at(2), StringRef("78"));
    EXPECT_EQ(col->get_data_at(2).size, 2U);
    // The byte buffer is resized to the last offset: 3 + 3 + 2 bytes remain after compaction.
    EXPECT_EQ(col->get_chars().size(), 8U);

    // Appending after the shrink must land right behind the compacted data.
    col->insert_data("xyz", 2); // only xy

    EXPECT_EQ(col->size(), 4U);
    EXPECT_EQ(col->get_data_at(3), StringRef("xy"));
}
Loading