Skip to content

Commit

Permalink
[Improvement](column) optimization for insert_from_multi_column (#44483)
Browse files Browse the repository at this point in the history
### What problem does this PR solve?
1. reduce virtual function overhead
2. use reserve at start

before: 
-  MergeGetNext:  504.852ms
after: 
-  MergeGetNext:  417.681ms
  • Loading branch information
BiteTheDDDDt authored Nov 25, 2024
1 parent 05b48d6 commit 659d6eb
Show file tree
Hide file tree
Showing 4 changed files with 25 additions and 11 deletions.
7 changes: 0 additions & 7 deletions be/src/vec/columns/column.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -46,13 +46,6 @@ void IColumn::insert_from(const IColumn& src, size_t n) {
insert(src[n]);
}

// Appends one element per source column to this column: for every index k,
// element positions[k] of *srcs[k] is appended via insert_from, in index order.
// positions is indexed for every k < srcs.size() without a bounds check, so it
// must hold at least srcs.size() entries.
// Note: positions is received by value.
void IColumn::insert_from_multi_column(const std::vector<const IColumn*>& srcs,
                                       std::vector<size_t> positions) {
    size_t k = 0;
    for (const IColumn* src : srcs) {
        insert_from(*src, positions[k]);
        ++k;
    }
}

void IColumn::sort_column(const ColumnSorter* sorter, EqualFlags& flags,
IColumn::Permutation& perms, EqualRange& range, bool last_column) const {
sorter->sort_column(static_cast<const IColumn&>(*this), flags, perms, range, last_column);
Expand Down
7 changes: 5 additions & 2 deletions be/src/vec/columns/column.h
Original file line number Diff line number Diff line change
Expand Up @@ -221,8 +221,8 @@ class IColumn : public COW<IColumn> {
// Insert one row from each source column into this column, according to positions:
// positions[i] is the row index within srcs[i] that is passed to insert_from.
// Handling all rows in one call reduces the virtual function overhead of multiple insert_from calls to a single call.
void insert_from_multi_column(const std::vector<const IColumn*>& srcs,
std::vector<size_t> positions);
virtual void insert_from_multi_column(const std::vector<const IColumn*>& srcs,
const std::vector<size_t>& positions) = 0;

/// Appends a batch elements from other column with the same type
/// indices_begin + indices_end represent the row indices of column src
Expand Down Expand Up @@ -706,6 +706,9 @@ class IColumn : public COW<IColumn> {
template <typename Derived>
void append_data_by_selector_impl(MutablePtr& res, const Selector& selector, size_t begin,
size_t end) const;
template <typename Derived>
void insert_from_multi_column_impl(const std::vector<const IColumn*>& srcs,
const std::vector<size_t>& positions);
};

using ColumnPtr = IColumn::Ptr;
Expand Down
9 changes: 9 additions & 0 deletions be/src/vec/columns/column_impl.h
Original file line number Diff line number Diff line change
Expand Up @@ -57,4 +57,13 @@ void IColumn::append_data_by_selector_impl(MutablePtr& res, const Selector& sele
append_data_by_selector_impl<Derived>(res, selector, 0, selector.size());
}

/// Shared implementation of insert_from_multi_column, parameterized on the
/// concrete column type Derived.
///
/// Calls reserve(size() + srcs.size()) once up front, then appends, for every
/// index k in order, element positions[k] of *srcs[k] by calling insert_from on
/// this object viewed as Derived.
///
/// positions is indexed for every k < srcs.size() without a bounds check, so it
/// must hold at least srcs.size() entries.
/// NOTE(review): the cast to Derived is presumably there so the per-row
/// insert_from call can avoid virtual dispatch; confirm against the concrete
/// column classes.
template <typename Derived>
void IColumn::insert_from_multi_column_impl(const std::vector<const IColumn*>& srcs,
                                            const std::vector<size_t>& positions) {
    const size_t num_rows = srcs.size();
    reserve(size() + num_rows);

    auto& self = static_cast<Derived&>(*this);
    for (size_t row = 0; row != num_rows; ++row) {
        self.insert_from(*srcs[row], positions[row]);
    }
}

} // namespace doris::vectorized
13 changes: 11 additions & 2 deletions be/src/vec/common/cow.h
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,9 @@
#include <atomic>
#include <initializer_list>
#include <type_traits>
#include <vector>

namespace doris {

/** Copy-on-write shared ptr.
* Allows working with shared immutable objects and, when needed, unsharing and mutating your own unique copy.
Expand Down Expand Up @@ -392,7 +395,7 @@ class COW {
*
* See example in "cow_columns.cpp".
*/
namespace doris::vectorized {
namespace vectorized {
class IColumn;
}
template <typename Base, typename Derived>
Expand Down Expand Up @@ -422,8 +425,14 @@ class COWHelper : public Base {
this->template append_data_by_selector_impl<Derived>(res, selector, begin, end);
}

// Override of insert_from_multi_column that forwards srcs and positions,
// unchanged, to insert_from_multi_column_impl instantiated with Derived.
void insert_from_multi_column(const std::vector<const vectorized::IColumn*>& srcs,
const std::vector<size_t>& positions) override {
// `this->template` is needed because the member template is looked up through a dependent base.
this->template insert_from_multi_column_impl<Derived>(srcs, positions);
}

protected:
// Calls Base::shallow_mutate() and returns a MutablePtr built from the
// resulting raw pointer, downcast to Derived.
MutablePtr shallow_mutate() const {
    // Keep the base handle alive until the returned pointer has been constructed.
    auto base_handle = Base::shallow_mutate();
    auto* derived_raw = static_cast<Derived*>(base_handle.get());
    return MutablePtr(derived_raw);
}
};
};
} // namespace doris

0 comments on commit 659d6eb

Please sign in to comment.