diff --git a/be/src/olap/rowset/segment_v2/hierarchical_data_reader.cpp b/be/src/olap/rowset/segment_v2/hierarchical_data_reader.cpp
index ca25b230bceec3e..a0e8b3fd0eecd84 100644
--- a/be/src/olap/rowset/segment_v2/hierarchical_data_reader.cpp
+++ b/be/src/olap/rowset/segment_v2/hierarchical_data_reader.cpp
@@ -24,6 +24,7 @@
 #include "olap/rowset/segment_v2/column_reader.h"
 #include "vec/columns/column.h"
 #include "vec/columns/column_map.h"
+#include "vec/columns/column_nullable.h"
 #include "vec/columns/column_object.h"
 #include "vec/common/assert_cast.h"
 #include "vec/common/schema_util.h"
@@ -82,6 +83,10 @@ Status HierarchicalDataReader::init(const ColumnIteratorOptions& opts) {
         RETURN_IF_ERROR(_root_reader->iterator->init(opts));
         _root_reader->inited = true;
     }
+    if (_sparse_column_reader && !_sparse_column_reader->inited) {
+        RETURN_IF_ERROR(_sparse_column_reader->iterator->init(opts));
+        _sparse_column_reader->inited = true;
+    }
     return Status::OK();
 }
 
@@ -402,15 +407,25 @@ Status SparseColumnExtractReader::seek_to_ordinal(ordinal_t ord) {
 }
 
 void SparseColumnExtractReader::_fill_path_column(vectorized::MutableColumnPtr& dst) {
+    // Resolve the nullable wrapper once; it gives access to both the nested column and the null map.
+    vectorized::ColumnNullable* nullable_column = nullptr;
+    if (dst->is_nullable()) {
+        nullable_column = assert_cast<vectorized::ColumnNullable*>(dst.get());
+    }
     vectorized::ColumnObject& var =
-            dst->is_nullable()
-                    ? assert_cast<vectorized::ColumnObject&>(
-                              assert_cast<vectorized::ColumnNullable&>(*dst).get_nested_column())
+            nullable_column != nullptr
+                    ? assert_cast<vectorized::ColumnObject&>(nullable_column->get_nested_column())
                     : assert_cast<vectorized::ColumnObject&>(*dst);
-    DCHECK(!var.is_null_root());
-    vectorized::ColumnObject::fill_path_olumn_from_sparse_data(
-            *var.get_subcolumn({}) /*root*/, StringRef {_path.data(), _path.size()},
+    // No root subcolumn yet: add one, passing the current row count of dst.
+    if (var.is_null_root()) {
+        var.add_sub_column({}, dst->size());
+    }
+    vectorized::NullMap* null_map =
+            nullable_column ? &nullable_column->get_null_map_data() : nullptr;
+    vectorized::ColumnObject::fill_path_column_from_sparse_data(
+            *var.get_subcolumn({}) /*root*/, null_map, StringRef {_path.data(), _path.size()},
             _sparse_column->get_ptr(), 0, _sparse_column->size());
+    var.incr_num_rows(_sparse_column->size());
     _sparse_column->clear();
 }
 
diff --git a/be/src/vec/columns/column_object.cpp b/be/src/vec/columns/column_object.cpp
index 2b64f7f392f250b..862ca880df2af10 100644
--- a/be/src/vec/columns/column_object.cpp
+++ b/be/src/vec/columns/column_object.cpp
@@ -2041,6 +2041,7 @@ Status ColumnObject::finalize(FinalizeMode mode) {
         new_subcolumns.get_mutable_root()->data.finalize(mode);
     } else if (mode == FinalizeMode::WRITE_MODE) {
         new_subcolumns.create_root(Subcolumn(num_rows, is_nullable, true));
+        new_subcolumns.get_mutable_root()->data.finalize(mode);
     }
 
     const bool need_pick_subcolumn_to_sparse_column =
@@ -2493,7 +2494,8 @@ size_t ColumnObject::find_path_lower_bound_in_sparse_data(StringRef path,
     return it.index;
 }
 
-void ColumnObject::fill_path_olumn_from_sparse_data(Subcolumn& subcolumn, StringRef path,
+void ColumnObject::fill_path_column_from_sparse_data(Subcolumn& subcolumn, NullMap* null_map,
+                                                    StringRef path,
                                                     const ColumnPtr& sparse_data_column,
                                                     size_t start, size_t end) {
     const auto& sparse_data_map = assert_cast<const ColumnMap&>(*sparse_data_column);
@@ -2502,6 +2504,11 @@ void ColumnObject::fill_path_olumn_from_sparse_data(Subcolumn& subcolumn, String
     size_t last_offset = sparse_data_offsets[static_cast<ssize_t>(end) - 1];
     // Check if we have at least one row with data.
     if (first_offset == last_offset) {
+        if (null_map) {
+            // resize_fill() takes the absolute new size, so grow from the
+            // current size to append instead of overwriting earlier rows.
+            null_map->resize_fill(null_map->size() + (end - start), 1);
+        }
         subcolumn.insert_many_defaults(end - start);
         return;
     }
@@ -2513,6 +2520,7 @@ void ColumnObject::fill_path_olumn_from_sparse_data(Subcolumn& subcolumn, String
         size_t paths_end = sparse_data_offsets[static_cast<ssize_t>(i)];
         auto lower_bound_path_index = ColumnObject::find_path_lower_bound_in_sparse_data(
                 path, sparse_data_paths, paths_start, paths_end);
+        bool is_null = false;
         if (lower_bound_path_index != paths_end &&
             sparse_data_paths.get_data_at(lower_bound_path_index) == path) {
             // auto value_data = sparse_data_values.get_data_at(lower_bound_path_index);
@@ -2521,8 +2529,14 @@ void ColumnObject::fill_path_olumn_from_sparse_data(Subcolumn& subcolumn, String
             const auto& data = ColumnObject::deserialize_from_sparse_column(&sparse_data_values,
                                                                             lower_bound_path_index);
             subcolumn.insert(data.first, data.second);
+            is_null = false;
         } else {
             subcolumn.insert_default();
+            is_null = true;
+        }
+        // Keep the null map in step with the subcolumn: one flag per row.
+        if (null_map) {
+            null_map->push_back(is_null);
         }
     }
 }
diff --git a/be/src/vec/columns/column_object.h b/be/src/vec/columns/column_object.h
index f8ba93ef8247476..210fd9a68a51597 100644
--- a/be/src/vec/columns/column_object.h
+++ b/be/src/vec/columns/column_object.h
@@ -39,6 +39,7 @@
 #include "util/jsonb_document.h"
 #include "vec/columns/column.h"
 #include "vec/columns/column_map.h"
+#include "vec/columns/column_nullable.h"
 #include "vec/columns/subcolumn_tree.h"
 #include "vec/common/cow.h"
 #include "vec/common/string_ref.h"
@@ -586,7 +587,10 @@ class ColumnObject final : public COWHelper<IColumn, ColumnObject> {
         return {&key, &value};
     }
     // Insert all the data from sparse data with specified path to sub column.
+    // When null_map is not null, one flag per row is appended to it: 1 if the
+    // row has no value for the path, 0 otherwise.
-    static void fill_path_olumn_from_sparse_data(Subcolumn& subcolumn, StringRef path,
+    static void fill_path_column_from_sparse_data(Subcolumn& subcolumn, NullMap* null_map,
+                                                  StringRef path,
                                                  const ColumnPtr& sparse_data_column,
                                                  size_t start, size_t end);
 