From cabe3b1cfa174a55d45d3db6ce82431f0f9bf6fe Mon Sep 17 00:00:00 2001 From: Xiaoxuan Meng Date: Mon, 9 Sep 2024 20:32:19 -0700 Subject: [PATCH] Fix the extract values function sigature when used with filter process in selective reader (#10956) Summary: Pull Request resolved: https://github.com/facebookincubator/velox/pull/10956 The processFilter depends on the exact value callback type match to detect if the filter to drop value or not. If not drop value, it will add value to the result buffer of the selective reader. For null filter type, it add nulls to the null result buffer of the selective reader. The recent refactor changes the extract value callback from T to const T& which breaks the comparison. It tries to add value to the null result buffer which is not initialized in the prepare read of the selective reader as the reader is a filter only reader. This PR reverts the signature change and verified with both failed query shadow and a new unit test to repro this. We will refactor this later to avoid a copy extract value callback later Reviewed By: Yuhta Differential Revision: D62394606 --- velox/dwio/common/ColumnVisitors.h | 4 +- .../common/SelectiveByteRleColumnReader.h | 4 +- .../SelectiveFloatingPointColumnReader.h | 16 ++++--- .../common/SelectiveIntegerColumnReader.h | 8 ++-- .../SelectiveStringDictionaryColumnReader.h | 12 +++--- velox/exec/tests/TableScanTest.cpp | 43 +++++++++++++++++++ 6 files changed, 65 insertions(+), 22 deletions(-) diff --git a/velox/dwio/common/ColumnVisitors.h b/velox/dwio/common/ColumnVisitors.h index 8c4764736d75..0c665b76886a 100644 --- a/velox/dwio/common/ColumnVisitors.h +++ b/velox/dwio/common/ColumnVisitors.h @@ -162,7 +162,7 @@ class ColumnVisitor { TFilter& filter, SelectiveColumnReader* reader, const RowSet& rows, - const ExtractValues& values) + ExtractValues values) : filter_(filter), reader_(reader), allowNulls_(!TFilter::deterministic || filter.testNull()), @@ -719,7 +719,7 @@ class DictionaryColumnVisitor TFilter& filter, SelectiveColumnReader* reader, const RowSet& rows, - const ExtractValues& values) + ExtractValues values) : ColumnVisitor( filter, reader, diff --git a/velox/dwio/common/SelectiveByteRleColumnReader.h b/velox/dwio/common/SelectiveByteRleColumnReader.h index 407cde02ccf5..43b5e3181bda 100644 --- a/velox/dwio/common/SelectiveByteRleColumnReader.h +++ b/velox/dwio/common/SelectiveByteRleColumnReader.h @@ -60,7 +60,7 @@ class SelectiveByteRleColumnReader : public SelectiveColumnReader { void readHelper( velox::common::Filter* filter, const RowSet& rows, - const ExtractValues& extractValues); + ExtractValues extractValues); template void readCommon( @@ -77,7 +77,7 @@ template < void SelectiveByteRleColumnReader::readHelper( velox::common::Filter* filter, const RowSet& rows, - const ExtractValues& extractValues) { + ExtractValues extractValues) { reinterpret_cast(this)->readWithVisitor( rows, ColumnVisitor( diff --git a/velox/dwio/common/SelectiveFloatingPointColumnReader.h b/velox/dwio/common/SelectiveFloatingPointColumnReader.h index 02604b3eff64..97eb1e758356 100644 --- a/velox/dwio/common/SelectiveFloatingPointColumnReader.h +++ b/velox/dwio/common/SelectiveFloatingPointColumnReader.h @@ -56,8 +56,10 @@ class SelectiveFloatingPointColumnReader : public SelectiveColumnReader { typename TFilter, bool isDense, typename ExtractValues> - void - readHelper(velox::common::Filter* filter, RowSet rows, ExtractValues values); + void readHelper( + velox::common::Filter* filter, + const RowSet& rows, + ExtractValues values); template < typename Reader, @@ -66,11 +68,11 @@ class SelectiveFloatingPointColumnReader : public SelectiveColumnReader { typename ExtractValues> void processFilter( velox::common::Filter* filter, - RowSet rows, + const RowSet& rows, ExtractValues extractValues); template - void processValueHook(RowSet rows, ValueHook* hook); + void processValueHook(const RowSet& rows, ValueHook* hook); }; template @@ -81,7 +83,7 @@ template < typename ExtractValues> void SelectiveFloatingPointColumnReader::readHelper( velox::common::Filter* filter, - RowSet rows, + const RowSet& rows, ExtractValues extractValues) { reinterpret_cast(this)->readWithVisitor( rows, @@ -97,7 +99,7 @@ template < typename ExtractValues> void SelectiveFloatingPointColumnReader::processFilter( velox::common::Filter* filter, - RowSet rows, + const RowSet& rows, ExtractValues extractValues) { if (filter == nullptr) { readHelper( @@ -144,7 +146,7 @@ void SelectiveFloatingPointColumnReader::processFilter( template template void SelectiveFloatingPointColumnReader::processValueHook( - RowSet rows, + const RowSet& rows, ValueHook* hook) { switch (hook->kind()) { case aggregate::AggregationHook::kDoubleSum: diff --git a/velox/dwio/common/SelectiveIntegerColumnReader.h b/velox/dwio/common/SelectiveIntegerColumnReader.h index 7265e32a7921..d6a31dcbe536 100644 --- a/velox/dwio/common/SelectiveIntegerColumnReader.h +++ b/velox/dwio/common/SelectiveIntegerColumnReader.h @@ -48,7 +48,7 @@ class SelectiveIntegerColumnReader : public SelectiveColumnReader { typename ExtractValues> void processFilter( velox::common::Filter* filter, - const ExtractValues& extractValues, + ExtractValues extractValues, const RowSet& rows); // Switches based on the type of ValueHook between different readWithVisitor @@ -65,7 +65,7 @@ class SelectiveIntegerColumnReader : public SelectiveColumnReader { void readHelper( velox::common::Filter* filter, const RowSet& rows, - const ExtractValues& extractValues); + ExtractValues extractValues); // The common part of integer reading. calls the appropriate // instantiation of processValueHook or processFilter based on @@ -82,7 +82,7 @@ template < void SelectiveIntegerColumnReader::readHelper( velox::common::Filter* filter, const RowSet& rows, - const ExtractValues& extractValues) { + ExtractValues extractValues) { switch (valueSize_) { case 2: reinterpret_cast(this)->Reader::readWithVisitor( @@ -124,7 +124,7 @@ template < typename ExtractValues> void SelectiveIntegerColumnReader::processFilter( velox::common::Filter* filter, - const ExtractValues& extractValues, + ExtractValues extractValues, const RowSet& rows) { if (filter == nullptr) { readHelper( diff --git a/velox/dwio/dwrf/reader/SelectiveStringDictionaryColumnReader.h b/velox/dwio/dwrf/reader/SelectiveStringDictionaryColumnReader.h index 3b2dddd88bfd..7583fe550e42 100644 --- a/velox/dwio/dwrf/reader/SelectiveStringDictionaryColumnReader.h +++ b/velox/dwio/dwrf/reader/SelectiveStringDictionaryColumnReader.h @@ -68,16 +68,14 @@ class SelectiveStringDictionaryColumnReader void readWithVisitor(const RowSet& rows, TVisitor visitor); template - void readHelper( - common::Filter* filter, - const RowSet& rows, - const ExtractValues& values); + void + readHelper(common::Filter* filter, const RowSet& rows, ExtractValues values); template void processFilter( common::Filter* filter, const RowSet& rows, - const ExtractValues& extractValues); + ExtractValues extractValues); // Fills 'values' from 'data' and 'lengthDecoder'. The count of // values is in 'values.numValues'. @@ -129,7 +127,7 @@ template void SelectiveStringDictionaryColumnReader::readHelper( common::Filter* filter, const RowSet& rows, - const ExtractValues& values) { + ExtractValues values) { readWithVisitor( rows, dwio::common:: @@ -141,7 +139,7 @@ template void SelectiveStringDictionaryColumnReader::processFilter( common::Filter* filter, const RowSet& rows, - const ExtractValues& extractValues) { + ExtractValues extractValues) { if (filter == nullptr) { readHelper( &dwio::common::alwaysTrue(), rows, extractValues); diff --git a/velox/exec/tests/TableScanTest.cpp b/velox/exec/tests/TableScanTest.cpp index 87f00cc0d3c1..dc6416f7ac56 100644 --- a/velox/exec/tests/TableScanTest.cpp +++ b/velox/exec/tests/TableScanTest.cpp @@ -3153,6 +3153,49 @@ TEST_F(TableScanTest, mapIsNullFilter) { "SELECT * FROM tmp WHERE c0 is null"); } +TEST_F(TableScanTest, stringIsNullFilter) { + constexpr int kSize = 1000; + const char* baseStrings[] = { + "qwertyuiopasdfghjklzxcvbnm", + "qazwsxedcrfvtgbyhnujmikolp", + }; + auto indices = allocateIndices(kSize, pool_.get()); + for (int i = 0; i < kSize; ++i) { + indices->asMutable()[i] = i % 2; + } + auto nullBuffer = + AlignedBuffer::allocate(kSize, pool_.get(), bits::kNotNull); + auto* rawNullBuffer = nullBuffer->asMutable(); + for (int i = 0; i < kSize; i += 100) { + bits::setNull(rawNullBuffer, i); + } + auto dict = BaseVector::wrapInDictionary( + nullBuffer, + indices, + kSize, + makeFlatVector({baseStrings[0], baseStrings[1]})); + + auto rows = makeRowVector({"c0", "c1"}, {dict, dict}); + auto tableType = asRowType(rows->type()); + auto file = TempFilePath::create(); + writeToFile(file->getPath(), {rows}); + createDuckDbTable({rows}); + + const auto outputType = ROW({"c1"}, {VARCHAR()}); + auto makePlan = [&](const std::vector& filters) { + return PlanBuilder() + .tableScan(outputType, filters, "", tableType) + .planNode(); + }; + + assertQuery( + makePlan({"c0 is not null"}), + {file}, + "SELECT c1 FROM tmp WHERE c0 is not null"); + assertQuery( + makePlan({"c0 is null"}), {file}, "SELECT c1 FROM tmp WHERE c0 is null"); +} + TEST_F(TableScanTest, compactComplexNulls) { constexpr int kSize = 10; auto iota = makeFlatVector(kSize, folly::identity);