diff --git a/velox/dwio/common/ColumnVisitors.h b/velox/dwio/common/ColumnVisitors.h index 8c4764736d75..0c665b76886a 100644 --- a/velox/dwio/common/ColumnVisitors.h +++ b/velox/dwio/common/ColumnVisitors.h @@ -162,7 +162,7 @@ class ColumnVisitor { TFilter& filter, SelectiveColumnReader* reader, const RowSet& rows, - const ExtractValues& values) + ExtractValues values) : filter_(filter), reader_(reader), allowNulls_(!TFilter::deterministic || filter.testNull()), @@ -719,7 +719,7 @@ class DictionaryColumnVisitor TFilter& filter, SelectiveColumnReader* reader, const RowSet& rows, - const ExtractValues& values) + ExtractValues values) : ColumnVisitor( filter, reader, diff --git a/velox/dwio/common/SelectiveByteRleColumnReader.h b/velox/dwio/common/SelectiveByteRleColumnReader.h index 407cde02ccf5..43b5e3181bda 100644 --- a/velox/dwio/common/SelectiveByteRleColumnReader.h +++ b/velox/dwio/common/SelectiveByteRleColumnReader.h @@ -60,7 +60,7 @@ class SelectiveByteRleColumnReader : public SelectiveColumnReader { void readHelper( velox::common::Filter* filter, const RowSet& rows, - const ExtractValues& extractValues); + ExtractValues extractValues); template void readCommon( @@ -77,7 +77,7 @@ template < void SelectiveByteRleColumnReader::readHelper( velox::common::Filter* filter, const RowSet& rows, - const ExtractValues& extractValues) { + ExtractValues extractValues) { reinterpret_cast(this)->readWithVisitor( rows, ColumnVisitor( diff --git a/velox/dwio/common/SelectiveFloatingPointColumnReader.h b/velox/dwio/common/SelectiveFloatingPointColumnReader.h index 02604b3eff64..97eb1e758356 100644 --- a/velox/dwio/common/SelectiveFloatingPointColumnReader.h +++ b/velox/dwio/common/SelectiveFloatingPointColumnReader.h @@ -56,8 +56,10 @@ class SelectiveFloatingPointColumnReader : public SelectiveColumnReader { typename TFilter, bool isDense, typename ExtractValues> - void - readHelper(velox::common::Filter* filter, RowSet rows, ExtractValues values); + void readHelper( + velox::common::Filter* filter, + const RowSet& rows, + ExtractValues values); template < typename Reader, @@ -66,11 +68,11 @@ class SelectiveFloatingPointColumnReader : public SelectiveColumnReader { typename ExtractValues> void processFilter( velox::common::Filter* filter, - RowSet rows, + const RowSet& rows, ExtractValues extractValues); template - void processValueHook(RowSet rows, ValueHook* hook); + void processValueHook(const RowSet& rows, ValueHook* hook); }; template @@ -81,7 +83,7 @@ template < typename ExtractValues> void SelectiveFloatingPointColumnReader::readHelper( velox::common::Filter* filter, - RowSet rows, + const RowSet& rows, ExtractValues extractValues) { reinterpret_cast(this)->readWithVisitor( rows, @@ -97,7 +99,7 @@ template < typename ExtractValues> void SelectiveFloatingPointColumnReader::processFilter( velox::common::Filter* filter, - RowSet rows, + const RowSet& rows, ExtractValues extractValues) { if (filter == nullptr) { readHelper( @@ -144,7 +146,7 @@ void SelectiveFloatingPointColumnReader::processFilter( template template void SelectiveFloatingPointColumnReader::processValueHook( - RowSet rows, + const RowSet& rows, ValueHook* hook) { switch (hook->kind()) { case aggregate::AggregationHook::kDoubleSum: diff --git a/velox/dwio/common/SelectiveIntegerColumnReader.h b/velox/dwio/common/SelectiveIntegerColumnReader.h index 7265e32a7921..d6a31dcbe536 100644 --- a/velox/dwio/common/SelectiveIntegerColumnReader.h +++ b/velox/dwio/common/SelectiveIntegerColumnReader.h @@ -48,7 +48,7 @@ class SelectiveIntegerColumnReader : public SelectiveColumnReader { typename ExtractValues> void processFilter( velox::common::Filter* filter, - const ExtractValues& extractValues, + ExtractValues extractValues, const RowSet& rows); // Switches based on the type of ValueHook between different readWithVisitor @@ -65,7 +65,7 @@ class SelectiveIntegerColumnReader : public SelectiveColumnReader { void readHelper( velox::common::Filter* filter, const RowSet& rows, - const ExtractValues& extractValues); + ExtractValues extractValues); // The common part of integer reading. calls the appropriate // instantiation of processValueHook or processFilter based on @@ -82,7 +82,7 @@ template < void SelectiveIntegerColumnReader::readHelper( velox::common::Filter* filter, const RowSet& rows, - const ExtractValues& extractValues) { + ExtractValues extractValues) { switch (valueSize_) { case 2: reinterpret_cast(this)->Reader::readWithVisitor( @@ -124,7 +124,7 @@ template < typename ExtractValues> void SelectiveIntegerColumnReader::processFilter( velox::common::Filter* filter, - const ExtractValues& extractValues, + ExtractValues extractValues, const RowSet& rows) { if (filter == nullptr) { readHelper( diff --git a/velox/dwio/dwrf/reader/SelectiveStringDictionaryColumnReader.h b/velox/dwio/dwrf/reader/SelectiveStringDictionaryColumnReader.h index 3b2dddd88bfd..7583fe550e42 100644 --- a/velox/dwio/dwrf/reader/SelectiveStringDictionaryColumnReader.h +++ b/velox/dwio/dwrf/reader/SelectiveStringDictionaryColumnReader.h @@ -68,16 +68,14 @@ class SelectiveStringDictionaryColumnReader void readWithVisitor(const RowSet& rows, TVisitor visitor); template - void readHelper( - common::Filter* filter, - const RowSet& rows, - const ExtractValues& values); + void + readHelper(common::Filter* filter, const RowSet& rows, ExtractValues values); template void processFilter( common::Filter* filter, const RowSet& rows, - const ExtractValues& extractValues); + ExtractValues extractValues); // Fills 'values' from 'data' and 'lengthDecoder'. The count of // values is in 'values.numValues'. @@ -129,7 +127,7 @@ template void SelectiveStringDictionaryColumnReader::readHelper( common::Filter* filter, const RowSet& rows, - const ExtractValues& values) { + ExtractValues values) { readWithVisitor( rows, dwio::common:: @@ -141,7 +139,7 @@ template void SelectiveStringDictionaryColumnReader::processFilter( common::Filter* filter, const RowSet& rows, - const ExtractValues& extractValues) { + ExtractValues extractValues) { if (filter == nullptr) { readHelper( &dwio::common::alwaysTrue(), rows, extractValues); diff --git a/velox/exec/tests/TableScanTest.cpp b/velox/exec/tests/TableScanTest.cpp index 87f00cc0d3c1..dc6416f7ac56 100644 --- a/velox/exec/tests/TableScanTest.cpp +++ b/velox/exec/tests/TableScanTest.cpp @@ -3153,6 +3153,49 @@ TEST_F(TableScanTest, mapIsNullFilter) { "SELECT * FROM tmp WHERE c0 is null"); } +TEST_F(TableScanTest, stringIsNullFilter) { + constexpr int kSize = 1000; + const char* baseStrings[] = { + "qwertyuiopasdfghjklzxcvbnm", + "qazwsxedcrfvtgbyhnujmikolp", + }; + auto indices = allocateIndices(kSize, pool_.get()); + for (int i = 0; i < kSize; ++i) { + indices->asMutable()[i] = i % 2; + } + auto nullBuffer = + AlignedBuffer::allocate(kSize, pool_.get(), bits::kNotNull); + auto* rawNullBuffer = nullBuffer->asMutable(); + for (int i = 0; i < kSize; i += 100) { + bits::setNull(rawNullBuffer, i); + } + auto dict = BaseVector::wrapInDictionary( + nullBuffer, + indices, + kSize, + makeFlatVector({baseStrings[0], baseStrings[1]})); + + auto rows = makeRowVector({"c0", "c1"}, {dict, dict}); + auto tableType = asRowType(rows->type()); + auto file = TempFilePath::create(); + writeToFile(file->getPath(), {rows}); + createDuckDbTable({rows}); + + const auto outputType = ROW({"c1"}, {VARCHAR()}); + auto makePlan = [&](const std::vector& filters) { + return PlanBuilder() + .tableScan(outputType, filters, "", tableType) + .planNode(); + }; + + assertQuery( + makePlan({"c0 is not null"}), + {file}, + "SELECT c1 FROM tmp WHERE c0 is not null"); + assertQuery( + makePlan({"c0 is null"}), {file}, "SELECT c1 FROM tmp WHERE c0 is null"); +} + TEST_F(TableScanTest, compactComplexNulls) { constexpr int kSize = 10; auto iota = makeFlatVector(kSize, folly::identity);