Skip to content

Commit

Permalink
Fix the extract values function sigature when used with filter proces…
Browse files Browse the repository at this point in the history
…s in selective reader (facebookincubator#10956)

Summary:
Pull Request resolved: facebookincubator#10956

The processFilter depends on the exact value callback type match to detect if the filter to drop value or not.
If not drop value, it will add value to the result buffer of the selective reader. For null filter type, it add nulls to
the null result buffer of the selective reader. The recent refactor changes the extract value callback from
T to const T& which breaks the comparison. It tries to add value to the null result buffer which is not initialized
in the prepare read of the selective reader as the reader is a filter only reader.

This PR reverts the signature change and verified with both failed query shadow and a new unit test to repro this.
We will refactor this later to avoid a copy extract value callback later

Reviewed By: Yuhta

Differential Revision: D62394606
  • Loading branch information
xiaoxmeng authored and facebook-github-bot committed Sep 10, 2024
1 parent cc10dc2 commit 3135b47
Show file tree
Hide file tree
Showing 6 changed files with 65 additions and 22 deletions.
4 changes: 2 additions & 2 deletions velox/dwio/common/ColumnVisitors.h
Original file line number Diff line number Diff line change
Expand Up @@ -162,7 +162,7 @@ class ColumnVisitor {
TFilter& filter,
SelectiveColumnReader* reader,
const RowSet& rows,
const ExtractValues& values)
ExtractValues values)
: filter_(filter),
reader_(reader),
allowNulls_(!TFilter::deterministic || filter.testNull()),
Expand Down Expand Up @@ -719,7 +719,7 @@ class DictionaryColumnVisitor
TFilter& filter,
SelectiveColumnReader* reader,
const RowSet& rows,
const ExtractValues& values)
ExtractValues values)
: ColumnVisitor<T, TFilter, ExtractValues, isDense>(
filter,
reader,
Expand Down
4 changes: 2 additions & 2 deletions velox/dwio/common/SelectiveByteRleColumnReader.h
Original file line number Diff line number Diff line change
Expand Up @@ -60,7 +60,7 @@ class SelectiveByteRleColumnReader : public SelectiveColumnReader {
void readHelper(
velox::common::Filter* filter,
const RowSet& rows,
const ExtractValues& extractValues);
ExtractValues extractValues);

template <typename Reader, bool kEncodingHasNulls>
void readCommon(
Expand All @@ -77,7 +77,7 @@ template <
void SelectiveByteRleColumnReader::readHelper(
velox::common::Filter* filter,
const RowSet& rows,
const ExtractValues& extractValues) {
ExtractValues extractValues) {
reinterpret_cast<Reader*>(this)->readWithVisitor(
rows,
ColumnVisitor<int8_t, TFilter, ExtractValues, isDense>(
Expand Down
16 changes: 9 additions & 7 deletions velox/dwio/common/SelectiveFloatingPointColumnReader.h
Original file line number Diff line number Diff line change
Expand Up @@ -56,8 +56,10 @@ class SelectiveFloatingPointColumnReader : public SelectiveColumnReader {
typename TFilter,
bool isDense,
typename ExtractValues>
void
readHelper(velox::common::Filter* filter, RowSet rows, ExtractValues values);
void readHelper(
velox::common::Filter* filter,
const RowSet& rows,
ExtractValues values);

template <
typename Reader,
Expand All @@ -66,11 +68,11 @@ class SelectiveFloatingPointColumnReader : public SelectiveColumnReader {
typename ExtractValues>
void processFilter(
velox::common::Filter* filter,
RowSet rows,
const RowSet& rows,
ExtractValues extractValues);

template <typename Reader, bool isDense>
void processValueHook(RowSet rows, ValueHook* hook);
void processValueHook(const RowSet& rows, ValueHook* hook);
};

template <typename TData, typename TRequested>
Expand All @@ -81,7 +83,7 @@ template <
typename ExtractValues>
void SelectiveFloatingPointColumnReader<TData, TRequested>::readHelper(
velox::common::Filter* filter,
RowSet rows,
const RowSet& rows,
ExtractValues extractValues) {
reinterpret_cast<Reader*>(this)->readWithVisitor(
rows,
Expand All @@ -97,7 +99,7 @@ template <
typename ExtractValues>
void SelectiveFloatingPointColumnReader<TData, TRequested>::processFilter(
velox::common::Filter* filter,
RowSet rows,
const RowSet& rows,
ExtractValues extractValues) {
if (filter == nullptr) {
readHelper<Reader, velox::common::AlwaysTrue, isDense>(
Expand Down Expand Up @@ -144,7 +146,7 @@ void SelectiveFloatingPointColumnReader<TData, TRequested>::processFilter(
template <typename TData, typename TRequested>
template <typename Reader, bool isDense>
void SelectiveFloatingPointColumnReader<TData, TRequested>::processValueHook(
RowSet rows,
const RowSet& rows,
ValueHook* hook) {
switch (hook->kind()) {
case aggregate::AggregationHook::kDoubleSum:
Expand Down
8 changes: 4 additions & 4 deletions velox/dwio/common/SelectiveIntegerColumnReader.h
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,7 @@ class SelectiveIntegerColumnReader : public SelectiveColumnReader {
typename ExtractValues>
void processFilter(
velox::common::Filter* filter,
const ExtractValues& extractValues,
ExtractValues extractValues,
const RowSet& rows);

// Switches based on the type of ValueHook between different readWithVisitor
Expand All @@ -65,7 +65,7 @@ class SelectiveIntegerColumnReader : public SelectiveColumnReader {
void readHelper(
velox::common::Filter* filter,
const RowSet& rows,
const ExtractValues& extractValues);
ExtractValues extractValues);

// The common part of integer reading. calls the appropriate
// instantiation of processValueHook or processFilter based on
Expand All @@ -82,7 +82,7 @@ template <
void SelectiveIntegerColumnReader::readHelper(
velox::common::Filter* filter,
const RowSet& rows,
const ExtractValues& extractValues) {
ExtractValues extractValues) {
switch (valueSize_) {
case 2:
reinterpret_cast<Reader*>(this)->Reader::readWithVisitor(
Expand Down Expand Up @@ -124,7 +124,7 @@ template <
typename ExtractValues>
void SelectiveIntegerColumnReader::processFilter(
velox::common::Filter* filter,
const ExtractValues& extractValues,
ExtractValues extractValues,
const RowSet& rows) {
if (filter == nullptr) {
readHelper<Reader, velox::common::AlwaysTrue, isDense>(
Expand Down
12 changes: 5 additions & 7 deletions velox/dwio/dwrf/reader/SelectiveStringDictionaryColumnReader.h
Original file line number Diff line number Diff line change
Expand Up @@ -68,16 +68,14 @@ class SelectiveStringDictionaryColumnReader
void readWithVisitor(const RowSet& rows, TVisitor visitor);

template <typename TFilter, bool isDense, typename ExtractValues>
void readHelper(
common::Filter* filter,
const RowSet& rows,
const ExtractValues& values);
void
readHelper(common::Filter* filter, const RowSet& rows, ExtractValues values);

template <bool isDense, typename ExtractValues>
void processFilter(
common::Filter* filter,
const RowSet& rows,
const ExtractValues& extractValues);
ExtractValues extractValues);

// Fills 'values' from 'data' and 'lengthDecoder'. The count of
// values is in 'values.numValues'.
Expand Down Expand Up @@ -129,7 +127,7 @@ template <typename TFilter, bool isDense, typename ExtractValues>
void SelectiveStringDictionaryColumnReader::readHelper(
common::Filter* filter,
const RowSet& rows,
const ExtractValues& values) {
ExtractValues values) {
readWithVisitor(
rows,
dwio::common::
Expand All @@ -141,7 +139,7 @@ template <bool isDense, typename ExtractValues>
void SelectiveStringDictionaryColumnReader::processFilter(
common::Filter* filter,
const RowSet& rows,
const ExtractValues& extractValues) {
ExtractValues extractValues) {
if (filter == nullptr) {
readHelper<common::AlwaysTrue, isDense>(
&dwio::common::alwaysTrue(), rows, extractValues);
Expand Down
43 changes: 43 additions & 0 deletions velox/exec/tests/TableScanTest.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3153,6 +3153,49 @@ TEST_F(TableScanTest, mapIsNullFilter) {
"SELECT * FROM tmp WHERE c0 is null");
}

TEST_F(TableScanTest, stringIsNullFilter) {
constexpr int kSize = 1000;
const char* baseStrings[] = {
"qwertyuiopasdfghjklzxcvbnm",
"qazwsxedcrfvtgbyhnujmikolp",
};
auto indices = allocateIndices(kSize, pool_.get());
for (int i = 0; i < kSize; ++i) {
indices->asMutable<vector_size_t>()[i] = i % 2;
}
auto nullBuffer =
AlignedBuffer::allocate<bool>(kSize, pool_.get(), bits::kNotNull);
auto* rawNullBuffer = nullBuffer->asMutable<uint64_t>();
for (int i = 0; i < kSize; i += 100) {
bits::setNull(rawNullBuffer, i);
}
auto dict = BaseVector::wrapInDictionary(
nullBuffer,
indices,
kSize,
makeFlatVector<std::string>({baseStrings[0], baseStrings[1]}));

auto rows = makeRowVector({"c0", "c1"}, {dict, dict});
auto tableType = asRowType(rows->type());
auto file = TempFilePath::create();
writeToFile(file->getPath(), {rows});
createDuckDbTable({rows});

const auto outputType = ROW({"c1"}, {VARCHAR()});
auto makePlan = [&](const std::vector<std::string>& filters) {
return PlanBuilder()
.tableScan(outputType, filters, "", tableType)
.planNode();
};

assertQuery(
makePlan({"c0 is not null"}),
{file},
"SELECT c1 FROM tmp WHERE c0 is not null");
assertQuery(
makePlan({"c0 is null"}), {file}, "SELECT c1 FROM tmp WHERE c0 is null");
}

TEST_F(TableScanTest, compactComplexNulls) {
constexpr int kSize = 10;
auto iota = makeFlatVector<int64_t>(kSize, folly::identity);
Expand Down

0 comments on commit 3135b47

Please sign in to comment.