Skip to content

Commit

Permalink
Optimize readWithVisitor for TrivialEncoding and MainlyConstantEncoding (#60)
Browse files Browse the repository at this point in the history

Summary:
X-link: facebookincubator/velox#10021

Pull Request resolved: #60

- Fast path for `TrivialEncoding::readWithVisitor`
- Fast path for `MainlyConstantEncoding::readWithVisitor`
- Store `encodingType`, `dataType`, and `rowCount` as members of the `Encoding` object to avoid repeated memory fetches from `data_`
- Use skip functor only in `readWithVisitorSlow` to avoid virtual call cost

bypass-github-export-checks

Reviewed By: oerling

Differential Revision: D58085138

fbshipit-source-id: e308d2c44c8e45f89a2367c8b88f1adb6511b6f9
  • Loading branch information
Yuhta authored and facebook-github-bot committed Jun 3, 2024
1 parent c2fa2a4 commit acad60d
Show file tree
Hide file tree
Showing 10 changed files with 358 additions and 126 deletions.
3 changes: 1 addition & 2 deletions dwio/nimble/encodings/ConstantEncoding.h
Original file line number Diff line number Diff line change
Expand Up @@ -112,8 +112,7 @@ template <typename V>
void ConstantEncoding<T>::readWithVisitor(
V& visitor,
ReadWithVisitorParams& params) {
this->template readWithVisitorSlow<false>(
visitor, params, [&] { return value_; });
detail::readWithVisitorSlow(visitor, params, nullptr, [&] { return value_; });
}

template <typename T>
Expand Down
2 changes: 1 addition & 1 deletion dwio/nimble/encodings/DictionaryEncoding.h
Original file line number Diff line number Diff line change
Expand Up @@ -190,7 +190,7 @@ void DictionaryEncoding<T>::readWithVisitor(
&indicesHook));
indicesVisitor.setRowIndex(startRowIndex);
callReadWithVisitor(*indicesEncoding_, indicesVisitor, params);
this->template readWithVisitorSlow<false>(visitor, params, [&] {
detail::readWithVisitorSlow(visitor, params, nullptr, [&] {
auto index = buffer_[visitor.rowIndex() - startRowIndex];
return alphabet_[index];
});
Expand Down
18 changes: 7 additions & 11 deletions dwio/nimble/encodings/Encoding.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -20,17 +20,13 @@

namespace facebook::nimble {

EncodingType Encoding::encodingType() const {
return static_cast<EncodingType>(data_[kEncodingTypeOffset]);
}

DataType Encoding::dataType() const {
return static_cast<DataType>(data_[kDataTypeOffset]);
}

uint32_t Encoding::rowCount() const {
return *reinterpret_cast<const uint32_t*>(data_.data() + kRowCountOffset);
}
// Builds the base encoding state and caches the fixed header fields of the
// serialized encoding, so that encodingType(), dataType() and rowCount() can
// return members instead of re-reading `data_` on every call.
//
// memoryPool: kept by reference in memoryPool_.
// data: view over the serialized encoding. The bytes at kEncodingTypeOffset,
//       kDataTypeOffset and kRowCountOffset are read here, so the view must
//       be long enough to contain them (not checked here).
//
// The header initializers read from the member `data_`, not the parameter, so
// they rely on `data_` being declared before the cached fields.
Encoding::Encoding(velox::memory::MemoryPool& memoryPool, std::string_view data)
    : memoryPool_{memoryPool},
      data_{data},
      // Explicit cast, matching dataType_ below: does not depend on the
      // char -> underlying-type conversion being non-narrowing.
      encodingType_{static_cast<EncodingType>(data_[kEncodingTypeOffset])},
      dataType_{static_cast<DataType>(data_[kDataTypeOffset])},
      // NOTE(review): the row count is loaded in host byte order through a
      // reinterpret_cast'ed pointer; if data_.data() + kRowCountOffset is not
      // 4-byte aligned this is an unaligned load -- consider a memcpy-based
      // read. TODO confirm the offset and the buffer alignment.
      rowCount_{
          *reinterpret_cast<const uint32_t*>(data_.data() + kRowCountOffset)} {}

/* static */ void Encoding::copyIOBuf(char* pos, const folly::IOBuf& buf) {
[[maybe_unused]] size_t length = buf.computeChainDataLength();
Expand Down
69 changes: 45 additions & 24 deletions dwio/nimble/encodings/Encoding.h
Original file line number Diff line number Diff line change
Expand Up @@ -93,13 +93,20 @@ struct ReadWithVisitorParams {

class Encoding {
public:
Encoding(velox::memory::MemoryPool& memoryPool, std::string_view data)
: memoryPool_{memoryPool}, data_{data} {}
Encoding(velox::memory::MemoryPool& memoryPool, std::string_view data);
virtual ~Encoding() = default;

EncodingType encodingType() const;
DataType dataType() const;
uint32_t rowCount() const;
// Returns the encoding type held in encodingType_ (read from the serialized
// header by the constructor).
EncodingType encodingType() const {
return encodingType_;
}

// Returns the data type held in dataType_ (read from the serialized header by
// the constructor).
DataType dataType() const {
return dataType_;
}

// Returns the row count held in rowCount_ (read from the serialized header by
// the constructor).
uint32_t rowCount() const {
return rowCount_;
}

static void copyIOBuf(char* pos, const folly::IOBuf& buf);

Expand Down Expand Up @@ -219,14 +226,11 @@ class Encoding {
uint32_t rowCount,
char*& pos);

template <bool kSkip, typename DecoderVisitor, typename F>
void readWithVisitorSlow(
DecoderVisitor& visitor,
const ReadWithVisitorParams& params,
F&& decodeOne);

velox::memory::MemoryPool& memoryPool_;
const std::string_view data_;
const EncodingType encodingType_;
const DataType dataType_;
const uint32_t rowCount_;
};

// The TypedEncoding<physicalType> class exposes the same interface as the base
Expand Down Expand Up @@ -358,7 +362,7 @@ T castFromPhysicalType(const PhysicalType& value) {
}
}

template <bool kSkip, typename DecoderVisitor, typename Skip, typename F>
template <typename DecoderVisitor, typename Skip, typename F>
void readWithVisitorSlow(
DecoderVisitor& visitor,
const ReadWithVisitorParams& params,
Expand All @@ -375,7 +379,7 @@ void readWithVisitorSlow(
auto numScanned = params.numScanned;
bool atEnd = false;
while (!atEnd) {
if constexpr (kSkip) {
if constexpr (!std::is_null_pointer_v<Skip>) {
auto numNonNulls = visitor.currentRow() - numScanned;
if (nulls) {
numNonNulls -=
Expand Down Expand Up @@ -499,18 +503,35 @@ void readWithVisitorFast(
}
}

} // namespace detail
// Maps a visitor's data type (DecoderVisitor::DataType) to the type stored in
// the values buffer of the selective column reader: folly::StringPiece maps to
// velox::StringView, and every other type maps to itself.
template <typename DataType>
using ValueType = std::conditional_t<
std::is_same_v<DataType, folly::StringPiece>,
velox::StringView,
DataType>;

// Converts one decoded datum into the representation kept in the values
// buffer. String data (folly::StringPiece) goes through the reader's
// copyStringValueIfNeed(); any other type is handed back unchanged.
template <typename V, typename DataType>
ValueType<DataType> dataToValue(const V& visitor, DataType data) {
  constexpr bool kIsString = std::is_same_v<DataType, folly::StringPiece>;
  if constexpr (!kIsString) {
    return data;
  } else {
    return visitor.reader().copyStringValueIfNeed(data);
  }
}

template <bool kSkip, typename DecoderVisitor, typename F>
void Encoding::readWithVisitorSlow(
DecoderVisitor& visitor,
const ReadWithVisitorParams& params,
F&& decodeOne) {
detail::readWithVisitorSlow<kSkip>(
visitor,
params,
[&](auto toSkip) { skip(toSkip); },
std::forward<F>(decodeOne));
// Returns the position in the reader's values buffer where `size` values of
// type T should be written for this visitor.
template <typename T, typename V>
T* mutableValues(const V& visitor, vector_size_t size) {
  T* const base = visitor.reader().template mutableValues<T>(size);
  if constexpr (!V::kHasHook) {
    return base;
  } else {
    // With a hook, write into the tail of the buffer so that values from the
    // previous chunk are not overwritten with dictionary indices.
    const auto tailOffset = visitor.reader().valuesCapacity() / sizeof(T) -
        size - visitor.reader().numValues();
    return base + tailOffset;
  }
}

} // namespace detail

} // namespace facebook::nimble
12 changes: 8 additions & 4 deletions dwio/nimble/encodings/FixedBitWidthEncoding.h
Original file line number Diff line number Diff line change
Expand Up @@ -143,10 +143,14 @@ template <typename V>
void FixedBitWidthEncoding<T>::readWithVisitor(
V& visitor,
ReadWithVisitorParams& params) {
this->template readWithVisitorSlow<true>(visitor, params, [&] {
physicalType value = fixedBitArray_.get(row_++) + baseline_;
return value;
});
detail::readWithVisitorSlow(
visitor,
params,
[&](auto toSkip) { skip(toSkip); },
[&] {
physicalType value = fixedBitArray_.get(row_++) + baseline_;
return value;
});
}

template <typename T>
Expand Down
166 changes: 156 additions & 10 deletions dwio/nimble/encodings/MainlyConstantEncoding.h
Original file line number Diff line number Diff line change
Expand Up @@ -76,6 +76,14 @@ class MainlyConstantEncoding final
template <typename DecoderVisitor>
void readWithVisitor(DecoderVisitor& visitor, ReadWithVisitorParams& params);

template <bool kScatter, typename Visitor>
void bulkScan(
Visitor& visitor,
vector_size_t currentNonNullRow,
const vector_size_t* nonNullRows,
vector_size_t numNonNulls,
const vector_size_t* scatterRows);

static std::string_view encode(
EncodingSelection<physicalType>& selection,
std::span<const physicalType> values,
Expand Down Expand Up @@ -171,21 +179,159 @@ void MainlyConstantEncoding<T>::materialize(uint32_t rowCount, void* buffer) {
"Encoding size mismatch.");
}

// Bulk-reads the requested rows in one pass instead of decoding value by
// value.
//
// The is-common flags up to the last requested row are materialized as a
// bitmap and the non-common ("other") values for that range are materialized
// into otherValuesBuffer_. The unset bits (non-common positions) are then
// visited in order; requested rows lying between two such positions hold
// commonValue_ and are accounted for as a run.
//
// Template parameters:
//   kScatter - when true, row numbers reported for filter hits come from
//              scatterRows instead of nonNullRows; when additionally there is
//              neither a filter nor a hook (kScatterValues), values are
//              written at positions derived from scatterRows.
//   V        - visitor type; V::DataType, V::kHasFilter, V::kHasHook,
//              V::kFilterOnly and V::dense select the code paths below.
// Parameters:
//   visitor           - supplies the filter, output buffers, row cursor and
//                       hook.
//   currentNonNullRow - added to each bitmap position to make it comparable
//                       with the entries of nonNullRows.
//   nonNullRows       - requested rows; entry numNonNulls - 1 is read up
//                       front, so numNonNulls must be at least 1. Presumably
//                       sorted ascending -- TODO confirm against callers.
//   numNonNulls       - number of entries in nonNullRows.
//   scatterRows       - output row numbers; used when kScatter is set and
//                       always passed to the hook.
template <typename T>
template <bool kScatter, typename V>
void MainlyConstantEncoding<T>::bulkScan(
V& visitor,
vector_size_t currentNonNullRow,
const vector_size_t* nonNullRows,
vector_size_t numNonNulls,
const vector_size_t* scatterRows) {
using DataType = typename V::DataType;
using ValueType = detail::ValueType<DataType>;
constexpr bool kScatterValues = kScatter && !V::kHasFilter && !V::kHasHook;
ValueType* values;
// The common value is tested against the filter once; the result is reused
// for every row that holds it.
const auto commonData = detail::castFromPhysicalType<DataType>(commonValue_);
const bool commonPassed =
velox::common::applyFilter(visitor.filter(), commonData);
if constexpr (!V::kFilterOnly) {
// Reserve output for all remaining rows and, if the common value passes,
// pre-fill it; the code below only stores non-common values explicitly.
auto numRows = visitor.numRows() - visitor.rowIndex();
values = detail::mutableValues<ValueType>(visitor, numRows);
if (commonPassed) {
auto commonValue = detail::dataToValue(visitor, commonData);
std::fill(values, values + numRows, commonValue);
}
}
// Number of is-common flags needed: from currentNonNullRow through the last
// requested row, inclusive.
const auto numIsCommon = nonNullRows[numNonNulls - 1] + 1 - currentNonNullRow;
isCommonBuffer_.resize(velox::bits::nwords(numIsCommon) * sizeof(uint64_t));
auto* isCommon = reinterpret_cast<uint64_t*>(isCommonBuffer_.data());
// TODO: Wrap otherValues_ in BufferedEncoding. This way when isCommon_ is
// SparseBoolEncoding or RLE, we can materialize it on demand and do not need
// to allocate memory for the indices.
isCommon_->materializeBoolsAsBits(numIsCommon, isCommon, 0);
// Non-common values in range = number of flags minus the counted (common)
// bits.
auto numOtherValues =
numIsCommon - velox::bits::countBits(isCommon, 0, numIsCommon);
otherValuesBuffer_.resize(numOtherValues);
otherValues_->materialize(numOtherValues, otherValuesBuffer_.data());
// From here on numOtherValues is reused as the read cursor into
// otherValuesBuffer_.
numOtherValues = 0;
auto* filterHits = V::kHasFilter ? visitor.outputRows(numNonNulls) : nullptr;
auto* rows = kScatter ? scatterRows : nonNullRows;
vector_size_t numValues = 0;
vector_size_t numHits = 0;
vector_size_t nonNullRowIndex = 0;
velox::bits::forEachUnsetBit(isCommon, 0, numIsCommon, [&](vector_size_t i) {
// Translate the bitmap position into the coordinate used by nonNullRows.
i += currentNonNullRow;
auto commonBegin = nonNullRowIndex;
// Advance to the first requested row that is not before i; the requested
// rows skipped over hold the common value. The dense path adds the distance
// directly, which assumes consecutive row numbers.
if constexpr (V::dense) {
nonNullRowIndex += i - nonNullRows[nonNullRowIndex];
} else {
while (nonNullRows[nonNullRowIndex] < i) {
++nonNullRowIndex;
}
}
const auto numCommon = nonNullRowIndex - commonBegin;
if (V::kHasFilter && commonPassed && numCommon > 0) {
auto* begin = rows + commonBegin;
std::copy(begin, begin + numCommon, filterHits + numHits);
numHits += numCommon;
}
// The non-common value at i is not a requested row: account for the
// preceding common run, consume the other value, and move on.
if (nonNullRows[nonNullRowIndex] > i) {
if constexpr (!V::kFilterOnly) {
vector_size_t numRows;
if constexpr (kScatterValues) {
numRows = scatterRows[nonNullRowIndex] - visitor.rowIndex();
visitor.addRowIndex(numRows);
} else {
numRows = commonPassed * numCommon;
}
numValues += numRows;
}
++numOtherValues;
return;
}
// The requested row at nonNullRowIndex holds a non-common value.
auto otherData = detail::castFromPhysicalType<DataType>(
otherValuesBuffer_[numOtherValues++]);
bool otherPassed;
if constexpr (V::kHasFilter) {
otherPassed = velox::common::applyFilter(visitor.filter(), otherData);
if (otherPassed) {
filterHits[numHits++] = rows[nonNullRowIndex];
}
} else {
otherPassed = true;
}
if constexpr (!V::kFilterOnly) {
auto* begin = values + numValues;
vector_size_t numRows;
if constexpr (kScatterValues) {
// Store the value at its scattered position, then advance the visitor's
// row cursor to the next scattered row (or to the end of the batch when
// this was the last requested row).
begin[scatterRows[nonNullRowIndex] - visitor.rowIndex()] =
detail::dataToValue(visitor, otherData);
auto end = nonNullRowIndex + 1;
if (FOLLY_UNLIKELY(end == numNonNulls)) {
numRows = visitor.numRows() - visitor.rowIndex();
} else {
numRows = scatterRows[end] - visitor.rowIndex();
}
visitor.addRowIndex(numRows);
} else {
// Step past the common run (already pre-filled when it passed), then
// append the other value if it passed.
numRows = commonPassed * numCommon;
if (otherPassed) {
begin[numRows++] = detail::dataToValue(visitor, otherData);
}
}
numValues += numRows;
}
++nonNullRowIndex;
});
// Requested rows after the last non-common position all hold the common
// value.
auto numCommon = numNonNulls - nonNullRowIndex;
if (commonPassed && numCommon > 0) {
if constexpr (V::kHasFilter) {
auto* begin = rows + nonNullRowIndex;
std::copy(begin, begin + numCommon, filterHits + numHits);
numHits += numCommon;
}
if constexpr (!V::kFilterOnly) {
if constexpr (kScatterValues) {
numValues += visitor.numRows() - visitor.rowIndex();
} else {
numValues += numCommon;
}
}
}
// Move the visitor's row cursor to the end of the batch.
visitor.setRowIndex(visitor.numRows());
if constexpr (V::kHasHook) {
// The hook receives the values directly, with scatterRows as row numbers.
NIMBLE_DASSERT(numValues == numNonNulls, "");
visitor.hook().addValues(
scatterRows, values, numNonNulls, sizeof(ValueType));
} else {
visitor.addNumValues(V::kFilterOnly ? numHits : numValues);
}
}

template <typename T>
template <typename V>
void MainlyConstantEncoding<T>::readWithVisitor(
V& visitor,
ReadWithVisitorParams& params) {
this->template readWithVisitorSlow<true>(visitor, params, [&] {
bool isCommon;
isCommon_->materialize(1, &isCommon);
if (isCommon) {
return commonValue_;
}
physicalType otherValue;
otherValues_->materialize(1, &otherValue);
return otherValue;
});
auto* nulls = visitor.reader().rawNullsInReadRange();
if (velox::dwio::common::useFastPath(visitor, nulls)) {
detail::readWithVisitorFast(*this, visitor, params, nulls);
return;
}
detail::readWithVisitorSlow(
visitor,
params,
[&](auto toSkip) { skip(toSkip); },
[&] {
bool isCommon;
isCommon_->materialize(1, &isCommon);
if (isCommon) {
return commonValue_;
}
physicalType otherValue;
otherValues_->materialize(1, &otherValue);
return otherValue;
});
}

namespace internal {} // namespace internal
Expand Down
Loading

0 comments on commit acad60d

Please sign in to comment.