Skip to content

Commit

Permalink
Fix conversion of VARBINARY values to variant in QueryAssertions.* (f…
Browse files Browse the repository at this point in the history
…acebookincubator#7618)

Summary:
VARBINARY result from Velox and DuckDB used to be converted to variant with
TypeKind::VARCHAR causing confusing failures in the AggregationFuzzer.

Pull Request resolved: facebookincubator#7618

Reviewed By: xiaoxmeng

Differential Revision: D51416568

Pulled By: mbasmanova

fbshipit-source-id: e673da4d668dc93f933a40a0095ab1d5dd3506e9
  • Loading branch information
mbasmanova authored and facebook-github-bot committed Nov 17, 2023
1 parent 46f47e9 commit 3d8a881
Show file tree
Hide file tree
Showing 3 changed files with 64 additions and 57 deletions.
17 changes: 17 additions & 0 deletions velox/exec/tests/QueryAssertionsTest.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -452,4 +452,21 @@ TEST_F(QueryAssertionsTest, nullVariant) {
assertQuery(plan, "SELECT * FROM tmp");
}

TEST_F(QueryAssertionsTest, varbinary) {
auto data = makeRowVector({makeFlatVector<std::string>(
{"Short string", "Longer strings...", "abc"}, VARBINARY())});

auto rowType = asRowType(data->type());

createDuckDbTable({data});

auto duckResult = duckDbQueryRunner_.execute("SELECT * FROM tmp", rowType);
ASSERT_EQ(duckResult.size(), data->size());
ASSERT_EQ(duckResult.begin()->begin()->kind(), TypeKind::VARBINARY);
ASSERT_TRUE(assertEqualResults(duckResult, rowType, {data}));

auto plan = PlanBuilder().values({data}).planNode();
assertQuery(plan, "SELECT * FROM tmp");
}

} // namespace facebook::velox::test
80 changes: 39 additions & 41 deletions velox/exec/tests/utils/QueryAssertions.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -188,41 +188,40 @@ ::duckdb::Value duckValueAt<TypeKind::MAP>(
}

template <TypeKind kind>
velox::variant
variantAt(::duckdb::DataChunk* dataChunk, int32_t row, int32_t column) {
variant variantAt(::duckdb::DataChunk* dataChunk, int32_t row, int32_t column) {
using T = typename KindToFlatVector<kind>::WrapperType;
return velox::variant(dataChunk->GetValue(column, row).GetValue<T>());
return variant(dataChunk->GetValue(column, row).GetValue<T>());
}

template <>
velox::variant variantAt<TypeKind::VARCHAR>(
variant variantAt<TypeKind::VARCHAR>(
::duckdb::DataChunk* dataChunk,
int32_t row,
int32_t column) {
return velox::variant(
return variant(
StringView(::duckdb::StringValue::Get(dataChunk->GetValue(column, row))));
}

template <>
velox::variant variantAt<TypeKind::VARBINARY>(
variant variantAt<TypeKind::VARBINARY>(
::duckdb::DataChunk* dataChunk,
int32_t row,
int32_t column) {
return velox::variant(
return variant::binary(
StringView(::duckdb::StringValue::Get(dataChunk->GetValue(column, row))));
}

template <>
velox::variant variantAt<TypeKind::TIMESTAMP>(
variant variantAt<TypeKind::TIMESTAMP>(
::duckdb::DataChunk* dataChunk,
int32_t row,
int32_t column) {
return velox::variant::timestamp(duckdbTimestampToVelox(
return variant::timestamp(duckdbTimestampToVelox(
dataChunk->GetValue(column, row).GetValue<::duckdb::timestamp_t>()));
}

template <TypeKind kind>
velox::variant variantAt(const ::duckdb::Value& value) {
variant variantAt(const ::duckdb::Value& value) {
if (value.type() == ::duckdb::LogicalType::INTERVAL) {
return ::duckdb::Interval::GetMicro(value.GetValue<::duckdb::interval_t>());
} else if (value.type() == ::duckdb::LogicalType::DATE) {
Expand All @@ -231,34 +230,32 @@ velox::variant variantAt(const ::duckdb::Value& value) {
// NOTE: duckdb only support native cpp type for GetValue so we need to use
// DeepCopiedType instead of WrapperType here.
using T = typename TypeTraits<kind>::DeepCopiedType;
return velox::variant(value.GetValue<T>());
return variant(value.GetValue<T>());
}
}

template <>
velox::variant variantAt<TypeKind::TIMESTAMP>(const ::duckdb::Value& value) {
return velox::variant::timestamp(
variant variantAt<TypeKind::TIMESTAMP>(const ::duckdb::Value& value) {
return variant::timestamp(
duckdbTimestampToVelox(value.GetValue<::duckdb::timestamp_t>()));
}

template <>
velox::variant variantAt<TypeKind::VARCHAR>(const ::duckdb::Value& value) {
return velox::variant(StringView(::duckdb::StringValue::Get(value)));
variant variantAt<TypeKind::VARCHAR>(const ::duckdb::Value& value) {
return variant(StringView(::duckdb::StringValue::Get(value)));
}

template <>
velox::variant variantAt<TypeKind::VARBINARY>(const ::duckdb::Value& value) {
return velox::variant(StringView(::duckdb::StringValue::Get(value)));
variant variantAt<TypeKind::VARBINARY>(const ::duckdb::Value& value) {
return variant::binary(StringView(::duckdb::StringValue::Get(value)));
}

variant nullVariant(const TypePtr& type) {
return variant(type->kind());
}

velox::variant rowVariantAt(
const ::duckdb::Value& vector,
const TypePtr& rowType) {
std::vector<velox::variant> values;
variant rowVariantAt(const ::duckdb::Value& vector, const TypePtr& rowType) {
std::vector<variant> values;
const auto& structValue = ::duckdb::StructValue::GetChildren(vector);
for (size_t i = 0; i < structValue.size(); ++i) {
auto currChild = structValue[i];
Expand All @@ -274,12 +271,10 @@ velox::variant rowVariantAt(
values.push_back(value);
}
}
return velox::variant::row(std::move(values));
return variant::row(std::move(values));
}

velox::variant mapVariantAt(
const ::duckdb::Value& vector,
const TypePtr& mapType) {
variant mapVariantAt(const ::duckdb::Value& vector, const TypePtr& mapType) {
std::map<variant, variant> map;

const auto& mapValue = ::duckdb::StructValue::GetChildren(vector);
Expand Down Expand Up @@ -309,10 +304,10 @@ velox::variant mapVariantAt(
}
map.insert({variantKey, variantValue});
}
return velox::variant::map(map);
return variant::map(map);
}

velox::variant arrayVariantAt(
variant arrayVariantAt(
const ::duckdb::Value& vector,
const TypePtr& arrayType) {
std::vector<variant> array;
Expand All @@ -334,7 +329,7 @@ velox::variant arrayVariantAt(
array.push_back(variant);
}
}
return velox::variant::array(std::move(array));
return variant::array(std::move(array));
}

std::vector<MaterializedRow> materialize(
Expand Down Expand Up @@ -389,31 +384,36 @@ std::vector<MaterializedRow> materialize(
}

template <TypeKind kind>
velox::variant variantAt(VectorPtr vector, int32_t row) {
variant variantAt(VectorPtr vector, int32_t row) {
using T = typename KindToFlatVector<kind>::WrapperType;
return velox::variant(vector->as<SimpleVector<T>>()->valueAt(row));
return variant(vector->as<SimpleVector<T>>()->valueAt(row));
}

template <>
variant variantAt<TypeKind::VARBINARY>(VectorPtr vector, int32_t row) {
return variant::binary(vector->as<SimpleVector<StringView>>()->valueAt(row));
}

variant variantAt(const VectorPtr& vector, vector_size_t row);

velox::variant arrayVariantAt(const VectorPtr& vector, vector_size_t row) {
variant arrayVariantAt(const VectorPtr& vector, vector_size_t row) {
auto arrayVector = vector->wrappedVector()->as<ArrayVector>();
auto& elements = arrayVector->elements();

auto wrappedRow = vector->wrappedIndex(row);
auto offset = arrayVector->offsetAt(wrappedRow);
auto size = arrayVector->sizeAt(wrappedRow);

std::vector<velox::variant> array;
std::vector<variant> array;
array.reserve(size);
for (auto i = 0; i < size; i++) {
auto innerRow = offset + i;
array.push_back(variantAt(elements, innerRow));
}
return velox::variant::array(array);
return variant::array(array);
}

velox::variant mapVariantAt(const VectorPtr& vector, vector_size_t row) {
variant mapVariantAt(const VectorPtr& vector, vector_size_t row) {
auto mapVector = vector->wrappedVector()->as<MapVector>();
auto& mapKeys = mapVector->mapKeys();
auto& mapValues = mapVector->mapValues();
Expand All @@ -429,18 +429,18 @@ velox::variant mapVariantAt(const VectorPtr& vector, vector_size_t row) {
auto value = variantAt(mapValues, innerRow);
map.insert({key, value});
}
return velox::variant::map(map);
return variant::map(map);
}

velox::variant rowVariantAt(const VectorPtr& vector, vector_size_t row) {
variant rowVariantAt(const VectorPtr& vector, vector_size_t row) {
auto rowValues = vector->wrappedVector()->as<RowVector>();
auto wrappedRow = vector->wrappedIndex(row);

std::vector<velox::variant> values;
std::vector<variant> values;
for (auto& child : rowValues->children()) {
values.push_back(variantAt(child, wrappedRow));
}
return velox::variant::row(std::move(values));
return variant::row(std::move(values));
}

variant variantAt(const VectorPtr& vector, vector_size_t row) {
Expand Down Expand Up @@ -1404,9 +1404,7 @@ std::shared_ptr<Task> assertQuery(
return result.first->task();
}

velox::variant readSingleValue(
const core::PlanNodePtr& plan,
int32_t maxDrivers) {
variant readSingleValue(const core::PlanNodePtr& plan, int32_t maxDrivers) {
CursorParameters params;
params.planNode = plan;
params.maxDrivers = maxDrivers;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ class BloomFilterAggAggregateTest
allowInputShuffle();
}

std::string getSerializedBloomFilter(int32_t capacity) {
VectorPtr getSerializedBloomFilter(int32_t capacity) {
BloomFilter bloomFilter;
bloomFilter.reset(capacity);
for (auto i = 0; i < 9; ++i) {
Expand All @@ -41,40 +41,33 @@ class BloomFilterAggAggregateTest
std::string data;
data.resize(bloomFilter.serializedSize());
bloomFilter.serialize(data.data());
return data;
return makeConstant(StringView(data), 1, VARBINARY());
}
};
} // namespace

TEST_F(BloomFilterAggAggregateTest, basic) {
auto vectors = {makeRowVector({makeFlatVector<int64_t>(
100, [](vector_size_t row) { return row % 9; })})};
auto bloomFilter = getSerializedBloomFilter(4);
auto expected = {
makeRowVector({makeConstant<StringView>(StringView(bloomFilter), 1)})};

auto expected = {makeRowVector({getSerializedBloomFilter(4)})};
testAggregations(vectors, {}, {"bloom_filter_agg(c0, 5, 64)"}, expected);
}

TEST_F(BloomFilterAggAggregateTest, bloomFilterAggArgument) {
auto vectors = {makeRowVector({makeFlatVector<int64_t>(
100, [](vector_size_t row) { return row % 9; })})};

auto bloomFilter1 = getSerializedBloomFilter(3);
auto expected1 = {
makeRowVector({makeConstant<StringView>(StringView(bloomFilter1), 1)})};
auto expected1 = {makeRowVector({getSerializedBloomFilter(3)})};
testAggregations(vectors, {}, {"bloom_filter_agg(c0, 6)"}, expected1);

// This capacity is kMaxNumBits / 16.
auto bloomFilter2 = getSerializedBloomFilter(262144);
auto expected2 = {
makeRowVector({makeConstant<StringView>(StringView(bloomFilter2), 1)})};
auto expected2 = {makeRowVector({getSerializedBloomFilter(262144)})};
testAggregations(vectors, {}, {"bloom_filter_agg(c0)"}, expected2);
}

TEST_F(BloomFilterAggAggregateTest, emptyInput) {
auto vectors = {makeRowVector({makeFlatVector<int64_t>({})})};
auto expected = {makeRowVector(
{makeNullableFlatVector<StringView>({std::nullopt}, VARBINARY())})};
auto expected = {makeRowVector({makeNullConstant(TypeKind::VARBINARY, 1)})};
testAggregations(vectors, {}, {"bloom_filter_agg(c0, 5, 64)"}, expected);
}

Expand All @@ -91,9 +84,8 @@ TEST_F(BloomFilterAggAggregateTest, nullBloomFilter) {
TEST_F(BloomFilterAggAggregateTest, config) {
auto vector = {makeRowVector({makeFlatVector<int64_t>(
100, [](vector_size_t row) { return row % 9; })})};
auto bloomFilter = getSerializedBloomFilter(100);
std::vector<RowVectorPtr> expected = {
makeRowVector({makeConstant<StringView>(StringView(bloomFilter), 1)})};
makeRowVector({getSerializedBloomFilter(100)})};

// This config will decide the bloom filter capacity, the expected value is
// the serialized bloom filter, it should be consistent.
Expand Down

0 comments on commit 3d8a881

Please sign in to comment.