diff --git a/velox/functions/lib/Re2Functions.cpp b/velox/functions/lib/Re2Functions.cpp index ef1291fbe9d5..78ce3e2f29b5 100644 --- a/velox/functions/lib/Re2Functions.cpp +++ b/velox/functions/lib/Re2Functions.cpp @@ -144,8 +144,14 @@ bool re2Extract( } } else { const re2::StringPiece extracted = groups[groupId]; - result.setNoCopy(row, StringView(extracted.data(), extracted.size())); - return !StringView::isInline(extracted.size()); + // A group that did not match has null data(); set the result to NULL. + if (extracted.data()) { + result.setNoCopy(row, StringView(extracted.data(), extracted.size())); + return !StringView::isInline(extracted.size()); + } else { + result.setNull(row, true); + return false; + } } } diff --git a/velox/functions/lib/tests/Re2FunctionsTest.cpp b/velox/functions/lib/tests/Re2FunctionsTest.cpp index 58fd48ea5de9..f27d9cb46453 100644 --- a/velox/functions/lib/tests/Re2FunctionsTest.cpp +++ b/velox/functions/lib/tests/Re2FunctionsTest.cpp @@ -369,6 +369,9 @@ void testRe2Extract(F&& regexExtract) { EXPECT_EQ(regexExtract(std::nullopt, "\\d+", 0), std::nullopt); EXPECT_EQ(regexExtract(" 123 ", std::nullopt, 0), std::nullopt); EXPECT_EQ(regexExtract(" 123 ", "\\d+", std::nullopt), std::nullopt); + // A capture group that does not participate in the match yields null. + EXPECT_EQ( + regexExtract("rat cat\nbat dog", "ra(.)|blah(.)(.)", 2), std::nullopt); } TEST_F(Re2FunctionsTest, regexExtract) {