From 189bcc8868919e7ca0bd3ace71765b4d55aa8b2b Mon Sep 17 00:00:00 2001 From: Pavel Velikhov Date: Mon, 23 Sep 2024 20:11:26 +0000 Subject: [PATCH 1/4] Lower threshold for MapJoin for Scale --- ydb/core/kqp/opt/logical/kqp_opt_cbo.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ydb/core/kqp/opt/logical/kqp_opt_cbo.cpp b/ydb/core/kqp/opt/logical/kqp_opt_cbo.cpp index 9b7947845cd6..aaca4254f1dc 100644 --- a/ydb/core/kqp/opt/logical/kqp_opt_cbo.cpp +++ b/ydb/core/kqp/opt/logical/kqp_opt_cbo.cpp @@ -163,7 +163,7 @@ bool TKqpProviderContext::IsJoinApplicable(const std::shared_ptrStats->ByteSize < 1e8; + return joinKind != EJoinKind::OuterJoin && joinKind != EJoinKind::Exclusion && right->Stats->ByteSize < 1e5; case EJoinAlgoType::GraceJoin: return true; default: From 3e6195198a6128eecc55cbac268e98479aa10f24 Mon Sep 17 00:00:00 2001 From: Pavel Velikhov Date: Tue, 24 Sep 2024 14:32:25 +0000 Subject: [PATCH 2/4] Added a broken OLAP test --- ydb/core/kqp/ut/join/data/schema/tpch.sql | 2 +- ydb/core/kqp/ut/join/kqp_join_order_ut.cpp | 4 ++++ 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/ydb/core/kqp/ut/join/data/schema/tpch.sql b/ydb/core/kqp/ut/join/data/schema/tpch.sql index 72a05a64e21c..d787105842d2 100644 --- a/ydb/core/kqp/ut/join/data/schema/tpch.sql +++ b/ydb/core/kqp/ut/join/data/schema/tpch.sql @@ -32,7 +32,7 @@ CREATE TABLE `/Root/lineitem` ( CREATE TABLE `/Root/nation` ( n_comment String , - n_name String , + n_name String NOT NULL, n_nationkey Int32 NOT NULL, -- Identifier n_regionkey Int32 , -- FK to R_REGIONKEY PRIMARY KEY(n_nationkey) diff --git a/ydb/core/kqp/ut/join/kqp_join_order_ut.cpp b/ydb/core/kqp/ut/join/kqp_join_order_ut.cpp index 1027b08e013f..08ef387d1da6 100644 --- a/ydb/core/kqp/ut/join/kqp_join_order_ut.cpp +++ b/ydb/core/kqp/ut/join/kqp_join_order_ut.cpp @@ -450,6 +450,10 @@ Y_UNIT_TEST_SUITE(KqpJoinOrder) { ExecuteJoinOrderTestDataQueryWithStats("queries/tpch11.sql", "stats/tpch1000s.json", StreamLookupJoin, ColumnStore); } + Y_UNIT_TEST_XOR_OR_BOTH_FALSE(TPCH20, StreamLookupJoin, ColumnStore) { + ExecuteJoinOrderTestDataQueryWithStats("queries/tpch20.sql", "stats/tpch100s_no_countmin.json", StreamLookupJoin, ColumnStore); + } + Y_UNIT_TEST_XOR_OR_BOTH_FALSE(TPCH21, StreamLookupJoin, ColumnStore) { ExecuteJoinOrderTestDataQueryWithStats("queries/tpch21.sql", "stats/tpch1000s.json", StreamLookupJoin, ColumnStore); } From 6c6420eb838fffe9a83a59f5ab9bd929a6d6e530 Mon Sep 17 00:00:00 2001 From: Pavel Velikhov Date: Tue, 24 Sep 2024 14:34:28 +0000 Subject: [PATCH 3/4] Added extra files --- ydb/core/kqp/ut/join/data/queries/tpch20.sql | 46 +++++++++++++ .../join/data/stats/tpch100s_no_countmin.json | 68 +++++++++++++++++++ 2 files changed, 114 insertions(+) create mode 100644 ydb/core/kqp/ut/join/data/queries/tpch20.sql create mode 100644 ydb/core/kqp/ut/join/data/stats/tpch100s_no_countmin.json diff --git a/ydb/core/kqp/ut/join/data/queries/tpch20.sql b/ydb/core/kqp/ut/join/data/queries/tpch20.sql new file mode 100644 index 000000000000..8ced52b003d2 --- /dev/null +++ b/ydb/core/kqp/ut/join/data/queries/tpch20.sql @@ -0,0 +1,46 @@ +PRAGMA TablePathPrefix="/Root"; + +-- TPC-H/TPC-R Potential Part Promotion Query (Q20) +-- TPC TPC-H Parameter Substitution (Version 2.17.2 build 0) +-- using 1680793381 as a seed to the RNG + + +select + s_name, + s_address +from + supplier + cross join nation + cross join ( + select + ps_suppkey + from + partsupp + cross join part + cross join ( + select + l_partkey, + l_suppkey, + 0.5 * sum(l_quantity) as q_threshold + from + lineitem + where + l_shipdate >= cast(date('1993-01-01') as Datetime) + and l_shipdate < cast(date('1993-01-01') as Datetime) + interval('P365D') + group by + l_partkey, + l_suppkey + ) as threshold + where + ps_partkey = p_partkey + and ps_partkey = l_partkey + and ps_suppkey = l_suppkey + and p_name like 'maroon%' + and ps_availqty > threshold.q_threshold + ) as partsupp +where + s_suppkey = ps_suppkey + and s_nationkey = n_nationkey + and n_name = 'VIETNAM' +order by + s_name; \ No newline at end of file diff --git a/ydb/core/kqp/ut/join/data/stats/tpch100s_no_countmin.json b/ydb/core/kqp/ut/join/data/stats/tpch100s_no_countmin.json new file mode 100644 index 000000000000..c9185c8fe2cb --- /dev/null +++ b/ydb/core/kqp/ut/join/data/stats/tpch100s_no_countmin.json @@ -0,0 +1,68 @@ +{ + "/Root/customer": { + "byte_size": 1578400481, + "n_rows": 15000000, + "n_attrs": 8, + "key_columns": [ + "c_custkey" + ] + }, + "/Root/lineitem": { + "byte_size": 41682657607, + "n_rows": 600037902, + "n_attrs": 16, + "key_columns": [ + "l_linenumber", + "l_orderkey" + ] + }, + "/Root/nation": { + "byte_size": 3758, + "n_rows": 25, + "n_attrs": 4, + "key_columns": [ + "n_nationkey" + ] + }, + "/Root/orders": { + "byte_size": 9159017758, + "n_rows": 150000000, + "n_attrs": 9, + "key_columns": [ + "o_orderkey" + ] + }, + "/Root/part": { + "byte_size": 1460288880, + "n_rows": 20000000, + "n_attrs": 9, + "key_columns": [ + "p_partkey" + ] + }, + "/Root/partsupp": { + "byte_size": 5068061409, + "n_rows": 80000000, + "n_attrs": 5, + "key_columns": [ + "ps_partkey", + "ps_suppkey" + ] + }, + "/Root/region": { + "byte_size": 1280, + "n_rows": 5, + "n_attrs": 3, + "key_columns": [ + "r_regionkey" + ] + }, + "/Root/supplier": { + "byte_size": 95577702, + "n_rows": 1000000, + "n_attrs": 7, + "key_columns": [ + "s_suppkey" + ] + } +} \ No newline at end of file From a75b4cc7c28a688b7d9ffb56d4ee924bb70e72eb Mon Sep 17 00:00:00 2001 From: Pavel Ivanov Date: Tue, 24 Sep 2024 16:05:00 +0000 Subject: [PATCH 4/4] fix --- .../kqp/opt/kqp_statistics_transformer.cpp | 82 +++++++++++-------- 1 file changed, 47 insertions(+), 35 deletions(-) diff --git a/ydb/core/kqp/opt/kqp_statistics_transformer.cpp b/ydb/core/kqp/opt/kqp_statistics_transformer.cpp index 88575b2fbd8b..7aed992eb5bd 100644 --- a/ydb/core/kqp/opt/kqp_statistics_transformer.cpp +++ b/ydb/core/kqp/opt/kqp_statistics_transformer.cpp @@ -360,42 +360,14 @@ class TKqpOlapPredicateSelectivityComputer: public TPredicateSelectivityComputer resSelectivity = tmpSelectivity; } else if (auto notNode = input.Maybe()) { resSelectivity = 1 - Compute(notNode.Cast().Value()); - } else if (input.Maybe() && input.Ptr()->ChildrenSize() >= 1) { - auto listPtr = input.Maybe().Cast().Ptr()->Child(1); - size_t listSize = listPtr->ChildrenSize(); - - if (listSize == 3) { - TString compSign = TString(listPtr->Child(0)->Content()); - TString attr = TString(listPtr->Child(1)->Content()); - - TExprContext dummyCtx; - TPositionHandle dummyPos; - - auto rowArg = - Build(dummyCtx, dummyPos) - .Name("row") - .Done(); - - auto member = - Build(dummyCtx, dummyPos) - .Struct(rowArg) - .Name().Build(attr) - .Done(); - - auto value = TExprBase(listPtr->ChildPtr(2)); - if (listPtr->ChildPtr(2)->ChildrenSize() >= 2 && listPtr->ChildPtr(2)->ChildPtr(0)->Content() == "just") { - value = TExprBase(listPtr->ChildPtr(2)->ChildPtr(1)); - } - if (OlapCompSigns.contains(compSign)) { - resSelectivity = this->ComputeComparisonSelectivity(member, value); - } else if (compSign == "eq") { - resSelectivity = this->ComputeEqualitySelectivity(member, value); - } else if (compSign == "neq") { - resSelectivity = 1 - this->ComputeEqualitySelectivity(member, value); - } else if (RegexpSigns.contains(compSign)) { - return 0.5; - } + } else if (input.Maybe()) { + auto list = input.Maybe().Cast().Ptr(); + resSelectivity = ComputeListSelectivity(list); + + if (!resSelectivity.has_value() && list->ChildrenSize() >= 1) { + resSelectivity = ComputeListSelectivity(list->Child(1)); } + } if (!resSelectivity.has_value()) { @@ -408,6 +380,46 @@ class TKqpOlapPredicateSelectivityComputer: public TPredicateSelectivityComputer } private: + std::optional ComputeListSelectivity(const TExprNode::TPtr& listPtr) { + std::optional resSelectivity; + + size_t listSize = listPtr->ChildrenSize(); + if (listSize == 3) { + TString compSign = TString(listPtr->Child(0)->Content()); + TString attr = TString(listPtr->Child(1)->Content()); + + TExprContext dummyCtx; + TPositionHandle dummyPos; + + auto rowArg = + Build(dummyCtx, dummyPos) + .Name("row") + .Done(); + + auto member = + Build(dummyCtx, dummyPos) + .Struct(rowArg) + .Name().Build(attr) + .Done(); + + auto value = TExprBase(listPtr->ChildPtr(2)); + if (listPtr->ChildPtr(2)->ChildrenSize() >= 2 && listPtr->ChildPtr(2)->ChildPtr(0)->Content() == "just") { + value = TExprBase(listPtr->ChildPtr(2)->ChildPtr(1)); + } + if (OlapCompSigns.contains(compSign)) { + resSelectivity = this->ComputeComparisonSelectivity(member, value); + } else if (compSign == "eq") { + resSelectivity = this->ComputeEqualitySelectivity(member, value); + } else if (compSign == "neq") { + resSelectivity = 1 - this->ComputeEqualitySelectivity(member, value); + } else if (RegexpSigns.contains(compSign)) { + return 0.5; + } + } + + return resSelectivity; + } + THashSet OlapCompSigns = { {"lt"}, {"lte"},