Skip to content
This repository has been archived by the owner on Sep 27, 2019. It is now read-only.

[15721] Hash Join Cost Update #1344

Open
wants to merge 47 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from 39 commits
Commits
Show all changes
47 commits
Select commit Hold shift + click to select a range
b8b5ad4
Fork sync
Mar 23, 2018
6d619a8
Initial
Mar 24, 2018
52b7f53
Plan creation working
Mar 25, 2018
9bb6cc3
Able to get table names
Mar 26, 2018
c6badc0
Refactored plan selection test and added sort test and another join o…
nappelson Mar 28, 2018
d96f02f
Add analyze statements to plan selection test
nappelson Mar 28, 2018
a1d6f4d
Fix to optimizer rule bitmap
Mar 29, 2018
368c201
Added test case
Mar 29, 2018
ba827a9
Merge in tests branch
Apr 2, 2018
7bc5434
Support for generating worst plan
Apr 2, 2018
1c13695
Add timer output to plan selection tests
nappelson Apr 3, 2018
1d61adb
Merge branch 'master' into master
pervazea Apr 3, 2018
0c2e9e6
add simple worst case plan selection test
nappelson Apr 4, 2018
9b6fd0d
refactor Cost Model instantiation (make it a parameter to optimizer c…
nappelson Apr 9, 2018
b17f5cb
Initial
Mar 24, 2018
60f5193
Plan creation working
Mar 25, 2018
b689891
Able to get table names
Mar 26, 2018
9ef318f
Refactored plan selection test and added sort test and another join o…
nappelson Mar 28, 2018
a112af1
Add analyze statements to plan selection test
nappelson Mar 28, 2018
dc89ec2
Support for generating worst plan
Apr 2, 2018
966b231
Add timer output to plan selection tests
nappelson Apr 3, 2018
393661e
add simple worst case plan selection test
nappelson Apr 4, 2018
14220e3
refactor Cost Model instantiation (make it a parameter to optimizer c…
nappelson Apr 9, 2018
17d4f1a
Merge branch 'tests' of github.com:GustavoAngulo/peloton into project_3
nappelson Apr 10, 2018
4f1505e
add example of bad cost model using new optimizer cost model refactor
nappelson Apr 10, 2018
48a09dd
reverse failed unit test order to make tests pass (need to fix commun…
nappelson Apr 12, 2018
d604638
Reverted changes from another branch
Apr 12, 2018
df8f4a5
Fix to optimizer rule bitmap
Mar 29, 2018
3f086f2
Added test case
Mar 29, 2018
ff8af6e
Fix to memo table
Apr 11, 2018
f95cc30
Merge branch 'gus_master' into project_3
nappelson Apr 23, 2018
20facd1
Fix for PrintPlan bug
Apr 23, 2018
a4e9f5e
Reset the child idx and total cost before we return if it is the firs…
nappelson Apr 24, 2018
b22603a
Merge branch 'tests' of github.com:GustavoAngulo/peloton into project_3
nappelson Apr 24, 2018
e4674b2
Fixed bug where stats not properly generated
May 3, 2018
9f36c6a
Initial commit for updated hash join cost
May 5, 2018
1eaa236
added flag that allows users to specify different cost calculators w…
nappelson May 6, 2018
a33e8fc
Moved new postgresesque hash join cost model code into new postgres c…
nappelson May 6, 2018
d4e2331
Addressing comments from PR 1
May 6, 2018
79f9ff2
Moved new postgresesque hash join cost model code into new postgres c…
nappelson May 6, 2018
750773c
add cost model evaluator
nappelson May 12, 2018
5a8c7a4
Merge branch 'tests' of github.com:GustavoAngulo/peloton into project…
nappelson May 12, 2018
49ec363
add some documentation'
nappelson May 12, 2018
9ba572e
Fix to highest freq calculation
May 13, 2018
5731859
add table printing info to explain statement
nappelson May 13, 2018
670c633
Merge branch 'tests' of github.com:GustavoAngulo/peloton into project…
nappelson May 13, 2018
e48873c
fix query count in cost model evaluator
nappelson May 14, 2018
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
29 changes: 29 additions & 0 deletions src/include/optimizer/abstract_cost_calculator.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
//===----------------------------------------------------------------------===//
//
// Peloton
//
// abstract_cost_calculator.h
//
// Identification: src/include/optimizer/abstract_cost_calculator.h
//
// Copyright (c) 2015-18, Carnegie Mellon University Database Group
//
//===----------------------------------------------------------------------===//

#pragma once

#include "optimizer/operator_visitor.h"

namespace peloton {
namespace optimizer {

class Memo;

/**
 * @brief Common interface for optimizer cost calculators.
 *
 * Derives from OperatorVisitor, so an implementation provides the
 * per-operator Visit() overloads in addition to CalculateCost().
 */
class AbstractCostCalculator : public OperatorVisitor {
 public:
  // Declared explicitly so that destroying a concrete calculator through an
  // AbstractCostCalculator pointer (e.g. a
  // std::unique_ptr<AbstractCostCalculator>) is well-defined without relying
  // on what OperatorVisitor declares.
  virtual ~AbstractCostCalculator() = default;

  /**
   * @brief Calculates the cost of a group expression.
   *
   * @param gexpr group expression to cost
   * @param memo  memo passed through to the calculator
   * @param txn   transaction context passed through to the calculator
   * @return the calculated cost
   */
  virtual double CalculateCost(GroupExpression *gexpr, Memo *memo,
                               concurrency::TransactionContext *txn) = 0;
};

} // namespace optimizer
} // namespace peloton
58 changes: 30 additions & 28 deletions src/include/optimizer/cost_calculator.h
Original file line number Diff line number Diff line change
Expand Up @@ -12,40 +12,42 @@

#pragma once

#include "optimizer/operator_visitor.h"
#include "optimizer/abstract_cost_calculator.h"

namespace peloton {
namespace optimizer {

class Memo;
// Derive cost for a physical group expressionh
class CostCalculator : public OperatorVisitor {
// Derive cost for a physical group expression
class CostCalculator : public AbstractCostCalculator {
public:
double CalculateCost(GroupExpression *gexpr, Memo *memo,
concurrency::TransactionContext *txn);

void Visit(const DummyScan *) override;
void Visit(const PhysicalSeqScan *) override;
void Visit(const PhysicalIndexScan *) override;
void Visit(const QueryDerivedScan *) override;
void Visit(const PhysicalOrderBy *) override;
void Visit(const PhysicalLimit *) override;
void Visit(const PhysicalInnerNLJoin *) override;
void Visit(const PhysicalLeftNLJoin *) override;
void Visit(const PhysicalRightNLJoin *) override;
void Visit(const PhysicalOuterNLJoin *) override;
void Visit(const PhysicalInnerHashJoin *) override;
void Visit(const PhysicalLeftHashJoin *) override;
void Visit(const PhysicalRightHashJoin *) override;
void Visit(const PhysicalOuterHashJoin *) override;
void Visit(const PhysicalInsert *) override;
void Visit(const PhysicalInsertSelect *) override;
void Visit(const PhysicalDelete *) override;
void Visit(const PhysicalUpdate *) override;
void Visit(const PhysicalHashGroupBy *) override;
void Visit(const PhysicalSortGroupBy *) override;
void Visit(const PhysicalDistinct *) override;
void Visit(const PhysicalAggregate *) override;
CostCalculator(){};

virtual double CalculateCost(GroupExpression *gexpr, Memo *memo,
concurrency::TransactionContext *txn) override;

virtual void Visit(const DummyScan *) override;
virtual void Visit(const PhysicalSeqScan *) override;
virtual void Visit(const PhysicalIndexScan *) override;
virtual void Visit(const QueryDerivedScan *) override;
virtual void Visit(const PhysicalOrderBy *) override;
virtual void Visit(const PhysicalLimit *) override;
virtual void Visit(const PhysicalInnerNLJoin *) override;
virtual void Visit(const PhysicalLeftNLJoin *) override;
virtual void Visit(const PhysicalRightNLJoin *) override;
virtual void Visit(const PhysicalOuterNLJoin *) override;
virtual void Visit(const PhysicalInnerHashJoin *) override;
virtual void Visit(const PhysicalLeftHashJoin *) override;
virtual void Visit(const PhysicalRightHashJoin *) override;
virtual void Visit(const PhysicalOuterHashJoin *) override;
virtual void Visit(const PhysicalInsert *) override;
virtual void Visit(const PhysicalInsertSelect *) override;
virtual void Visit(const PhysicalDelete *) override;
virtual void Visit(const PhysicalUpdate *) override;
virtual void Visit(const PhysicalHashGroupBy *) override;
virtual void Visit(const PhysicalSortGroupBy *) override;
virtual void Visit(const PhysicalDistinct *) override;
virtual void Visit(const PhysicalAggregate *) override;

private:
double HashCost();
Expand Down
31 changes: 31 additions & 0 deletions src/include/optimizer/cost_calculator_factory.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
//===----------------------------------------------------------------------===//
//
// Peloton
//
// cost_calculator_factory.h
//
// Identification: src/include/optimizer/cost_calculator_factory.h
//
// Copyright (c) 2015-18, Carnegie Mellon University Database Group
//
//===----------------------------------------------------------------------===//

#pragma once
#include "optimizer/cost_calculator.h"

#include "common/exception.h"

namespace peloton {
namespace optimizer {

// Factory that turns a cost calculator name into an AbstractCostCalculator
// instance. Exposes a single static entry point.
class CostCalculatorFactory {
public:
/*
* Creates the respective cost calculator given a cost calculator name.
*
* cost_model_name: name identifying which cost calculator to create.
* Returns a std::unique_ptr, so ownership of the calculator passes to the
* caller.
*
* NOTE(review): this header includes common/exception.h, so an unrecognized
* name presumably raises an exception -- confirm against the implementation.
*/
static std::unique_ptr<AbstractCostCalculator> CreateCostCalculator(
const std::string &cost_model_name);
};

} // namespace optimizer
} // namespace peloton
26 changes: 17 additions & 9 deletions src/include/optimizer/optimizer.h
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
#include <memory>

#include "optimizer/abstract_optimizer.h"
#include "optimizer/abstract_cost_calculator.h"
#include "optimizer/property_set.h"
#include "optimizer/optimizer_metadata.h"

Expand All @@ -38,9 +39,9 @@ class TransactionContext;
}

namespace test {
class OptimizerRuleTests_SimpleAssociativeRuleTest_Test;
class OptimizerRuleTests_SimpleAssociativeRuleTest2_Test;
}
class OptimizerRuleTests_SimpleAssociativeRuleTest_Test;
class OptimizerRuleTests_SimpleAssociativeRuleTest2_Test;
}

namespace optimizer {

Expand All @@ -60,16 +61,19 @@ class Optimizer : public AbstractOptimizer {
friend class BindingIterator;
friend class GroupBindingIterator;

friend class ::peloton::test::OptimizerRuleTests_SimpleAssociativeRuleTest_Test;
friend class ::peloton::test::OptimizerRuleTests_SimpleAssociativeRuleTest2_Test;
friend class ::peloton::test::
OptimizerRuleTests_SimpleAssociativeRuleTest_Test;
friend class ::peloton::test::
OptimizerRuleTests_SimpleAssociativeRuleTest2_Test;

public:
Optimizer(const Optimizer &) = delete;
Optimizer &operator=(const Optimizer &) = delete;
Optimizer(Optimizer &&) = delete;
Optimizer &operator=(Optimizer &&) = delete;

Optimizer();
Optimizer(){};
Optimizer(std::unique_ptr<AbstractCostCalculator> cost_calculator);

std::shared_ptr<planner::AbstractPlan> BuildPelotonPlanTree(
const std::unique_ptr<parser::SQLStatementList> &parse_tree,
Expand All @@ -83,14 +87,16 @@ class Optimizer : public AbstractOptimizer {

// Returns a mutable reference to this optimizer's metadata_ member.
OptimizerMetadata &GetMetadata() { return metadata_; }

// Returns a non-owning pointer to the calculator held by cost_calculator_;
// the unique_ptr member keeps ownership.
// NOTE(review): presumably nullptr when no calculator was supplied at
// construction -- confirm against the constructors.
AbstractCostCalculator *GetCostCalculator() { return cost_calculator_.get(); }

/* For test purposes only */
std::shared_ptr<GroupExpression> TestInsertQueryTree(parser::SQLStatement *tree,
concurrency::TransactionContext *txn) {
std::shared_ptr<GroupExpression> TestInsertQueryTree(
parser::SQLStatement *tree, concurrency::TransactionContext *txn) {
return InsertQueryTree(tree, txn);
}
/* For test purposes only */
void TestExecuteTaskStack(OptimizerTaskStack &task_stack, int root_group_id,
std::shared_ptr<OptimizeContext> root_context) {
std::shared_ptr<OptimizeContext> root_context) {
return ExecuteTaskStack(task_stack, root_group_id, root_context);
}

Expand Down Expand Up @@ -153,6 +159,8 @@ class Optimizer : public AbstractOptimizer {
//////////////////////////////////////////////////////////////////////////////
/// Metadata
OptimizerMetadata metadata_;
/// Cost Model
std::unique_ptr<AbstractCostCalculator> cost_calculator_;
};

} // namespace optimizer
Expand Down
18 changes: 16 additions & 2 deletions src/include/optimizer/optimizer_metadata.h
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,8 @@
#pragma once

#include "common/timer.h"
#include "optimizer/cost_calculator.h"
#include "optimizer/cost_calculator_factory.h"
#include "optimizer/memo.h"
#include "optimizer/group_expression.h"
#include "optimizer/rule.h"
Expand All @@ -27,20 +29,32 @@ namespace optimizer {
class OptimizerTaskPool;
class RuleSet;

using SettingsManager = settings::SettingsManager;
using SettingId = settings::SettingId;

class OptimizerMetadata {
public:
OptimizerMetadata()
: timeout_limit(settings::SettingsManager::GetInt(
: cost_calculator(CostCalculatorFactory::CreateCostCalculator(
SettingsManager::GetString(SettingId::cost_calculator))),
timeout_limit(
SettingsManager::GetInt(SettingId::task_execution_timeout)),
timer(Timer<std::milli>()) {}

OptimizerMetadata(std::unique_ptr<AbstractCostCalculator> cost_calculator)
: cost_calculator(std::move(cost_calculator)),
timeout_limit(settings::SettingsManager::GetInt(
settings::SettingId::task_execution_timeout)),
timer(Timer<std::milli>()) {}

Memo memo;
RuleSet rule_set;
OptimizerTaskPool *task_pool;
std::unique_ptr<AbstractCostCalculator> cost_calculator;
catalog::CatalogCache *catalog_cache;
unsigned int timeout_limit;
Timer<std::milli> timer;
concurrency::TransactionContext* txn;
concurrency::TransactionContext *txn;

void SetTaskPool(OptimizerTaskPool *task_pool) {
this->task_pool = task_pool;
Expand Down
3 changes: 1 addition & 2 deletions src/include/optimizer/optimizer_task.h
Original file line number Diff line number Diff line change
Expand Up @@ -222,8 +222,7 @@ class OptimizeInputs : public OptimizerTask {
*/
class DeriveStats : public OptimizerTask {
public:
DeriveStats(GroupExpression *gexpr,
ExprSet required_cols,
DeriveStats(GroupExpression *gexpr, ExprSet required_cols,
std::shared_ptr<OptimizeContext> context)
: OptimizerTask(context, OptimizerTaskType::DERIVE_STATS),
gexpr_(gexpr),
Expand Down
70 changes: 70 additions & 0 deletions src/include/optimizer/postgres_cost_calculator.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,70 @@
//===----------------------------------------------------------------------===//
//
// Peloton
//
// postgres_cost_calculator.h
//
// Identification: src/include/optimizer/postgres_cost_calculator.h
//
// Copyright (c) 2015-16, Carnegie Mellon University Database Group
//
//===----------------------------------------------------------------------===//

#pragma once

#include "optimizer/abstract_cost_calculator.h"

// TODO: This is not fully reflective of the Postgres cost model. Currently we
// only attempt to emulate its hash join cost model.

namespace peloton {
namespace optimizer {

class Memo;
/**
 * @brief Derives the cost of a physical group expression.
 *
 * Alternative AbstractCostCalculator implementation. It declares one Visit()
 * overload per physical operator and keeps the inputs of the current
 * calculation in private members.
 */
class PostgresCostCalculator : public AbstractCostCalculator {
 public:
  /**
   * @brief Calculates the cost of a group expression.
   *
   * @param gexpr group expression to cost
   * @param memo  memo passed in by the caller
   * @param txn   transaction context passed in by the caller
   * @return the calculated cost
   */
  double CalculateCost(GroupExpression *gexpr, Memo *memo,
                       concurrency::TransactionContext *txn) override;

  // Per-operator visitor overloads.
  void Visit(const DummyScan *) override;
  void Visit(const PhysicalSeqScan *) override;
  void Visit(const PhysicalIndexScan *) override;
  void Visit(const QueryDerivedScan *) override;
  void Visit(const PhysicalOrderBy *) override;
  void Visit(const PhysicalLimit *) override;
  void Visit(const PhysicalInnerNLJoin *) override;
  void Visit(const PhysicalLeftNLJoin *) override;
  void Visit(const PhysicalRightNLJoin *) override;
  void Visit(const PhysicalOuterNLJoin *) override;
  void Visit(const PhysicalInnerHashJoin *) override;
  void Visit(const PhysicalLeftHashJoin *) override;
  void Visit(const PhysicalRightHashJoin *) override;
  void Visit(const PhysicalOuterHashJoin *) override;
  void Visit(const PhysicalInsert *) override;
  void Visit(const PhysicalInsertSelect *) override;
  void Visit(const PhysicalDelete *) override;
  void Visit(const PhysicalUpdate *) override;
  void Visit(const PhysicalHashGroupBy *) override;
  void Visit(const PhysicalSortGroupBy *) override;
  void Visit(const PhysicalDistinct *) override;
  void Visit(const PhysicalAggregate *) override;

 private:
  double HashCost();
  double SortCost();
  double GroupByCost();

  /* Checks if keys for a join child only reference one table */
  bool IsBaseTable(
      const std::vector<std::unique_ptr<expression::AbstractExpression>> &keys);

  // Raw, non-owning pointers. They start as nullptr so that a Visit() call
  // made before they are assigned never reads an indeterminate pointer.
  // NOTE(review): presumably assigned from CalculateCost()'s arguments --
  // confirm in the .cpp.
  GroupExpression *gexpr_ = nullptr;
  Memo *memo_ = nullptr;
  concurrency::TransactionContext *txn_ = nullptr;
  double output_cost_ = 0;
};

} // namespace optimizer
} // namespace peloton
15 changes: 14 additions & 1 deletion src/include/optimizer/stats/column_stats.h
Original file line number Diff line number Diff line change
Expand Up @@ -59,11 +59,24 @@ class ColumnStats {

bool is_basetable;

std::string ToString() {
std::string ToString(bool verbose = false) {
std::ostringstream os;
os << "column_id :" << column_id << "\n"
<< "column_name :" << column_name << "\n"
<< "num_rows :" << num_rows << "\n";

if (verbose) {
os << "cardinality: " << cardinality << "\n"
<< "frac_null: " << frac_null << "\n";

if (!most_common_vals.empty()) {
os << "most common value: " << most_common_vals[0] << "\n";
}

if (!most_common_freqs.empty()) {
os << "most common freq: " << most_common_freqs[0] << "\n";
}
}
return os.str();
}

Expand Down
Loading