Skip to content
This repository has been archived by the owner on Sep 27, 2019. It is now read-only.

Syntax-based query rewriter #1494

Open
wants to merge 3 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions src/include/common/internal_types.h
Original file line number Diff line number Diff line change
Expand Up @@ -1383,6 +1383,10 @@ enum class RuleType : uint32_t {
PULL_FILTER_THROUGH_MARK_JOIN,
PULL_FILTER_THROUGH_AGGREGATION,

// AST rewrite rules (logical -> logical)
// Removes ConstantValueExpression = ConstantValueExpression
COMP_EQUALITY_ELIMINATION,

// Place holder to generate number of rules compile time
NUM_RULES

Expand Down
183 changes: 183 additions & 0 deletions src/include/optimizer/absexpr_expression.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,183 @@
//===----------------------------------------------------------------------===//
//
// Peloton
//
// absexpr_expression.h
//
// Identification: src/include/optimizer/absexpr_expression.h
//
//===----------------------------------------------------------------------===//

#pragma once

// AbstractExpression Definition
#include "expression/abstract_expression.h"
#include "expression/conjunction_expression.h"
#include "expression/comparison_expression.h"
#include "expression/constant_value_expression.h"

#include <memory>

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Same nitpick about ordering #include's.

#include <vector>

namespace peloton {
namespace optimizer {

// (TODO): rethink the AbsExpr_Container/Expression approach in comparison to abstract
// Most of the core rule/optimizer code relies on the concept of an Operator /
// OperatorExpression and the interface that the two functions respectively expose.
//
// The annoying part is that an AbstractExpression blends together an Operator
// and OperatorExpression. Second part, the AbstractExpression does not export the
// correct interface that the rest of the system depends on.
//
// As an extreme level of simplification (sort of hacky), an AbsExpr_Container is
// analogous to Operator and wraps a single AbstractExpression node. AbsExpr_Expression
// is analogous to OperatorExpression.
//
// AbsExpr_Container does *not* handle memory correctly w.r.t. internal instantiations
// from Rule transformation. This is because Peloton itself mixes unique_ptrs and
// hands out raw pointers, which makes adding a shared_ptr here extremely problematic.
// terrier uses only shared_ptr when dealing with AbstractExpression trees.
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

(All Terrier parser behavior can be changed, just FYI. If anything would make it more convenient for you, make the case for it.)


class AbsExpr_Container {
public:
// Default constructor. Declaration only; no definition is visible in this
// header section.
AbsExpr_Container();

// Wraps a single AbstractExpression node. Only the pointer is stored; the
// expression itself is not copied.
AbsExpr_Container(const expression::AbstractExpression *expr) : node(expr) {}

// Returns the expression type of the wrapped node, or
// ExpressionType::INVALID when no node is wrapped.
ExpressionType GetType() const {
  return IsDefined() ? node->GetExpressionType() : ExpressionType::INVALID;
}

// Returns the wrapped AbstractExpression pointer exactly as stored.
// The result is nullptr when IsDefined() is false.
const expression::AbstractExpression *GetExpr() const {
return node;
}

// Operator contains Logical node.
// Always returns true: this container unconditionally reports itself as
// logical, regardless of the wrapped expression.
bool IsLogical() const {
return true;
}

// Operator contains Physical node.
// Always returns false: this container never reports itself as physical.
bool IsPhysical() const {
return false;
}

// Returns the wrapped node's expression name, or the literal "Undefined"
// when no node is wrapped.
std::string GetName() const {
  if (!IsDefined()) {
    return "Undefined";
  }

  return node->GetExpressionName();
}

// Returns the wrapped node's hash, or 0 when no node is wrapped.
hash_t Hash() const {
  if (!IsDefined()) {
    return 0;
  }
  return node->Hash();
}

bool operator==(const AbsExpr_Container &r) {
if (IsDefined() && r.IsDefined()) {
// (TODO): need a better way to determine deep equality

// NOTE:
// Without proper equality determinations, the groups will
// not be assigned correctly. Arguably, terrier does this
// better because a blind ExactlyEquals on different types
// of ConstantValueExpression under Peloton will crash!

// For now, just return (false).
// I don't anticipate this will affect correctness, just
// performance, since duplicate trees will have to evaluated
// over and over again, rather than being able to "borrow"
// a previous tree's rewrite.
//
// Probably not worth to create a "validator" since porting
// this to terrier anyways (?). == does not check Value
// so it's broken. ExactlyEqual requires precondition checking.
return false;
} else if (!IsDefined() && !r.IsDefined()) {
return true;
}
return false;
}

Copy link

@KatiaVi KatiaVi Apr 15, 2019

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This may not be a huge deal since, as you said, the == op doesn't affect correctness but do you intend to implement the == op for rewrites? If so, does there exist an == op for AbstractExpression you can use?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I believe Terrier currently only has the notion of logical equality for abstract expressions.

// Returns true when this container wraps an expression node, i.e. the
// stored pointer is non-null.
bool IsDefined() const {
return node != nullptr;
}

//(TODO): fix memory management once go to terrier
//
// Builds a brand-new AbstractExpression with the same expression type as the
// wrapped node, using `children` as the operands of the new node.
//
//   - Comparison types: expects exactly 2 children; returns a new
//     ComparisonExpression(type, children[0], children[1]).
//   - CONJUNCTION_AND / CONJUNCTION_OR: expects exactly 2 children; returns a
//     new ConjunctionExpression(type, children[0], children[1]).
//   - VALUE_CONSTANT: expects no children; returns a new
//     ConstantValueExpression built from the wrapped constant's value.
//   - Any other type (including an undefined container, whose type is
//     INVALID): logs an error and returns nullptr.
//
// The child-count expectations are enforced with PELOTON_ASSERT only.
//
// The result is allocated with `new` and returned as a raw pointer; nothing
// in this class frees it, so the caller must arrange for its lifetime.
// NOTE(review): the new node presumably takes over the `children` pointers;
// confirm against the ComparisonExpression/ConjunctionExpression constructors.
//
// The method does not modify the container, so it is const-qualified and can
// be invoked through a const reference (e.g. the one returned by
// AbsExpr_Expression::Op()).
expression::AbstractExpression *Rebuild(std::vector<expression::AbstractExpression*> children) const {
  switch (GetType()) {
    case ExpressionType::COMPARE_EQUAL:
    case ExpressionType::COMPARE_NOTEQUAL:
    case ExpressionType::COMPARE_LESSTHAN:
    case ExpressionType::COMPARE_GREATERTHAN:
    case ExpressionType::COMPARE_LESSTHANOREQUALTO:
    case ExpressionType::COMPARE_GREATERTHANOREQUALTO:
    case ExpressionType::COMPARE_LIKE:
    case ExpressionType::COMPARE_NOTLIKE:
    case ExpressionType::COMPARE_IN:
    case ExpressionType::COMPARE_DISTINCT_FROM: {
      PELOTON_ASSERT(children.size() == 2);
      return new expression::ComparisonExpression(GetType(), children[0], children[1]);
    }
    case ExpressionType::CONJUNCTION_AND:
    case ExpressionType::CONJUNCTION_OR: {
      PELOTON_ASSERT(children.size() == 2);
      return new expression::ConjunctionExpression(GetType(), children[0], children[1]);
    }
    case ExpressionType::VALUE_CONSTANT: {
      PELOTON_ASSERT(children.size() == 0);
      // GetType() returned VALUE_CONSTANT, so the wrapped node is treated as
      // a ConstantValueExpression.
      auto cve = static_cast<const expression::ConstantValueExpression*>(node);
      return new expression::ConstantValueExpression(cve->GetValue());
    }
    default: {
      int type = static_cast<int>(GetType());
      LOG_ERROR("Unimplemented Rebuild() for %d found", type);
      return nullptr;
    }
  }
}

Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

What does Rebuild do in context of its AbsExpr_Container? You could probably add some more documentation for this function.

private:
// The wrapped expression node (nullptr when the container is undefined).
// Held as a raw pointer; this class declares no destructor, so it never
// frees the pointee.
const expression::AbstractExpression *node;
};

// Tree node that pairs one AbsExpr_Container with an ordered list of child
// subtrees. Children are held through std::shared_ptr.
class AbsExpr_Expression {
 public:
  // Creates a childless node holding a copy of `op`.
  AbsExpr_Expression(AbsExpr_Container op) : op(op) {}

  // Appends `op` as the last child of this node.
  void PushChild(std::shared_ptr<AbsExpr_Expression> op) {
    children.push_back(op);
  }

  // Removes the most recently appended child.
  void PopChild() {
    children.pop_back();
  }

  // Read-only view of the children, in insertion order.
  const std::vector<std::shared_ptr<AbsExpr_Expression>> &Children() const {
    return children;
  }

  // Read-only access to the container held by this node.
  const AbsExpr_Container &Op() const {
    return op;
  }

 private:
  AbsExpr_Container op;
  std::vector<std::shared_ptr<AbsExpr_Expression>> children;
};

} // namespace optimizer
} // namespace peloton

48 changes: 28 additions & 20 deletions src/include/optimizer/binding.h
Original file line number Diff line number Diff line change
Expand Up @@ -24,63 +24,71 @@ namespace peloton {
namespace optimizer {

class Optimizer;

template <class Node, class OperatorType, class OperatorExpr>
class Memo;

//===--------------------------------------------------------------------===//
// Binding Iterator
//===--------------------------------------------------------------------===//
template <class Node, class OperatorType, class OperatorExpr>
class BindingIterator {
public:
BindingIterator(Memo& memo) : memo_(memo) {}
BindingIterator(Memo<Node,OperatorType,OperatorExpr>& memo) : memo_(memo) {}

virtual ~BindingIterator(){};

virtual bool HasNext() = 0;

virtual std::shared_ptr<OperatorExpression> Next() = 0;
virtual std::shared_ptr<OperatorExpr> Next() = 0;

protected:
Memo &memo_;
Memo<Node,OperatorType,OperatorExpr> &memo_;
};

class GroupBindingIterator : public BindingIterator {
template <class Node, class OperatorType, class OperatorExpr>
class GroupBindingIterator : public BindingIterator<Node,OperatorType,OperatorExpr> {
public:
GroupBindingIterator(Memo& memo, GroupID id,
std::shared_ptr<Pattern> pattern);
GroupBindingIterator(Memo<Node,OperatorType,OperatorExpr>& memo,
GroupID id,
std::shared_ptr<Pattern<OperatorType>> pattern);

bool HasNext() override;

std::shared_ptr<OperatorExpression> Next() override;
std::shared_ptr<OperatorExpr> Next() override;

private:
GroupID group_id_;
std::shared_ptr<Pattern> pattern_;
Group *target_group_;
std::shared_ptr<Pattern<OperatorType>> pattern_;
Group<Node,OperatorType,OperatorExpr> *target_group_;
size_t num_group_items_;

// Internal function for HasNext()
bool HasNextBinding();

size_t current_item_index_;
std::unique_ptr<BindingIterator> current_iterator_;
std::unique_ptr<BindingIterator<Node,OperatorType,OperatorExpr>> current_iterator_;
};

class GroupExprBindingIterator : public BindingIterator {
template <class Node, class OperatorType, class OperatorExpr>
class GroupExprBindingIterator : public BindingIterator<Node,OperatorType,OperatorExpr> {
public:
GroupExprBindingIterator(Memo& memo,
GroupExpression *gexpr,
std::shared_ptr<Pattern> pattern);
GroupExprBindingIterator(Memo<Node,OperatorType,OperatorExpr>& memo,
GroupExpression<Node,OperatorType,OperatorExpr> *gexpr,
std::shared_ptr<Pattern<OperatorType>> pattern);

bool HasNext() override;

std::shared_ptr<OperatorExpression> Next() override;
std::shared_ptr<OperatorExpr> Next() override;

private:
GroupExpression* gexpr_;
std::shared_ptr<Pattern> pattern_;
GroupExpression<Node,OperatorType,OperatorExpr>* gexpr_;
std::shared_ptr<Pattern<OperatorType>> pattern_;

bool first_;
bool has_next_;
std::shared_ptr<OperatorExpression> current_binding_;
std::vector<std::vector<std::shared_ptr<OperatorExpression>>>
children_bindings_;
std::shared_ptr<OperatorExpr> current_binding_;
std::vector<std::vector<std::shared_ptr<OperatorExpr>>> children_bindings_;
std::vector<size_t> children_bindings_pos_;
};

Expand Down
12 changes: 8 additions & 4 deletions src/include/optimizer/child_property_deriver.h
Original file line number Diff line number Diff line change
Expand Up @@ -13,10 +13,12 @@
#pragma once
#include <memory>
#include "optimizer/operator_visitor.h"
#include "optimizer/operator_expression.h"

namespace peloton {

namespace optimizer {
template <class Node, class OpType, class OperatorExpr>
class Memo;
}

Expand All @@ -33,8 +35,10 @@ class ChildPropertyDeriver : public OperatorVisitor {
public:
std::vector<std::pair<std::shared_ptr<PropertySet>,
std::vector<std::shared_ptr<PropertySet>>>>
GetProperties(GroupExpression *gexpr,
std::shared_ptr<PropertySet> requirements, Memo *memo);

GetProperties(GroupExpression<Operator,OpType,OperatorExpression> *gexpr,
std::shared_ptr<PropertySet> requirements,
Memo<Operator,OpType,OperatorExpression> *memo);

void Visit(const DummyScan *) override;
void Visit(const PhysicalSeqScan *) override;
Expand Down Expand Up @@ -74,8 +78,8 @@ class ChildPropertyDeriver : public OperatorVisitor {
* @brief We need the memo and gexpr because some property may depend on
* child's schema
*/
Memo *memo_;
GroupExpression *gexpr_;
Memo<Operator,OpType,OperatorExpression> *memo_;
GroupExpression<Operator,OpType,OperatorExpression> *gexpr_;
};

} // namespace optimizer
Expand Down
5 changes: 4 additions & 1 deletion src/include/optimizer/cost_model/abstract_cost_model.h
Original file line number Diff line number Diff line change
Expand Up @@ -13,10 +13,12 @@
#pragma once

#include "optimizer/operator_visitor.h"
#include "optimizer/operator_expression.h"

namespace peloton {
namespace optimizer {

template <class Node, class OperatorType, class OperatorExpr>
class Memo;

// Default cost when cost model cannot compute correct cost.
Expand All @@ -34,7 +36,8 @@ static constexpr double DEFAULT_OPERATOR_COST = 0.0025;

class AbstractCostModel : public OperatorVisitor {
public:
virtual double CalculateCost(GroupExpression *gexpr, Memo *memo,
virtual double CalculateCost(GroupExpression<Operator,OpType,OperatorExpression> *gexpr,
Memo<Operator,OpType,OperatorExpression> *memo,
concurrency::TransactionContext *txn) = 0;
};

Expand Down
11 changes: 7 additions & 4 deletions src/include/optimizer/cost_model/default_cost_model.h
Original file line number Diff line number Diff line change
Expand Up @@ -23,14 +23,17 @@
namespace peloton {
namespace optimizer {

template <class Node, class OperatorType, class OperatorExpr>
class Memo;

// Derive cost for a physical group expression
class DefaultCostModel : public AbstractCostModel {
public:
DefaultCostModel(){};

double CalculateCost(GroupExpression *gexpr, Memo *memo,
concurrency::TransactionContext *txn) {
double CalculateCost(GroupExpression<Operator,OpType,OperatorExpression> *gexpr,
Memo<Operator,OpType,OperatorExpression> *memo,
concurrency::TransactionContext *txn) {
gexpr_ = gexpr;
memo_ = memo;
txn_ = txn;
Expand Down Expand Up @@ -151,8 +154,8 @@ class DefaultCostModel : public AbstractCostModel {
return child_num_rows * DEFAULT_TUPLE_COST;
}

GroupExpression *gexpr_;
Memo *memo_;
GroupExpression<Operator,OpType,OperatorExpression> *gexpr_;
Memo<Operator,OpType,OperatorExpression> *memo_;
concurrency::TransactionContext *txn_;
double output_cost_ = 0;
};
Expand Down
Loading