Skip to content
This repository has been archived by the owner on Sep 27, 2019. It is now read-only.

Syntax-Based Query Rewriter, Code Review #2 #1495

Open
wants to merge 16 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from 10 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
17 changes: 17 additions & 0 deletions src/binder/bind_node_visitor.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,8 @@

#include "binder/bind_node_visitor.h"
#include "catalog/catalog.h"
#include "catalog/table_catalog.h"
#include "catalog/column_catalog.h"
#include "expression/expression_util.h"
#include "expression/star_expression.h"
#include "type/type_id.h"
Expand Down Expand Up @@ -250,6 +252,21 @@ void BindNodeVisitor::Visit(expression::TupleValueExpression *expr) {
expr->SetColName(col_name);
expr->SetValueType(value_type);
expr->SetBoundOid(col_pos_tuple);

// TODO(esargent): Uncommenting the following code makes AddressSanitizer get mad at me with a
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This seems like a good issue to file, so that we don't make the same mistake when we write a new binder.

// heap buffer overflow whenever I try a query that references the same non-null attribute multiple
// times (e.g. 'SELECT id FROM t WHERE id < 3 AND id > 1'). Leaving it commented out prevents the
// memory error, but then this prevents the is_not_null flag of a tuple expression from being
// populated in some cases (specifically, when the expression's table name is initially empty).

//if (table_obj == nullptr) {
// LOG_DEBUG("Extracting regular table object");
// BinderContext::GetRegularTableObj(context_, table_name, table_obj, depth);
//}

if (table_obj != nullptr) {
expr->SetIsNotNull(table_obj->GetColumnCatalogEntry(std::get<2>(col_pos_tuple), false)->IsNotNull());
}
}
}

Expand Down
44 changes: 31 additions & 13 deletions src/include/common/internal_types.h
Original file line number Diff line number Diff line change
@@ -1,15 +1,3 @@
//===----------------------------------------------------------------------===//
//
// Peloton
//
// internal_types.h
//
// Identification: src/include/common/internal_types.h
//
// Copyright (c) 2015-2018, Carnegie Mellon University Database Group
//
//===----------------------------------------------------------------------===//

//===----------------------------------------------------------------------===//
//
// Peloton
Expand Down Expand Up @@ -258,7 +246,12 @@ enum class ExpressionType {
// -----------------------------
// Miscellaneous
// -----------------------------
CAST = 600
CAST = 600,

// -----------------------------
// Rewriter-specific identifier
// -----------------------------
GROUP_MARKER = 721
};

// When short_str is true, return a short version of ExpressionType string
Expand Down Expand Up @@ -1383,6 +1376,31 @@ enum class RuleType : uint32_t {
PULL_FILTER_THROUGH_MARK_JOIN,
PULL_FILTER_THROUGH_AGGREGATION,

// AST rewrite rules (logical -> logical)
// Removes ConstantValue =/!=/</>/<=/>= ConstantValue
CONSTANT_COMPARE_EQUAL,
CONSTANT_COMPARE_NOTEQUAL,
CONSTANT_COMPARE_LESSTHAN,
CONSTANT_COMPARE_GREATERTHAN,
CONSTANT_COMPARE_LESSTHANOREQUALTO,
CONSTANT_COMPARE_GREATERTHANOREQUALTO,

// Logical equivalent
EQUIV_AND,
EQUIV_OR,
EQUIV_COMPARE_EQUAL,

TV_EQUALITY_WITH_TWO_CV, // (A.B = x) AND (A.B = y) where x/y are constant
TRANSITIVE_CLOSURE_CONSTANT, // (A.B = x) AND (A.B = C.D)

// Boolean short-circuit rules
AND_SHORT_CIRCUIT, // (FALSE AND B)
OR_SHORT_CIRCUIT, // (TRUE OR B)

// Catalog-based NULL/NON-NULL rules
NULL_LOOKUP_ON_NOT_NULL_COLUMN,
NOT_NULL_LOOKUP_ON_NOT_NULL_COLUMN,

// Place holder to generate number of rules compile time
NUM_RULES

Expand Down
18 changes: 18 additions & 0 deletions src/include/expression/abstract_expression.h
Original file line number Diff line number Diff line change
Expand Up @@ -111,6 +111,24 @@ class AbstractExpression : public Printable {
children_[index].reset(expr);
}

void ClearChildren() {
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Great documentation for this function and how it fits in with the query rewriting process as a whole. Is this function only used once during the rewriting phase for a single query? If not, have you guys looked at how the rewriter’s performance is affected?

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This function is only used once during the rewriting phase in Peloton. As mentioned I believe in rewriter.cpp, this function is solely intended for ensuring that our rewriter manages state (i.e memory, pointers) correctly.

// The rewriter copies the AbstractExpression presented to the rewriter.
// This function is used by the rewriter to properly wipe all the children
// of AbstractExpression once the AbstractExpression tree has been converted
// to the intermediary AbsExpr_Container/Expression tree.
//
// This function should only be invoked on copied AbstractExpressions and
// never on original ones passed into the [rewriter] otherwise we would
// violate immutable properties. We do not believe this function is strictly
// necessary in terrier, however this function does serve some usefulness.
//
// This allows us to reduce our memory footprint while also providing better
// implementation constraints within the rewriter (i.e: the rewriter should
// not be trying to operate directly on AbstractExpression but rather on the
// intermediary representation).
children_.clear();
}

void SetExpressionType(ExpressionType type) { exp_type_ = type; }

//////////////////////////////////////////////////////////////////////////////
Expand Down
64 changes: 64 additions & 0 deletions src/include/expression/group_marker_expression.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,64 @@
//===----------------------------------------------------------------------===//
//
// Peloton
//
// group_marker_expression.h
//
// Identification: src/include/expression/group_marker_expression.h
//
// Copyright (c) 2015-2018, Carnegie Mellon University Database Group
//
//===----------------------------------------------------------------------===//

#pragma once

#include "expression/abstract_expression.h"
#include "optimizer/group_expression.h"
#include "util/hash_util.h"

namespace peloton {

namespace executor {
class ExecutorContext;
} // namespace executor

namespace expression {

//===----------------------------------------------------------------------===//
// GroupMarkerExpression
//===----------------------------------------------------------------------===//

class GroupMarkerExpression : public AbstractExpression {
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Could you add some brief high-level documentation about GroupMarkerExpression and how it is used?

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Resolved.

public:
GroupMarkerExpression(optimizer::GroupID group_id) :
AbstractExpression(ExpressionType::GROUP_MARKER),
group_id_(group_id) {};

optimizer::GroupID GetGroupID() { return group_id_; }

AbstractExpression *Copy() const override {
return new GroupMarkerExpression(group_id_);
}

type::Value Evaluate(const AbstractTuple *tuple1,
const AbstractTuple *tuple2,
executor::ExecutorContext *context) const {
(void)tuple1;
(void)tuple2;
(void)context;
PELOTON_ASSERT(0);
}

void Accept(SqlNodeVisitor *) {
PELOTON_ASSERT(0);
}

protected:
optimizer::GroupID group_id_;

GroupMarkerExpression(const GroupMarkerExpression &other)
: AbstractExpression(other), group_id_(other.group_id_) {}
};

} // namespace expression
} // namespace peloton
12 changes: 11 additions & 1 deletion src/include/expression/tuple_value_expression.h
Original file line number Diff line number Diff line change
Expand Up @@ -79,6 +79,10 @@ class TupleValueExpression : public AbstractExpression {
tuple_idx_ = tuple_idx;
}

inline void SetIsNotNull(bool is_not_null) {
is_not_null_ = is_not_null;
}

/**
* @brief Attribute binding
* @param binding_contexts
Expand Down Expand Up @@ -116,6 +120,8 @@ class TupleValueExpression : public AbstractExpression {
if ((table_name_.empty() xor other.table_name_.empty()) ||
col_name_.empty() xor other.col_name_.empty())
return false;
if (GetIsNotNull() != other.GetIsNotNull())
return false;
bool res = bound_obj_id_ == other.bound_obj_id_;
if (!table_name_.empty() && !other.table_name_.empty())
res = table_name_ == other.table_name_ && res;
Expand Down Expand Up @@ -151,6 +157,8 @@ class TupleValueExpression : public AbstractExpression {

bool GetIsBound() const { return is_bound_; }

bool GetIsNotNull() const { return is_not_null_; }

const std::tuple<oid_t, oid_t, oid_t> &GetBoundOid() const {
return bound_obj_id_;
}
Expand Down Expand Up @@ -185,7 +193,8 @@ class TupleValueExpression : public AbstractExpression {
value_idx_(other.value_idx_),
tuple_idx_(other.tuple_idx_),
table_name_(other.table_name_),
col_name_(other.col_name_) {}
col_name_(other.col_name_),
is_not_null_(other.is_not_null_) {}

// Bound flag
bool is_bound_ = false;
Expand All @@ -196,6 +205,7 @@ class TupleValueExpression : public AbstractExpression {
int tuple_idx_;
std::string table_name_;
std::string col_name_;
bool is_not_null_ = false;

const planner::AttributeInfo *ai_;
};
Expand Down
Loading