Skip to content
This repository has been archived by the owner on Sep 27, 2019. It is now read-only.

Modified pg_column_stats initialization #1352

Open
wants to merge 8 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
17 changes: 17 additions & 0 deletions src/catalog/catalog.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
#include "catalog/catalog.h"

#include "catalog/column_catalog.h"
#include "catalog/column_stats_catalog.h"
#include "catalog/database_catalog.h"
#include "catalog/database_metrics_catalog.h"
#include "catalog/index_catalog.h"
Expand Down Expand Up @@ -159,6 +160,19 @@ void Catalog::BootstrapSystemCatalogs(storage::Database *database,
CATALOG_SCHEMA_NAME, IndexType::BWTREE, IndexConstraintType::DEFAULT,
false, {TableCatalog::ColumnId::DATABASE_OID}, pool_.get(), txn);

// Register the reserved pg_column_stats indexes in pg_index. The constraint
// type of each entry must agree with the index that the ColumnStatsCatalog
// constructor builds under the same oid.
// skey0: unique lookup of a single column's stats by (table_id, column_id).
system_catalogs->GetIndexCatalog()->InsertIndex(
    COLUMN_STATS_CATALOG_SKEY0_OID, COLUMN_STATS_CATALOG_NAME "_skey0",
    COLUMN_STATS_CATALOG_OID, CATALOG_SCHEMA_NAME, IndexType::BWTREE,
    IndexConstraintType::UNIQUE, true,
    {ColumnStatsCatalog::ColumnId::TABLE_ID,
     ColumnStatsCatalog::ColumnId::COLUMN_ID},
    pool_.get(), txn);
// skey1: lookup of all column stats of a table by table_id. Several rows share
// one table_id (one per column), so this index is registered as non-unique,
// matching the IndexConstraintType::DEFAULT index created in the constructor.
system_catalogs->GetIndexCatalog()->InsertIndex(
    COLUMN_STATS_CATALOG_SKEY1_OID, COLUMN_STATS_CATALOG_NAME "_skey1",
    COLUMN_STATS_CATALOG_OID, CATALOG_SCHEMA_NAME, IndexType::BWTREE,
    IndexConstraintType::DEFAULT, false,
    {ColumnStatsCatalog::ColumnId::TABLE_ID}, pool_.get(), txn);

// Insert records(default + pg_catalog namespace) into pg_namespace
system_catalogs->GetSchemaCatalog()->InsertSchema(
CATALOG_SCHEMA_OID, CATALOG_SCHEMA_NAME, pool_.get(), txn);
Expand All @@ -185,6 +199,9 @@ void Catalog::BootstrapSystemCatalogs(storage::Database *database,
system_catalogs->GetTableCatalog()->InsertTable(
LAYOUT_CATALOG_OID, LAYOUT_CATALOG_NAME, CATALOG_SCHEMA_NAME,
database_oid, pool_.get(), txn);
system_catalogs->GetTableCatalog()->InsertTable(
COLUMN_STATS_CATALOG_OID, COLUMN_STATS_CATALOG_NAME, CATALOG_SCHEMA_NAME,
database_oid, pool_.get(), txn);
}

void Catalog::Bootstrap() {
Expand Down
109 changes: 65 additions & 44 deletions src/catalog/column_stats_catalog.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
#include "catalog/column_stats_catalog.h"

#include "catalog/catalog.h"
#include "catalog/schema.h"
#include "executor/logical_tile.h"
#include "optimizer/stats/column_stats_collector.h"
#include "storage/data_table.h"
Expand All @@ -21,52 +22,78 @@
namespace peloton {
namespace catalog {

// Returns a pointer to the single ColumnStatsCatalog instance. The instance is
// a function-local static, so it is constructed from `txn` on the first call
// only; the `txn` argument of later calls is not used.
ColumnStatsCatalog *ColumnStatsCatalog::GetInstance(
    concurrency::TransactionContext *txn) {
  static ColumnStatsCatalog singleton{txn};
  return &singleton;
}

ColumnStatsCatalog::ColumnStatsCatalog(concurrency::TransactionContext *txn)
: AbstractCatalog("CREATE TABLE " CATALOG_DATABASE_NAME
"." CATALOG_SCHEMA_NAME "." COLUMN_STATS_CATALOG_NAME
" ("
"database_id INT NOT NULL, "
"table_id INT NOT NULL, "
"column_id INT NOT NULL, "
"num_rows INT NOT NULL, "
"cardinality DECIMAL NOT NULL, "
"frac_null DECIMAL NOT NULL, "
"most_common_vals VARCHAR, "
"most_common_freqs VARCHAR, "
"histogram_bounds VARCHAR, "
"column_name VARCHAR, "
"has_index BOOLEAN);",
txn) {
// unique key: (database_id, table_id, column_id)
Catalog::GetInstance()->CreateIndex(
CATALOG_DATABASE_NAME, CATALOG_SCHEMA_NAME, COLUMN_STATS_CATALOG_NAME,
{0, 1, 2}, COLUMN_STATS_CATALOG_NAME "_skey0", true, IndexType::BWTREE,
txn);
// non-unique key: (database_id, table_id)
Catalog::GetInstance()->CreateIndex(
CATALOG_DATABASE_NAME, CATALOG_SCHEMA_NAME, COLUMN_STATS_CATALOG_NAME,
{0, 1}, COLUMN_STATS_CATALOG_NAME "_skey1", false, IndexType::BWTREE,
txn);
// Constructs the pg_column_stats catalog inside the database `pg_catalog`.
// The reserved table oid, the table name and a freshly built schema are
// forwarded to AbstractCatalog; `pool` and `txn` are accepted but unused here.
ColumnStatsCatalog::ColumnStatsCatalog(
storage::Database *pg_catalog, UNUSED_ATTRIBUTE type::AbstractPool *pool,
UNUSED_ATTRIBUTE concurrency::TransactionContext *txn)
// InitializeSchema().release() passes a raw Schema pointer to the base class;
// presumably AbstractCatalog (or the table it creates) takes ownership of it
// -- TODO confirm.
: AbstractCatalog(COLUMN_STATS_CATALOG_OID, COLUMN_STATS_CATALOG_NAME,
InitializeSchema().release(), pg_catalog) {
// Add indexes for pg_column_stats
// NOTE(review): the registration order presumably determines the index
// offsets used as IndexId::SECONDARY_KEY_0 / SECONDARY_KEY_1 -- confirm
// before reordering.
// skey0 -- unique key: (table_id, column_id)
AddIndex({ColumnId::TABLE_ID, ColumnId::COLUMN_ID},
COLUMN_STATS_CATALOG_SKEY0_OID, COLUMN_STATS_CATALOG_NAME "_skey0",
IndexConstraintType::UNIQUE);
// skey1 -- non-unique key: (table_id)
AddIndex({ColumnId::TABLE_ID}, COLUMN_STATS_CATALOG_SKEY1_OID,
COLUMN_STATS_CATALOG_NAME "_skey1", IndexConstraintType::DEFAULT);
}

// Empty destructor body: this class performs no explicit cleanup of its own.
ColumnStatsCatalog::~ColumnStatsCatalog() {}

std::unique_ptr<catalog::Schema> ColumnStatsCatalog::InitializeSchema() {
const std::string not_null_constraint_name = "notnull";
const auto not_null_constraint =
catalog::Constraint(ConstraintType::NOTNULL, not_null_constraint_name);

auto table_id_column = catalog::Column(
type::TypeId::INTEGER, type::Type::GetTypeSize(type::TypeId::INTEGER),
"table_id", true);
table_id_column.AddConstraint(not_null_constraint);
auto column_id_column = catalog::Column(
type::TypeId::INTEGER, type::Type::GetTypeSize(type::TypeId::INTEGER),
"column_id", true);
column_id_column.AddConstraint(not_null_constraint);
auto num_rows_column = catalog::Column(
type::TypeId::INTEGER, type::Type::GetTypeSize(type::TypeId::INTEGER),
"num_rows", true);
num_rows_column.AddConstraint(not_null_constraint);
auto cardinality_column = catalog::Column(
type::TypeId::DECIMAL, type::Type::GetTypeSize(type::TypeId::DECIMAL),
"cardinality", true);
cardinality_column.AddConstraint(not_null_constraint);
auto frac_null_column = catalog::Column(
type::TypeId::DECIMAL, type::Type::GetTypeSize(type::TypeId::DECIMAL),
"frac_null", true);
frac_null_column.AddConstraint(not_null_constraint);
auto most_common_vals_column = catalog::Column(
type::TypeId::VARCHAR, type::Type::GetTypeSize(type::TypeId::VARCHAR),
"most_common_vals", false);
auto most_common_freqs_column = catalog::Column(
type::TypeId::VARCHAR, type::Type::GetTypeSize(type::TypeId::VARCHAR),
"most_common_freqs", false);
auto histogram_bounds_column = catalog::Column(
type::TypeId::VARCHAR, type::Type::GetTypeSize(type::TypeId::VARCHAR),
"histogram_bounds", false);
auto column_name_column = catalog::Column(
type::TypeId::VARCHAR, type::Type::GetTypeSize(type::TypeId::VARCHAR),
"column_name", false);
auto has_index_column = catalog::Column(
type::TypeId::BOOLEAN, type::Type::GetTypeSize(type::TypeId::BOOLEAN),
"has_index", true);

std::unique_ptr<catalog::Schema> column_stats_schema(new catalog::Schema(
{table_id_column, column_id_column, num_rows_column, cardinality_column,
frac_null_column, most_common_vals_column, most_common_freqs_column,
histogram_bounds_column, column_name_column, has_index_column}));
return column_stats_schema;
}

bool ColumnStatsCatalog::InsertColumnStats(
oid_t database_id, oid_t table_id, oid_t column_id, int num_rows,
double cardinality, double frac_null, std::string most_common_vals,
oid_t table_id, oid_t column_id, int num_rows, double cardinality,
double frac_null, std::string most_common_vals,
std::string most_common_freqs, std::string histogram_bounds,
std::string column_name, bool has_index, type::AbstractPool *pool,
concurrency::TransactionContext *txn) {
std::unique_ptr<storage::Tuple> tuple(
new storage::Tuple(catalog_table_->GetSchema(), true));

auto val_db_id = type::ValueFactory::GetIntegerValue(database_id);
auto val_table_id = type::ValueFactory::GetIntegerValue(table_id);
auto val_column_id = type::ValueFactory::GetIntegerValue(column_id);
auto val_num_row = type::ValueFactory::GetIntegerValue(num_rows);
Expand Down Expand Up @@ -96,7 +123,6 @@ bool ColumnStatsCatalog::InsertColumnStats(
type::ValueFactory::GetVarcharValue(column_name);
type::Value val_has_index = type::ValueFactory::GetBooleanValue(has_index);

tuple->SetValue(ColumnId::DATABASE_ID, val_db_id, nullptr);
tuple->SetValue(ColumnId::TABLE_ID, val_table_id, nullptr);
tuple->SetValue(ColumnId::COLUMN_ID, val_column_id, nullptr);
tuple->SetValue(ColumnId::NUM_ROWS, val_num_row, nullptr);
Expand All @@ -113,29 +139,25 @@ bool ColumnStatsCatalog::InsertColumnStats(
}

bool ColumnStatsCatalog::DeleteColumnStats(
oid_t database_id, oid_t table_id, oid_t column_id,
concurrency::TransactionContext *txn) {
oid_t table_id, oid_t column_id, concurrency::TransactionContext *txn) {
oid_t index_offset = IndexId::SECONDARY_KEY_0; // Secondary key index

std::vector<type::Value> values;
values.push_back(type::ValueFactory::GetIntegerValue(database_id).Copy());
values.push_back(type::ValueFactory::GetIntegerValue(table_id).Copy());
values.push_back(type::ValueFactory::GetIntegerValue(column_id).Copy());

return DeleteWithIndexScan(index_offset, values, txn);
}

std::unique_ptr<std::vector<type::Value>> ColumnStatsCatalog::GetColumnStats(
oid_t database_id, oid_t table_id, oid_t column_id,
concurrency::TransactionContext *txn) {
oid_t table_id, oid_t column_id, concurrency::TransactionContext *txn) {
std::vector<oid_t> column_ids(
{ColumnId::NUM_ROWS, ColumnId::CARDINALITY, ColumnId::FRAC_NULL,
ColumnId::MOST_COMMON_VALS, ColumnId::MOST_COMMON_FREQS,
ColumnId::HISTOGRAM_BOUNDS, ColumnId::COLUMN_NAME, ColumnId::HAS_INDEX});
oid_t index_offset = IndexId::SECONDARY_KEY_0; // Secondary key index

std::vector<type::Value> values;
values.push_back(type::ValueFactory::GetIntegerValue(database_id).Copy());
values.push_back(type::ValueFactory::GetIntegerValue(table_id).Copy());
values.push_back(type::ValueFactory::GetIntegerValue(column_id).Copy());

Expand Down Expand Up @@ -175,7 +197,7 @@ std::unique_ptr<std::vector<type::Value>> ColumnStatsCatalog::GetColumnStats(

// Return value: number of column stats
size_t ColumnStatsCatalog::GetTableStats(
oid_t database_id, oid_t table_id, concurrency::TransactionContext *txn,
oid_t table_id, concurrency::TransactionContext *txn,
std::map<oid_t, std::unique_ptr<std::vector<type::Value>>>
&column_stats_map) {
std::vector<oid_t> column_ids(
Expand All @@ -186,7 +208,6 @@ size_t ColumnStatsCatalog::GetTableStats(
oid_t index_offset = IndexId::SECONDARY_KEY_1; // Secondary key index

std::vector<type::Value> values;
values.push_back(type::ValueFactory::GetIntegerValue(database_id).Copy());
values.push_back(type::ValueFactory::GetIntegerValue(table_id).Copy());

auto result_tiles =
Expand Down
6 changes: 5 additions & 1 deletion src/catalog/system_catalogs.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@

#include "catalog/system_catalogs.h"
#include "catalog/column_catalog.h"
#include "catalog/column_stats_catalog.h"
#include "catalog/index_catalog.h"
#include "catalog/layout_catalog.h"
#include "catalog/table_catalog.h"
Expand All @@ -36,6 +37,7 @@ SystemCatalogs::SystemCatalogs(storage::Database *database,
pg_query_metrics_(nullptr) {
oid_t database_oid = database->GetOid();
pg_attribute_ = new ColumnCatalog(database, pool, txn);
pg_column_stats_ = new ColumnStatsCatalog(database, pool, txn);
pg_namespace_ = new SchemaCatalog(database, pool, txn);
pg_table_ = new TableCatalog(database, pool, txn);
pg_index_ = new IndexCatalog(database, pool, txn);
Expand All @@ -48,7 +50,8 @@ SystemCatalogs::SystemCatalogs(storage::Database *database,
{database_oid, TABLE_CATALOG_OID},
{database_oid, SCHEMA_CATALOG_OID},
{database_oid, INDEX_CATALOG_OID},
{database_oid, LAYOUT_CATALOG_OID}};
{database_oid, LAYOUT_CATALOG_OID},
{database_oid, COLUMN_STATS_CATALOG_OID}};

for (int i = 0; i < (int)shared_tables.size(); i++) {
oid_t column_id = 0;
Expand All @@ -72,6 +75,7 @@ SystemCatalogs::~SystemCatalogs() {
delete pg_table_;
delete pg_attribute_;
delete pg_namespace_;
delete pg_column_stats_;
if (pg_trigger_) delete pg_trigger_;
// if (pg_proc) delete pg_proc;
if (pg_table_metrics_) delete pg_table_metrics_;
Expand Down
10 changes: 8 additions & 2 deletions src/include/catalog/catalog_defaults.h
Original file line number Diff line number Diff line change
Expand Up @@ -26,17 +26,18 @@ namespace catalog {
#define CATALOG_DATABASE_NAME "peloton"

// Catalog tables
// 5 basic catalog tables
// basic catalog tables
#define DATABASE_CATALOG_NAME "pg_database"
#define SCHEMA_CATALOG_NAME "pg_namespace"
#define TABLE_CATALOG_NAME "pg_table"
#define INDEX_CATALOG_NAME "pg_index"
#define COLUMN_CATALOG_NAME "pg_attribute"
#define LAYOUT_CATALOG_NAME "pg_layout"
#define COLUMN_STATS_CATALOG_NAME "pg_column_stats"

// Local oids from START_OID = 0 to START_OID + OID_OFFSET are reserved
#define OID_OFFSET 100
#define CATALOG_TABLES_COUNT 9
#define CATALOG_TABLES_COUNT 10

// Oid mask for each type
#define DATABASE_OID_MASK (static_cast<oid_t>(catalog::CatalogType::DATABASE))
Expand Down Expand Up @@ -64,6 +65,7 @@ namespace catalog {
#define INDEX_CATALOG_OID (3 | TABLE_OID_MASK)
#define COLUMN_CATALOG_OID (4 | TABLE_OID_MASK)
#define LAYOUT_CATALOG_OID (5 | TABLE_OID_MASK)
#define COLUMN_STATS_CATALOG_OID (6 | TABLE_OID_MASK)

// Reserved pg_attribute index oid
#define COLUMN_CATALOG_PKEY_OID (0 | INDEX_OID_MASK)
Expand Down Expand Up @@ -92,6 +94,10 @@ namespace catalog {
#define LAYOUT_CATALOG_PKEY_OID (13 | INDEX_OID_MASK)
#define LAYOUT_CATALOG_SKEY0_OID (14 | INDEX_OID_MASK)

// Reserved pg_column_stats index oid
#define COLUMN_STATS_CATALOG_SKEY0_OID (15 | INDEX_OID_MASK)
#define COLUMN_STATS_CATALOG_SKEY1_OID (16 | INDEX_OID_MASK)

// Use upper 8 bits indicating catalog type
#define CATALOG_TYPE_OFFSET 24

Expand Down
Loading