diff --git a/src/catalog/catalog.cpp b/src/catalog/catalog.cpp index 0759da7d42f..ec760f59353 100644 --- a/src/catalog/catalog.cpp +++ b/src/catalog/catalog.cpp @@ -13,6 +13,7 @@ #include "catalog/catalog.h" #include "catalog/column_catalog.h" +#include "catalog/column_stats_catalog.h" #include "catalog/database_catalog.h" #include "catalog/database_metrics_catalog.h" #include "catalog/index_catalog.h" @@ -159,6 +160,19 @@ void Catalog::BootstrapSystemCatalogs(storage::Database *database, CATALOG_SCHEMA_NAME, IndexType::BWTREE, IndexConstraintType::DEFAULT, false, {TableCatalog::ColumnId::DATABASE_OID}, pool_.get(), txn); + system_catalogs->GetIndexCatalog()->InsertIndex( + COLUMN_STATS_CATALOG_SKEY0_OID, COLUMN_STATS_CATALOG_NAME "_skey0", + COLUMN_STATS_CATALOG_OID, CATALOG_SCHEMA_NAME, IndexType::BWTREE, + IndexConstraintType::UNIQUE, true, + {ColumnStatsCatalog::ColumnId::TABLE_ID, + ColumnStatsCatalog::ColumnId::COLUMN_ID}, + pool_.get(), txn); + system_catalogs->GetIndexCatalog()->InsertIndex( + COLUMN_STATS_CATALOG_SKEY1_OID, COLUMN_STATS_CATALOG_NAME "_skey1", + COLUMN_STATS_CATALOG_OID, CATALOG_SCHEMA_NAME, IndexType::BWTREE, + IndexConstraintType::UNIQUE, true, + {ColumnStatsCatalog::ColumnId::TABLE_ID}, pool_.get(), txn); + // Insert records(default + pg_catalog namespace) into pg_namespace system_catalogs->GetSchemaCatalog()->InsertSchema( CATALOG_SCHEMA_OID, CATALOG_SCHEMA_NAME, pool_.get(), txn); @@ -185,6 +199,9 @@ void Catalog::BootstrapSystemCatalogs(storage::Database *database, system_catalogs->GetTableCatalog()->InsertTable( LAYOUT_CATALOG_OID, LAYOUT_CATALOG_NAME, CATALOG_SCHEMA_NAME, database_oid, pool_.get(), txn); + system_catalogs->GetTableCatalog()->InsertTable( + COLUMN_STATS_CATALOG_OID, COLUMN_STATS_CATALOG_NAME, CATALOG_SCHEMA_NAME, + database_oid, pool_.get(), txn); } void Catalog::Bootstrap() { diff --git a/src/catalog/column_stats_catalog.cpp b/src/catalog/column_stats_catalog.cpp index bbe94340cdb..d9d84fb7c01 100644 --- a/src/catalog/column_stats_catalog.cpp +++ b/src/catalog/column_stats_catalog.cpp @@ -13,6 +13,7 @@ #include "catalog/column_stats_catalog.h" #include "catalog/catalog.h" +#include "catalog/schema.h" #include "executor/logical_tile.h" #include "optimizer/stats/column_stats_collector.h" #include "storage/data_table.h" @@ -21,52 +22,78 @@ namespace peloton { namespace catalog { -ColumnStatsCatalog *ColumnStatsCatalog::GetInstance( - concurrency::TransactionContext *txn) { - static ColumnStatsCatalog column_stats_catalog{txn}; - return &column_stats_catalog; -} - -ColumnStatsCatalog::ColumnStatsCatalog(concurrency::TransactionContext *txn) - : AbstractCatalog("CREATE TABLE " CATALOG_DATABASE_NAME - "." CATALOG_SCHEMA_NAME "." COLUMN_STATS_CATALOG_NAME - " (" - "database_id INT NOT NULL, " - "table_id INT NOT NULL, " - "column_id INT NOT NULL, " - "num_rows INT NOT NULL, " - "cardinality DECIMAL NOT NULL, " - "frac_null DECIMAL NOT NULL, " - "most_common_vals VARCHAR, " - "most_common_freqs VARCHAR, " - "histogram_bounds VARCHAR, " - "column_name VARCHAR, " - "has_index BOOLEAN);", - txn) { - // unique key: (database_id, table_id, column_id) - Catalog::GetInstance()->CreateIndex( - CATALOG_DATABASE_NAME, CATALOG_SCHEMA_NAME, COLUMN_STATS_CATALOG_NAME, - {0, 1, 2}, COLUMN_STATS_CATALOG_NAME "_skey0", true, IndexType::BWTREE, - txn); - // non-unique key: (database_id, table_id) - Catalog::GetInstance()->CreateIndex( - CATALOG_DATABASE_NAME, CATALOG_SCHEMA_NAME, COLUMN_STATS_CATALOG_NAME, - {0, 1}, COLUMN_STATS_CATALOG_NAME "_skey1", false, IndexType::BWTREE, - txn); +ColumnStatsCatalog::ColumnStatsCatalog( + storage::Database *pg_catalog, UNUSED_ATTRIBUTE type::AbstractPool *pool, + UNUSED_ATTRIBUTE concurrency::TransactionContext *txn) + : AbstractCatalog(COLUMN_STATS_CATALOG_OID, COLUMN_STATS_CATALOG_NAME, + InitializeSchema().release(), pg_catalog) { + // Add indexes for pg_column_stats + AddIndex({ColumnId::TABLE_ID, ColumnId::COLUMN_ID}, + COLUMN_STATS_CATALOG_SKEY0_OID, COLUMN_STATS_CATALOG_NAME "_skey0", + IndexConstraintType::UNIQUE); + AddIndex({ColumnId::TABLE_ID}, COLUMN_STATS_CATALOG_SKEY1_OID, + COLUMN_STATS_CATALOG_NAME "_skey1", IndexConstraintType::DEFAULT); } ColumnStatsCatalog::~ColumnStatsCatalog() {} +std::unique_ptr ColumnStatsCatalog::InitializeSchema() { + const std::string not_null_constraint_name = "notnull"; + const auto not_null_constraint = + catalog::Constraint(ConstraintType::NOTNULL, not_null_constraint_name); + + auto table_id_column = catalog::Column( + type::TypeId::INTEGER, type::Type::GetTypeSize(type::TypeId::INTEGER), + "table_id", true); + table_id_column.AddConstraint(not_null_constraint); + auto column_id_column = catalog::Column( + type::TypeId::INTEGER, type::Type::GetTypeSize(type::TypeId::INTEGER), + "column_id", true); + column_id_column.AddConstraint(not_null_constraint); + auto num_rows_column = catalog::Column( + type::TypeId::INTEGER, type::Type::GetTypeSize(type::TypeId::INTEGER), + "num_rows", true); + num_rows_column.AddConstraint(not_null_constraint); + auto cardinality_column = catalog::Column( + type::TypeId::DECIMAL, type::Type::GetTypeSize(type::TypeId::DECIMAL), + "cardinality", true); + cardinality_column.AddConstraint(not_null_constraint); + auto frac_null_column = catalog::Column( + type::TypeId::DECIMAL, type::Type::GetTypeSize(type::TypeId::DECIMAL), + "frac_null", true); + frac_null_column.AddConstraint(not_null_constraint); + auto most_common_vals_column = catalog::Column( + type::TypeId::VARCHAR, type::Type::GetTypeSize(type::TypeId::VARCHAR), + "most_common_vals", false); + auto most_common_freqs_column = catalog::Column( + type::TypeId::VARCHAR, type::Type::GetTypeSize(type::TypeId::VARCHAR), + "most_common_freqs", false); + auto histogram_bounds_column = catalog::Column( + type::TypeId::VARCHAR, type::Type::GetTypeSize(type::TypeId::VARCHAR), + "histogram_bounds", false); + auto column_name_column = catalog::Column( + type::TypeId::VARCHAR, type::Type::GetTypeSize(type::TypeId::VARCHAR), + "column_name", false); + auto has_index_column = catalog::Column( + type::TypeId::BOOLEAN, type::Type::GetTypeSize(type::TypeId::BOOLEAN), + "has_index", true); + + std::unique_ptr column_stats_schema(new catalog::Schema( + {table_id_column, column_id_column, num_rows_column, cardinality_column, + frac_null_column, most_common_vals_column, most_common_freqs_column, + histogram_bounds_column, column_name_column, has_index_column})); + return column_stats_schema; +} + bool ColumnStatsCatalog::InsertColumnStats( - oid_t database_id, oid_t table_id, oid_t column_id, int num_rows, - double cardinality, double frac_null, std::string most_common_vals, + oid_t table_id, oid_t column_id, int num_rows, double cardinality, + double frac_null, std::string most_common_vals, std::string most_common_freqs, std::string histogram_bounds, std::string column_name, bool has_index, type::AbstractPool *pool, concurrency::TransactionContext *txn) { std::unique_ptr tuple( new storage::Tuple(catalog_table_->GetSchema(), true)); - auto val_db_id = type::ValueFactory::GetIntegerValue(database_id); auto val_table_id = type::ValueFactory::GetIntegerValue(table_id); auto val_column_id = type::ValueFactory::GetIntegerValue(column_id); auto val_num_row = type::ValueFactory::GetIntegerValue(num_rows); @@ -96,7 +123,6 @@ bool ColumnStatsCatalog::InsertColumnStats( type::ValueFactory::GetVarcharValue(column_name); type::Value val_has_index = type::ValueFactory::GetBooleanValue(has_index); - tuple->SetValue(ColumnId::DATABASE_ID, val_db_id, nullptr); tuple->SetValue(ColumnId::TABLE_ID, val_table_id, nullptr); tuple->SetValue(ColumnId::COLUMN_ID, val_column_id, nullptr); tuple->SetValue(ColumnId::NUM_ROWS, val_num_row, nullptr); @@ -113,12 +139,10 @@ bool ColumnStatsCatalog::InsertColumnStats( } bool ColumnStatsCatalog::DeleteColumnStats( - oid_t database_id, oid_t table_id, oid_t column_id, - concurrency::TransactionContext *txn) { + oid_t table_id, oid_t column_id, concurrency::TransactionContext *txn) { oid_t index_offset = IndexId::SECONDARY_KEY_0; // Secondary key index std::vector values; - values.push_back(type::ValueFactory::GetIntegerValue(database_id).Copy()); values.push_back(type::ValueFactory::GetIntegerValue(table_id).Copy()); values.push_back(type::ValueFactory::GetIntegerValue(column_id).Copy()); @@ -126,8 +150,7 @@ bool ColumnStatsCatalog::DeleteColumnStats( } std::unique_ptr> ColumnStatsCatalog::GetColumnStats( - oid_t database_id, oid_t table_id, oid_t column_id, - concurrency::TransactionContext *txn) { + oid_t table_id, oid_t column_id, concurrency::TransactionContext *txn) { std::vector column_ids( {ColumnId::NUM_ROWS, ColumnId::CARDINALITY, ColumnId::FRAC_NULL, ColumnId::MOST_COMMON_VALS, ColumnId::MOST_COMMON_FREQS, @@ -135,7 +158,6 @@ std::unique_ptr> ColumnStatsCatalog::GetColumnStats( oid_t index_offset = IndexId::SECONDARY_KEY_0; // Secondary key index std::vector values; - values.push_back(type::ValueFactory::GetIntegerValue(database_id).Copy()); values.push_back(type::ValueFactory::GetIntegerValue(table_id).Copy()); values.push_back(type::ValueFactory::GetIntegerValue(column_id).Copy()); @@ -175,7 +197,7 @@ std::unique_ptr> ColumnStatsCatalog::GetColumnStats( // Return value: number of column stats size_t ColumnStatsCatalog::GetTableStats( - oid_t database_id, oid_t table_id, concurrency::TransactionContext *txn, + oid_t table_id, concurrency::TransactionContext *txn, std::map>> &column_stats_map) { std::vector column_ids( @@ -186,7 +208,6 @@ size_t ColumnStatsCatalog::GetTableStats( oid_t index_offset = IndexId::SECONDARY_KEY_1; // Secondary key index std::vector values; - values.push_back(type::ValueFactory::GetIntegerValue(database_id).Copy()); values.push_back(type::ValueFactory::GetIntegerValue(table_id).Copy()); auto result_tiles = diff --git a/src/catalog/system_catalogs.cpp b/src/catalog/system_catalogs.cpp index 3900c165f74..e5f3fee02df 100644 --- a/src/catalog/system_catalogs.cpp +++ b/src/catalog/system_catalogs.cpp @@ -12,6 +12,7 @@ #include "catalog/system_catalogs.h" #include "catalog/column_catalog.h" +#include "catalog/column_stats_catalog.h" #include "catalog/index_catalog.h" #include "catalog/layout_catalog.h" #include "catalog/table_catalog.h" @@ -36,6 +37,7 @@ SystemCatalogs::SystemCatalogs(storage::Database *database, pg_query_metrics_(nullptr) { oid_t database_oid = database->GetOid(); pg_attribute_ = new ColumnCatalog(database, pool, txn); + pg_column_stats_ = new ColumnStatsCatalog(database, pool, txn); pg_namespace_ = new SchemaCatalog(database, pool, txn); pg_table_ = new TableCatalog(database, pool, txn); pg_index_ = new IndexCatalog(database, pool, txn); @@ -48,7 +50,8 @@ SystemCatalogs::SystemCatalogs(storage::Database *database, {database_oid, TABLE_CATALOG_OID}, {database_oid, SCHEMA_CATALOG_OID}, {database_oid, INDEX_CATALOG_OID}, - {database_oid, LAYOUT_CATALOG_OID}}; + {database_oid, LAYOUT_CATALOG_OID}, + {database_oid, COLUMN_STATS_CATALOG_OID}}; for (int i = 0; i < (int)shared_tables.size(); i++) { oid_t column_id = 0; @@ -72,6 +75,7 @@ SystemCatalogs::~SystemCatalogs() { delete pg_table_; delete pg_attribute_; delete pg_namespace_; + delete pg_column_stats_; if (pg_trigger_) delete pg_trigger_; // if (pg_proc) delete pg_proc; if (pg_table_metrics_) delete pg_table_metrics_; diff --git a/src/include/catalog/catalog_defaults.h b/src/include/catalog/catalog_defaults.h index 117a7e8ab6d..bb2abfe2400 100644 --- a/src/include/catalog/catalog_defaults.h +++ b/src/include/catalog/catalog_defaults.h @@ -26,17 +26,18 @@ namespace catalog { #define CATALOG_DATABASE_NAME "peloton" // Catalog tables -// 5 basic catalog tables +// basic catalog tables #define DATABASE_CATALOG_NAME "pg_database" #define SCHEMA_CATALOG_NAME "pg_namespace" #define TABLE_CATALOG_NAME "pg_table" #define INDEX_CATALOG_NAME "pg_index" #define COLUMN_CATALOG_NAME "pg_attribute" #define LAYOUT_CATALOG_NAME "pg_layout" +#define COLUMN_STATS_CATALOG_NAME "pg_column_stats" // Local oids from START_OID = 0 to START_OID + OID_OFFSET are reserved #define OID_OFFSET 100 -#define CATALOG_TABLES_COUNT 9 +#define CATALOG_TABLES_COUNT 10 // Oid mask for each type #define DATABASE_OID_MASK (static_cast(catalog::CatalogType::DATABASE)) @@ -64,6 +65,7 @@ namespace catalog { #define INDEX_CATALOG_OID (3 | TABLE_OID_MASK) #define COLUMN_CATALOG_OID (4 | TABLE_OID_MASK) #define LAYOUT_CATALOG_OID (5 | TABLE_OID_MASK) +#define COLUMN_STATS_CATALOG_OID (6 | TABLE_OID_MASK) // Reserved pg_column index oid #define COLUMN_CATALOG_PKEY_OID (0 | INDEX_OID_MASK) @@ -92,6 +94,10 @@ namespace catalog { #define LAYOUT_CATALOG_PKEY_OID (13 | INDEX_OID_MASK) #define LAYOUT_CATALOG_SKEY0_OID (14 | INDEX_OID_MASK) +// Reserved pg_column_stats index oid +#define COLUMN_STATS_CATALOG_SKEY0_OID (15 | INDEX_OID_MASK) +#define COLUMN_STATS_CATALOG_SKEY1_OID (16 | INDEX_OID_MASK) + // Use upper 8 bits indicating catalog type #define CATALOG_TYPE_OFFSET 24 diff --git a/src/include/catalog/column_stats_catalog.h b/src/include/catalog/column_stats_catalog.h index d409a9da338..c22e12a38ec 100644 --- a/src/include/catalog/column_stats_catalog.h +++ b/src/include/catalog/column_stats_catalog.h @@ -14,18 +14,18 @@ // pg_column_stats // // Schema: (column offset: column_name) -// 0: database_id (pkey) -// 1: table_id (pkey) -// 2: column_id (pkey) -// 3: num_rows -// 4: cardinality -// 5: frac_null -// 6: most_common_vals -// 7: most_common_freqs -// 8: histogram_bounds +// 0: table_id (pkey) +// 1: column_id (pkey) +// 2: num_rows +// 3: cardinality +// 4: frac_null +// 5: most_common_vals +// 6: most_common_freqs +// 7: histogram_bounds // // Indexes: (index offset: indexed columns) -// 0: name & database_oid (unique & primary key) +// 0: table_id & column_id (unique) +// 1: table_id (non-unique) // //===----------------------------------------------------------------------===// @@ -35,8 +35,6 @@ #include "catalog/abstract_catalog.h" -#define COLUMN_STATS_CATALOG_NAME "pg_column_stats" - namespace peloton { namespace optimizer { @@ -47,50 +45,52 @@ namespace catalog { class ColumnStatsCatalog : public AbstractCatalog { public: - ~ColumnStatsCatalog(); + ColumnStatsCatalog(storage::Database *pg_catalog, type::AbstractPool *pool, + concurrency::TransactionContext *txn); - // Global Singleton - static ColumnStatsCatalog *GetInstance( - concurrency::TransactionContext *txn = nullptr); + ~ColumnStatsCatalog(); //===--------------------------------------------------------------------===// // write Related API //===--------------------------------------------------------------------===// - bool InsertColumnStats(oid_t database_id, oid_t table_id, oid_t column_id, - int num_rows, double cardinality, double frac_null, + bool InsertColumnStats(oid_t table_id, oid_t column_id, int num_rows, + double cardinality, double frac_null, std::string most_common_vals, std::string most_common_freqs, std::string histogram_bounds, std::string column_name, bool has_index, type::AbstractPool *pool, concurrency::TransactionContext *txn); - bool DeleteColumnStats(oid_t database_id, oid_t table_id, oid_t column_id, + bool DeleteColumnStats(oid_t table_id, oid_t column_id, concurrency::TransactionContext *txn); //===--------------------------------------------------------------------===// // Read-only Related API //===--------------------------------------------------------------------===// std::unique_ptr> GetColumnStats( - oid_t database_id, oid_t table_id, oid_t column_id, - concurrency::TransactionContext *txn); + oid_t table_id, oid_t column_id, concurrency::TransactionContext *txn); size_t GetTableStats( - oid_t database_id, oid_t table_id, concurrency::TransactionContext *txn, - std::map>> & - column_stats_map); + oid_t table_id, concurrency::TransactionContext *txn, + std::map>> + &column_stats_map); // TODO: add more if needed + /** @brief private function for initialize schema of pg_index + * @return unqiue pointer to schema + */ + std::unique_ptr InitializeSchema(); + enum ColumnId { - DATABASE_ID = 0, - TABLE_ID = 1, - COLUMN_ID = 2, - NUM_ROWS = 3, - CARDINALITY = 4, - FRAC_NULL = 5, - MOST_COMMON_VALS = 6, - MOST_COMMON_FREQS = 7, - HISTOGRAM_BOUNDS = 8, - COLUMN_NAME = 9, - HAS_INDEX = 10, + TABLE_ID = 0, + COLUMN_ID = 1, + NUM_ROWS = 2, + CARDINALITY = 3, + FRAC_NULL = 4, + MOST_COMMON_VALS = 5, + MOST_COMMON_FREQS = 6, + HISTOGRAM_BOUNDS = 7, + COLUMN_NAME = 8, + HAS_INDEX = 9, // Add new columns here in creation order }; @@ -106,7 +106,6 @@ class ColumnStatsCatalog : public AbstractCatalog { }; private: - ColumnStatsCatalog(concurrency::TransactionContext *txn); enum IndexId { SECONDARY_KEY_0 = 0, diff --git a/src/include/catalog/system_catalogs.h b/src/include/catalog/system_catalogs.h index 9792d180f9d..d45c98dbef9 100644 --- a/src/include/catalog/system_catalogs.h +++ b/src/include/catalog/system_catalogs.h @@ -29,12 +29,13 @@ class Database; } // namespace storage namespace catalog { +class ColumnCatalog; +class ColumnStatsCatalog; class DatabaseCatalog; -class SchemaCatalog; -class TableCatalog; class IndexCatalog; -class ColumnCatalog; class LayoutCatalog; +class SchemaCatalog; +class TableCatalog; class SystemCatalogs { public: @@ -59,6 +60,13 @@ class SystemCatalogs { return pg_attribute_; } + ColumnStatsCatalog *GetColumnStatsCatalog() { + if (!pg_column_stats_) { + throw CatalogException("ColumnStats catalog has not been initialized"); + } + return pg_column_stats_; + } + SchemaCatalog *GetSchemaCatalog() { if (!pg_namespace_) { throw CatalogException("schema catalog has not been initialized"); @@ -117,6 +125,7 @@ class SystemCatalogs { private: ColumnCatalog *pg_attribute_; + ColumnStatsCatalog *pg_column_stats_; SchemaCatalog *pg_namespace_; TableCatalog *pg_table_; IndexCatalog *pg_index_; diff --git a/src/include/optimizer/stats/stats_storage.h b/src/include/optimizer/stats/stats_storage.h index 85472c6a12d..bf906755fe8 100644 --- a/src/include/optimizer/stats/stats_storage.h +++ b/src/include/optimizer/stats/stats_storage.h @@ -41,10 +41,6 @@ class StatsStorage { StatsStorage(); - /* Functions for managing stats table and schema */ - - void CreateStatsTableInCatalog(); - /* Functions for adding, updating and quering stats */ void InsertOrUpdateTableStats(storage::DataTable *table, @@ -71,8 +67,8 @@ class StatsStorage { /* Functions for triggerring stats collection */ - ResultType AnalyzeStatsForAllTables( - concurrency::TransactionContext *txn = nullptr); + ResultType AnalyzeStatsForAllTablesWithDatabaseOid( + oid_t database_oid, concurrency::TransactionContext *txn = nullptr); ResultType AnalyzeStatsForTable( storage::DataTable *table, diff --git a/src/optimizer/stats/stats_storage.cpp b/src/optimizer/stats/stats_storage.cpp index d1b2fed6b12..6cadc47fb78 100644 --- a/src/optimizer/stats/stats_storage.cpp +++ b/src/optimizer/stats/stats_storage.cpp @@ -14,6 +14,7 @@ #include "catalog/catalog.h" #include "catalog/column_stats_catalog.h" +#include "catalog/system_catalogs.h" #include "concurrency/transaction_manager_factory.h" #include "optimizer/stats/column_stats.h" #include "optimizer/stats/table_stats.h" @@ -31,24 +32,11 @@ StatsStorage *StatsStorage::GetInstance() { /** * StatsStorage - Constructor of StatsStorage. - * In the construcotr, `pg_column_stats` table and `samples_db` database are - * created. + * In the construcotr, the EphemeralPool is created. */ StatsStorage::StatsStorage() { pool_.reset(new type::EphemeralPool()); - CreateStatsTableInCatalog(); } - -/** - * CreateStatsCatalog - Create 'pg_column_stats' table in the catalog database. - */ -void StatsStorage::CreateStatsTableInCatalog() { - auto &txn_manager = concurrency::TransactionManagerFactory::GetInstance(); - auto txn = txn_manager.BeginTransaction(); - catalog::ColumnStatsCatalog::GetInstance(txn); - txn_manager.CommitTransaction(txn); -} - /** * InsertOrUpdateTableStats - Add or update all column stats of a table. * This function iterates all column stats in the table stats and insert column @@ -109,7 +97,9 @@ void StatsStorage::InsertOrUpdateColumnStats( LOG_TRACE("InsertOrUpdateColumnStats, %d, %lf, %lf, %s, %s, %s", num_rows, cardinality, frac_null, most_common_vals.c_str(), most_common_freqs.c_str(), histogram_bounds.c_str()); - auto column_stats_catalog = catalog::ColumnStatsCatalog::GetInstance(nullptr); + auto pg_column_stats = catalog::Catalog::GetInstance() + ->GetSystemCatalogs(database_id) + ->GetColumnStatsCatalog(); auto &txn_manager = concurrency::TransactionManagerFactory::GetInstance(); bool single_statement_txn = false; @@ -117,12 +107,11 @@ void StatsStorage::InsertOrUpdateColumnStats( single_statement_txn = true; txn = txn_manager.BeginTransaction(); } - column_stats_catalog->DeleteColumnStats(database_id, table_id, column_id, - txn); - column_stats_catalog->InsertColumnStats( - database_id, table_id, column_id, num_rows, cardinality, frac_null, - most_common_vals, most_common_freqs, histogram_bounds, column_name, - has_index, pool_.get(), txn); + pg_column_stats->DeleteColumnStats(table_id, column_id, txn); + pg_column_stats->InsertColumnStats(table_id, column_id, num_rows, cardinality, + frac_null, most_common_vals, + most_common_freqs, histogram_bounds, + column_name, has_index, pool_.get(), txn); if (single_statement_txn) { txn_manager.CommitTransaction(txn); @@ -136,12 +125,14 @@ void StatsStorage::InsertOrUpdateColumnStats( std::shared_ptr StatsStorage::GetColumnStatsByID(oid_t database_id, oid_t table_id, oid_t column_id) { - auto column_stats_catalog = catalog::ColumnStatsCatalog::GetInstance(nullptr); + auto pg_column_stats = catalog::Catalog::GetInstance() + ->GetSystemCatalogs(database_id) + ->GetColumnStatsCatalog(); auto &txn_manager = concurrency::TransactionManagerFactory::GetInstance(); auto txn = txn_manager.BeginTransaction(); // std::unique_ptr> column_stats_vector - auto column_stats_vector = column_stats_catalog->GetColumnStats( - database_id, table_id, column_id, txn); + auto column_stats_vector = + pg_column_stats->GetColumnStats(table_id, column_id, txn); txn_manager.CommitTransaction(txn); return ConvertVectorToColumnStats(database_id, table_id, column_id, @@ -218,10 +209,11 @@ std::shared_ptr StatsStorage::ConvertVectorToColumnStats( */ std::shared_ptr StatsStorage::GetTableStats( oid_t database_id, oid_t table_id, concurrency::TransactionContext *txn) { - auto column_stats_catalog = catalog::ColumnStatsCatalog::GetInstance(nullptr); + auto pg_column_stats = catalog::Catalog::GetInstance() + ->GetSystemCatalogs(database_id) + ->GetColumnStatsCatalog(); std::map>> column_stats_map; - column_stats_catalog->GetTableStats(database_id, table_id, txn, - column_stats_map); + pg_column_stats->GetTableStats(table_id, txn, column_stats_map); std::vector> column_stats_ptrs; for (auto it = column_stats_map.begin(); it != column_stats_map.end(); ++it) { @@ -242,10 +234,11 @@ std::shared_ptr StatsStorage::GetTableStats( std::shared_ptr StatsStorage::GetTableStats( oid_t database_id, oid_t table_id, std::vector column_ids, concurrency::TransactionContext *txn) { - auto column_stats_catalog = catalog::ColumnStatsCatalog::GetInstance(nullptr); + auto pg_column_stats = catalog::Catalog::GetInstance() + ->GetSystemCatalogs(database_id) + ->GetColumnStatsCatalog(); std::map>> column_stats_map; - column_stats_catalog->GetTableStats(database_id, table_id, txn, - column_stats_map); + pg_column_stats->GetTableStats(table_id, txn, column_stats_map); std::vector> column_stats_ptrs; for (oid_t col_id : column_ids) { @@ -263,31 +256,31 @@ std::shared_ptr StatsStorage::GetTableStats( * AnalyzeStatsForAllTables - This function iterates all databases and * datatables to collect their stats and store them in the column_stats_catalog. */ -ResultType StatsStorage::AnalyzeStatsForAllTables( - concurrency::TransactionContext *txn) { +ResultType StatsStorage::AnalyzeStatsForAllTablesWithDatabaseOid( + oid_t database_oid, concurrency::TransactionContext *txn) { if (txn == nullptr) { LOG_TRACE("Do not have transaction to analyze all tables' stats."); return ResultType::FAILURE; } auto storage_manager = storage::StorageManager::GetInstance(); - - oid_t database_count = storage_manager->GetDatabaseCount(); - LOG_TRACE("Database count: %u", database_count); - for (oid_t db_offset = 0; db_offset < database_count; db_offset++) { - auto database = storage_manager->GetDatabaseWithOffset(db_offset); - if (database->GetOid() == CATALOG_DATABASE_OID) { + auto database = storage_manager->GetDatabaseWithOid(database_oid); + PELOTON_ASSERT(database != nullptr); + auto pg_database = + catalog::Catalog::GetInstance()->GetDatabaseObject(database_oid, txn); + auto table_objects = pg_database->GetTableObjects(); + for (auto &table_object_entry : table_objects) { + auto table_oid = table_object_entry.first; + auto table_object = table_object_entry.second; + if (table_object->GetSchemaName() == CATALOG_SCHEMA_NAME) { continue; } - oid_t table_count = database->GetTableCount(); - for (oid_t table_offset = 0; table_offset < table_count; table_offset++) { - auto table = database->GetTable(table_offset); - LOG_TRACE("Analyzing table: %s", table->GetName().c_str()); - std::unique_ptr table_stats_collector( - new TableStatsCollector(table)); - table_stats_collector->CollectColumnStats(); - InsertOrUpdateTableStats(table, table_stats_collector.get(), txn); - } + LOG_TRACE("Analyzing table: %s", table_object->GetTableName().c_str()); + auto table = database->GetTableWithOid(table_oid); + std::unique_ptr table_stats_collector( + new TableStatsCollector(table)); + table_stats_collector->CollectColumnStats(); + InsertOrUpdateTableStats(table, table_stats_collector.get(), txn); } return ResultType::SUCCESS; } diff --git a/test/executor/drop_test.cpp b/test/executor/drop_test.cpp index 10a9989a139..8590af254f9 100644 --- a/test/executor/drop_test.cpp +++ b/test/executor/drop_test.cpp @@ -124,7 +124,6 @@ TEST_F(DropTests, DroppingTable) { ->GetTableObjects() .size(), expeected_table_count); - // free the database just created catalog->DropDatabaseWithName(TEST_DB_NAME, txn); txn_manager.CommitTransaction(txn); diff --git a/test/executor/testing_executor_util.cpp b/test/executor/testing_executor_util.cpp index 455cf33e5e7..c2667a9988c 100644 --- a/test/executor/testing_executor_util.cpp +++ b/test/executor/testing_executor_util.cpp @@ -47,6 +47,9 @@ using ::testing::Return; namespace peloton { namespace test { +// Initialize the default database_oid +oid_t TestingExecutorUtil::database_oid = INVALID_OID; + storage::Database *TestingExecutorUtil::InitializeDatabase( const std::string &db_name) { auto catalog = catalog::Catalog::GetInstance(); @@ -56,6 +59,7 @@ storage::Database *TestingExecutorUtil::InitializeDatabase( EXPECT_EQ(ResultType::SUCCESS, result); auto database = catalog->GetDatabaseWithName(db_name, txn); txn_manager.CommitTransaction(txn); + database_oid = database->GetOid(); return (database); } @@ -66,6 +70,7 @@ void TestingExecutorUtil::DeleteDatabase(const std::string &db_name) { auto result = catalog->DropDatabaseWithName(db_name, txn); txn_manager.CommitTransaction(txn); EXPECT_EQ(ResultType::SUCCESS, result); + database_oid = INVALID_OID; } /** @brief Helper function for defining schema */ @@ -349,7 +354,7 @@ storage::DataTable *TestingExecutorUtil::CreateTable( bool own_schema = true; bool adapt_table = false; storage::DataTable *table = storage::TableFactory::GetDataTable( - INVALID_OID, table_oid, table_schema, table_name, + database_oid, table_oid, table_schema, table_name, tuples_per_tilegroup_count, own_schema, adapt_table); if (indexes == true) { @@ -422,7 +427,7 @@ storage::DataTable *TestingExecutorUtil::CreateTable( } storage::DataTable *TestingExecutorUtil::CreateTableUpdateCatalog( - int tuples_per_tilegroup_count, std::string &db_name) { + int tuples_per_tilegroup_count, const std::string &db_name) { auto table_schema = std::unique_ptr( new catalog::Schema({GetColumnInfo(0), GetColumnInfo(1), GetColumnInfo(2), GetColumnInfo(3)})); diff --git a/test/include/executor/testing_executor_util.h b/test/include/executor/testing_executor_util.h index 5d9dc14b5dd..fa081a1f0ba 100644 --- a/test/include/executor/testing_executor_util.h +++ b/test/include/executor/testing_executor_util.h @@ -60,9 +60,16 @@ class TestingExecutorUtil { * @brief Intializes the catalog with a new database with the give name. */ static storage::Database *InitializeDatabase(const std::string &db_name); - + /** + * @brief Drops the catalog of the database with the give name. + */ static void DeleteDatabase(const std::string &db_name); + /** + * @brief Returns the current database_oid. + */ + static oid_t GetDatabaseOid() { return database_oid; } + /** * @brief Creates a basic tile group with allocated but not populated * tuples. @@ -81,7 +88,7 @@ class TestingExecutorUtil { * @return A pointer to the DataTable created. */ static storage::DataTable *CreateTableUpdateCatalog( - int tuples_per_tilegroup_count, std::string &db_name); + int tuples_per_tilegroup_count, const std::string &db_name); /** @brief Creates a basic table with allocated and populated tuples */ static storage::DataTable *CreateAndPopulateTable(); @@ -122,6 +129,9 @@ class TestingExecutorUtil { /** Print the tuples from a vector of logical tiles */ static std::string GetTileVectorInfo( std::vector> &tile_vec); + + private: + static oid_t database_oid; }; } // namespace test diff --git a/test/optimizer/selectivity_test.cpp b/test/optimizer/selectivity_test.cpp index cbb8df08d2c..d845cd40c37 100644 --- a/test/optimizer/selectivity_test.cpp +++ b/test/optimizer/selectivity_test.cpp @@ -119,6 +119,10 @@ TEST_F(SelectivityTests, RangeSelectivityTest) { TEST_F(SelectivityTests, LikeSelectivityTest) { const int tuple_count = 1000; const int tuple_per_tilegroup = 100; + const std::string db_name = "test_db"; + + // Initialize the DB inorder to initialize pg_column_stats + TestingExecutorUtil::InitializeDatabase(db_name); // Create a table auto &txn_manager = concurrency::TransactionManagerFactory::GetInstance(); @@ -160,6 +164,9 @@ TEST_F(SelectivityTests, LikeSelectivityTest) { EXPECT_EQ(like_than_sel_1, 1); EXPECT_EQ(like_than_sel_2, 0); + + // Drop the database created + TestingExecutorUtil::DeleteDatabase(db_name); } TEST_F(SelectivityTests, EqualSelectivityTest) { diff --git a/test/optimizer/stats_storage_test.cpp b/test/optimizer/stats_storage_test.cpp index edf42131355..66526b70ebe 100644 --- a/test/optimizer/stats_storage_test.cpp +++ b/test/optimizer/stats_storage_test.cpp @@ -51,15 +51,15 @@ std::unique_ptr InitializeTestTable() { storage::DataTable *CreateTestDBAndTable() { const std::string test_db_name = "test_db"; - auto database = TestingExecutorUtil::InitializeDatabase(test_db_name); + TestingExecutorUtil::InitializeDatabase(test_db_name); auto &txn_manager = concurrency::TransactionManagerFactory::GetInstance(); auto txn = txn_manager.BeginTransaction(); storage::DataTable *data_table = - TestingExecutorUtil::CreateTable(tuple_per_tilegroup, false); + TestingExecutorUtil::CreateTableUpdateCatalog(tuple_per_tilegroup, + test_db_name); TestingExecutorUtil::PopulateTable(data_table, tuple_count, false, false, true, txn); - database->AddTable(data_table); txn_manager.CommitTransaction(txn); return data_table; } @@ -93,6 +93,8 @@ void VerifyAndPrintColumnStats(storage::DataTable *data_table, } TEST_F(StatsStorageTests, InsertAndGetTableStatsTest) { + const std::string db_name = "test_db"; + TestingExecutorUtil::InitializeDatabase(db_name); auto data_table = InitializeTestTable(); // Collect stats. @@ -108,6 +110,7 @@ TEST_F(StatsStorageTests, InsertAndGetTableStatsTest) { table_stats_collector.get()); VerifyAndPrintColumnStats(data_table.get(), 4); + TestingExecutorUtil::DeleteDatabase(db_name); } TEST_F(StatsStorageTests, InsertAndGetColumnStatsTest) { @@ -115,7 +118,10 @@ TEST_F(StatsStorageTests, InsertAndGetColumnStatsTest) { (void)catalog; StatsStorage *stats_storage = StatsStorage::GetInstance(); - oid_t database_id = 1; + const std::string db_name = "test_db"; + TestingExecutorUtil::InitializeDatabase(db_name); + + oid_t database_id = TestingExecutorUtil::GetDatabaseOid(); oid_t table_id = 2; oid_t column_id = 3; int num_rows = 10; @@ -146,6 +152,7 @@ TEST_F(StatsStorageTests, InsertAndGetColumnStatsTest) { auto column_stats_ptr2 = stats_storage->GetColumnStatsByID(database_id, table_id, column_id + 1); EXPECT_EQ(column_stats_ptr2, nullptr); + TestingExecutorUtil::DeleteDatabase(db_name); } TEST_F(StatsStorageTests, UpdateColumnStatsTest) { @@ -153,7 +160,10 @@ TEST_F(StatsStorageTests, UpdateColumnStatsTest) { (void)catalog; StatsStorage *stats_storage = StatsStorage::GetInstance(); - oid_t database_id = 1; + const std::string db_name = "test_db"; + TestingExecutorUtil::InitializeDatabase(db_name); + + oid_t database_id = TestingExecutorUtil::GetDatabaseOid(); oid_t table_id = 2; oid_t column_id = 3; @@ -193,9 +203,13 @@ TEST_F(StatsStorageTests, UpdateColumnStatsTest) { EXPECT_EQ(column_stats_ptr->frac_null, frac_null_1); EXPECT_EQ(column_stats_ptr->column_name, column_name_1); + TestingExecutorUtil::DeleteDatabase(db_name); } TEST_F(StatsStorageTests, AnalyzeStatsForTableTest) { + const std::string db_name = "test_db"; + TestingExecutorUtil::InitializeDatabase(db_name); + auto data_table = InitializeTestTable(); // Analyze table. @@ -213,43 +227,53 @@ TEST_F(StatsStorageTests, AnalyzeStatsForTableTest) { // Check the correctness of the stats. VerifyAndPrintColumnStats(data_table.get(), 4); + TestingExecutorUtil::DeleteDatabase(db_name); } // TODO: Add more tables. TEST_F(StatsStorageTests, AnalyzeStatsForAllTablesTest) { + const std::string db_name = "test_db"; auto data_table = CreateTestDBAndTable(); + auto db_oid = data_table->GetDatabaseOid(); StatsStorage *stats_storage = StatsStorage::GetInstance(); // Must pass in the transaction. - ResultType result = stats_storage->AnalyzeStatsForAllTables(); + ResultType result = + stats_storage->AnalyzeStatsForAllTablesWithDatabaseOid(db_oid); EXPECT_EQ(result, ResultType::FAILURE); auto &txn_manager = concurrency::TransactionManagerFactory::GetInstance(); auto txn = txn_manager.BeginTransaction(); - result = stats_storage->AnalyzeStatsForAllTables(txn); + result = stats_storage->AnalyzeStatsForAllTablesWithDatabaseOid(db_oid, txn); EXPECT_EQ(result, ResultType::SUCCESS); txn_manager.CommitTransaction(txn); // Check the correctness of the stats. VerifyAndPrintColumnStats(data_table, 4); + TestingExecutorUtil::DeleteDatabase(db_name); } TEST_F(StatsStorageTests, GetTableStatsTest) { - auto data_table = InitializeTestTable(); + const std::string db_name = "test_db"; + auto data_table = CreateTestDBAndTable(); + auto db_oid = data_table->GetDatabaseOid(); StatsStorage *stats_storage = StatsStorage::GetInstance(); auto &txn_manager = concurrency::TransactionManagerFactory::GetInstance(); auto txn = txn_manager.BeginTransaction(); - stats_storage->AnalyzeStatsForAllTables(txn); + ResultType result = + stats_storage->AnalyzeStatsForAllTablesWithDatabaseOid(db_oid, txn); + EXPECT_EQ(ResultType::SUCCESS, result); txn_manager.CommitTransaction(txn); txn = txn_manager.BeginTransaction(); - std::shared_ptr table_stats = stats_storage->GetTableStats( - data_table->GetDatabaseOid(), data_table->GetOid(), txn); + std::shared_ptr table_stats = + stats_storage->GetTableStats(db_oid, data_table->GetOid(), txn); txn_manager.CommitTransaction(txn); EXPECT_EQ(table_stats->num_rows, tuple_count); + TestingExecutorUtil::DeleteDatabase(db_name); } } // namespace test diff --git a/test/sql/analyze_sql_test.cpp b/test/sql/analyze_sql_test.cpp index 16191ec000d..a564602a8be 100644 --- a/test/sql/analyze_sql_test.cpp +++ b/test/sql/analyze_sql_test.cpp @@ -74,7 +74,7 @@ TEST_F(AnalyzeSQLTests, AnalyzeSingleTableTest) { txn = txn_manager.BeginTransaction(); auto catalog = catalog::Catalog::GetInstance(); storage::DataTable *db_column_stats_collector_table = - catalog->GetTableWithName(CATALOG_DATABASE_NAME, CATALOG_SCHEMA_NAME, + catalog->GetTableWithName(DEFAULT_DB_NAME, CATALOG_SCHEMA_NAME, COLUMN_STATS_CATALOG_NAME, txn); EXPECT_NE(db_column_stats_collector_table, nullptr); EXPECT_EQ(db_column_stats_collector_table->GetTupleCount(), 4);