diff --git a/src/catalog/catalog.cpp b/src/catalog/catalog.cpp index f808d16e8d3..5fd2b3c3174 100644 --- a/src/catalog/catalog.cpp +++ b/src/catalog/catalog.cpp @@ -160,16 +160,17 @@ void Catalog::BootstrapSystemCatalogs(storage::Database *database, false, {TableCatalog::ColumnId::DATABASE_OID}, pool_.get(), txn); system_catalogs->GetIndexCatalog()->InsertIndex( - COLUMN_STATS_CATALOG_SKEY0_OID, COLUMN_STATS_CATALOG_NAME "_skey0", - COLUMN_STATS_CATALOG_OID, CATALOG_SCHEMA_NAME, IndexType::BWTREE, - IndexConstraintType::UNIQUE, true, - {ColumnStatsCatalog::ColumnId::TABLE_ID, - ColumnStatsCatalog::ColumnId::COLUMN_ID}, pool_.get(), txn); + COLUMN_STATS_CATALOG_SKEY0_OID, COLUMN_STATS_CATALOG_NAME "_skey0", + COLUMN_STATS_CATALOG_OID, CATALOG_SCHEMA_NAME, IndexType::BWTREE, + IndexConstraintType::UNIQUE, true, + {ColumnStatsCatalog::ColumnId::TABLE_ID, + ColumnStatsCatalog::ColumnId::COLUMN_ID}, + pool_.get(), txn); system_catalogs->GetIndexCatalog()->InsertIndex( - COLUMN_STATS_CATALOG_SKEY1_OID, COLUMN_STATS_CATALOG_NAME "_skey1", - COLUMN_STATS_CATALOG_OID, CATALOG_SCHEMA_NAME, IndexType::BWTREE, - IndexConstraintType::UNIQUE, true, - {ColumnStatsCatalog::ColumnId::TABLE_ID}, pool_.get(), txn); + COLUMN_STATS_CATALOG_SKEY1_OID, COLUMN_STATS_CATALOG_NAME "_skey1", + COLUMN_STATS_CATALOG_OID, CATALOG_SCHEMA_NAME, IndexType::BWTREE, + IndexConstraintType::UNIQUE, true, + {ColumnStatsCatalog::ColumnId::TABLE_ID}, pool_.get(), txn); // Insert records(default + pg_catalog namespace) into pg_namespace system_catalogs->GetSchemaCatalog()->InsertSchema( @@ -198,8 +199,8 @@ void Catalog::BootstrapSystemCatalogs(storage::Database *database, LAYOUT_CATALOG_OID, LAYOUT_CATALOG_NAME, CATALOG_SCHEMA_NAME, database_oid, pool_.get(), txn); system_catalogs->GetTableCatalog()->InsertTable( - COLUMN_STATS_CATALOG_OID, COLUMN_STATS_CATALOG_NAME, - CATALOG_SCHEMA_NAME, database_oid, pool_.get(), txn); + COLUMN_STATS_CATALOG_OID, COLUMN_STATS_CATALOG_NAME, CATALOG_SCHEMA_NAME, + database_oid, pool_.get(), txn); } void Catalog::Bootstrap() { diff --git a/src/catalog/column_stats_catalog.cpp b/src/catalog/column_stats_catalog.cpp index d29e9f82413..d9d84fb7c01 100644 --- a/src/catalog/column_stats_catalog.cpp +++ b/src/catalog/column_stats_catalog.cpp @@ -23,74 +23,71 @@ namespace peloton { namespace catalog { ColumnStatsCatalog::ColumnStatsCatalog( - storage::Database *pg_catalog, - UNUSED_ATTRIBUTE type::AbstractPool *pool, - UNUSED_ATTRIBUTE concurrency::TransactionContext *txn) - : AbstractCatalog(COLUMN_STATS_CATALOG_OID, COLUMN_STATS_CATALOG_NAME, - InitializeSchema().release(), pg_catalog) { + storage::Database *pg_catalog, UNUSED_ATTRIBUTE type::AbstractPool *pool, + UNUSED_ATTRIBUTE concurrency::TransactionContext *txn) + : AbstractCatalog(COLUMN_STATS_CATALOG_OID, COLUMN_STATS_CATALOG_NAME, + InitializeSchema().release(), pg_catalog) { // Add indexes for pg_column_stats AddIndex({ColumnId::TABLE_ID, ColumnId::COLUMN_ID}, COLUMN_STATS_CATALOG_SKEY0_OID, COLUMN_STATS_CATALOG_NAME "_skey0", IndexConstraintType::UNIQUE); AddIndex({ColumnId::TABLE_ID}, COLUMN_STATS_CATALOG_SKEY1_OID, COLUMN_STATS_CATALOG_NAME "_skey1", IndexConstraintType::DEFAULT); - } ColumnStatsCatalog::~ColumnStatsCatalog() {} std::unique_ptr ColumnStatsCatalog::InitializeSchema() { - const std::string not_null_constraint_name = "notnull"; - const auto not_null_constraint = catalog::Constraint( - ConstraintType::NOTNULL, not_null_constraint_name); + const auto not_null_constraint = + catalog::Constraint(ConstraintType::NOTNULL, not_null_constraint_name); auto table_id_column = catalog::Column( - type::TypeId::INTEGER, type::Type::GetTypeSize(type::TypeId::INTEGER), - "table_id", true); + type::TypeId::INTEGER, type::Type::GetTypeSize(type::TypeId::INTEGER), + "table_id", true); table_id_column.AddConstraint(not_null_constraint); auto column_id_column = catalog::Column( - type::TypeId::INTEGER, type::Type::GetTypeSize(type::TypeId::INTEGER), - "column_id", true); + type::TypeId::INTEGER, type::Type::GetTypeSize(type::TypeId::INTEGER), + "column_id", true); column_id_column.AddConstraint(not_null_constraint); auto num_rows_column = catalog::Column( - type::TypeId::INTEGER, type::Type::GetTypeSize(type::TypeId::INTEGER), - "num_rows", true); + type::TypeId::INTEGER, type::Type::GetTypeSize(type::TypeId::INTEGER), + "num_rows", true); num_rows_column.AddConstraint(not_null_constraint); auto cardinality_column = catalog::Column( - type::TypeId::DECIMAL, type::Type::GetTypeSize(type::TypeId::DECIMAL), - "cardinality", true); + type::TypeId::DECIMAL, type::Type::GetTypeSize(type::TypeId::DECIMAL), + "cardinality", true); cardinality_column.AddConstraint(not_null_constraint); auto frac_null_column = catalog::Column( - type::TypeId::DECIMAL, type::Type::GetTypeSize(type::TypeId::DECIMAL), - "frac_null", true); + type::TypeId::DECIMAL, type::Type::GetTypeSize(type::TypeId::DECIMAL), + "frac_null", true); frac_null_column.AddConstraint(not_null_constraint); auto most_common_vals_column = catalog::Column( - type::TypeId::VARCHAR, type::Type::GetTypeSize(type::TypeId::VARCHAR), - "most_common_vals", false); + type::TypeId::VARCHAR, type::Type::GetTypeSize(type::TypeId::VARCHAR), + "most_common_vals", false); auto most_common_freqs_column = catalog::Column( - type::TypeId::VARCHAR, type::Type::GetTypeSize(type::TypeId::VARCHAR), - "most_common_freqs", false); + type::TypeId::VARCHAR, type::Type::GetTypeSize(type::TypeId::VARCHAR), + "most_common_freqs", false); auto histogram_bounds_column = catalog::Column( - type::TypeId::VARCHAR, type::Type::GetTypeSize(type::TypeId::VARCHAR), - "histogram_bounds", false); + type::TypeId::VARCHAR, type::Type::GetTypeSize(type::TypeId::VARCHAR), + "histogram_bounds", false); auto column_name_column = catalog::Column( - type::TypeId::VARCHAR, type::Type::GetTypeSize(type::TypeId::VARCHAR), - "column_name", false); + type::TypeId::VARCHAR, type::Type::GetTypeSize(type::TypeId::VARCHAR), + "column_name", false); auto has_index_column = catalog::Column( - type::TypeId::BOOLEAN, type::Type::GetTypeSize(type::TypeId::BOOLEAN), - "has_index", true); + type::TypeId::BOOLEAN, type::Type::GetTypeSize(type::TypeId::BOOLEAN), + "has_index", true); std::unique_ptr column_stats_schema(new catalog::Schema( - {table_id_column, column_id_column, num_rows_column, cardinality_column, - frac_null_column, most_common_vals_column, most_common_freqs_column, - histogram_bounds_column, column_name_column, has_index_column})); + {table_id_column, column_id_column, num_rows_column, cardinality_column, + frac_null_column, most_common_vals_column, most_common_freqs_column, + histogram_bounds_column, column_name_column, has_index_column})); return column_stats_schema; } bool ColumnStatsCatalog::InsertColumnStats( - oid_t table_id, oid_t column_id, int num_rows, - double cardinality, double frac_null, std::string most_common_vals, + oid_t table_id, oid_t column_id, int num_rows, double cardinality, + double frac_null, std::string most_common_vals, std::string most_common_freqs, std::string histogram_bounds, std::string column_name, bool has_index, type::AbstractPool *pool, concurrency::TransactionContext *txn) { @@ -142,8 +139,7 @@ bool ColumnStatsCatalog::InsertColumnStats( } bool ColumnStatsCatalog::DeleteColumnStats( - oid_t table_id, oid_t column_id, - concurrency::TransactionContext *txn) { + oid_t table_id, oid_t column_id, concurrency::TransactionContext *txn) { oid_t index_offset = IndexId::SECONDARY_KEY_0; // Secondary key index std::vector values; @@ -154,8 +150,7 @@ bool ColumnStatsCatalog::DeleteColumnStats( } std::unique_ptr> ColumnStatsCatalog::GetColumnStats( - oid_t table_id, oid_t column_id, - concurrency::TransactionContext *txn) { + oid_t table_id, oid_t column_id, concurrency::TransactionContext *txn) { std::vector column_ids( {ColumnId::NUM_ROWS, ColumnId::CARDINALITY, ColumnId::FRAC_NULL, ColumnId::MOST_COMMON_VALS, ColumnId::MOST_COMMON_FREQS, diff --git a/src/include/catalog/column_stats_catalog.h b/src/include/catalog/column_stats_catalog.h index affd1917ef9..c22e12a38ec 100644 --- a/src/include/catalog/column_stats_catalog.h +++ b/src/include/catalog/column_stats_catalog.h @@ -45,7 +45,6 @@ namespace catalog { class ColumnStatsCatalog : public AbstractCatalog { public: - ColumnStatsCatalog(storage::Database *pg_catalog, type::AbstractPool *pool, concurrency::TransactionContext *txn); @@ -54,8 +53,8 @@ class ColumnStatsCatalog : public AbstractCatalog { //===--------------------------------------------------------------------===// // write Related API //===--------------------------------------------------------------------===// - bool InsertColumnStats(oid_t table_id, oid_t column_id, - int num_rows, double cardinality, double frac_null, + bool InsertColumnStats(oid_t table_id, oid_t column_id, int num_rows, + double cardinality, double frac_null, std::string most_common_vals, std::string most_common_freqs, std::string histogram_bounds, std::string column_name, @@ -68,16 +67,14 @@ class ColumnStatsCatalog : public AbstractCatalog { // Read-only Related API //===--------------------------------------------------------------------===// std::unique_ptr> GetColumnStats( - oid_t table_id, oid_t column_id, - concurrency::TransactionContext *txn); + oid_t table_id, oid_t column_id, concurrency::TransactionContext *txn); size_t GetTableStats( - oid_t table_id, concurrency::TransactionContext *txn, - std::map>> & - column_stats_map); + oid_t table_id, concurrency::TransactionContext *txn, + std::map>> + &column_stats_map); // TODO: add more if needed - /** @brief private function for initialize schema of pg_index * @return unqiue pointer to schema */ diff --git a/src/include/optimizer/stats/stats_storage.h b/src/include/optimizer/stats/stats_storage.h index 52d3baaf136..bf906755fe8 100644 --- a/src/include/optimizer/stats/stats_storage.h +++ b/src/include/optimizer/stats/stats_storage.h @@ -68,8 +68,7 @@ class StatsStorage { /* Functions for triggerring stats collection */ ResultType AnalyzeStatsForAllTablesWithDatabaseOid( - oid_t database_oid, - concurrency::TransactionContext *txn = nullptr); + oid_t database_oid, concurrency::TransactionContext *txn = nullptr); ResultType AnalyzeStatsForTable( storage::DataTable *table, diff --git a/src/optimizer/stats/stats_storage.cpp b/src/optimizer/stats/stats_storage.cpp index 1190efec243..c1b58ce27b9 100644 --- a/src/optimizer/stats/stats_storage.cpp +++ b/src/optimizer/stats/stats_storage.cpp @@ -98,8 +98,8 @@ void StatsStorage::InsertOrUpdateColumnStats( cardinality, frac_null, most_common_vals.c_str(), most_common_freqs.c_str(), histogram_bounds.c_str()); auto pg_column_stats = catalog::Catalog::GetInstance() - ->GetSystemCatalogs(database_id) - ->GetColumnStatsCatalog(); + ->GetSystemCatalogs(database_id) + ->GetColumnStatsCatalog(); auto &txn_manager = concurrency::TransactionManagerFactory::GetInstance(); bool single_statement_txn = false; @@ -107,12 +107,11 @@ void StatsStorage::InsertOrUpdateColumnStats( single_statement_txn = true; txn = txn_manager.BeginTransaction(); } - pg_column_stats->DeleteColumnStats(table_id, column_id, - txn); - pg_column_stats->InsertColumnStats( - table_id, column_id, num_rows, cardinality, frac_null, - most_common_vals, most_common_freqs, histogram_bounds, column_name, - has_index, pool_.get(), txn); + pg_column_stats->DeleteColumnStats(table_id, column_id, txn); + pg_column_stats->InsertColumnStats(table_id, column_id, num_rows, cardinality, + frac_null, most_common_vals, + most_common_freqs, histogram_bounds, + column_name, has_index, pool_.get(), txn); if (single_statement_txn) { txn_manager.CommitTransaction(txn); @@ -127,13 +126,13 @@ std::shared_ptr StatsStorage::GetColumnStatsByID(oid_t database_id, oid_t table_id, oid_t column_id) { auto pg_column_stats = catalog::Catalog::GetInstance() - ->GetSystemCatalogs(database_id) - ->GetColumnStatsCatalog(); + ->GetSystemCatalogs(database_id) + ->GetColumnStatsCatalog(); auto &txn_manager = concurrency::TransactionManagerFactory::GetInstance(); auto txn = txn_manager.BeginTransaction(); // std::unique_ptr> column_stats_vector - auto column_stats_vector = pg_column_stats->GetColumnStats( - table_id, column_id, txn); + auto column_stats_vector = + pg_column_stats->GetColumnStats(table_id, column_id, txn); txn_manager.CommitTransaction(txn); return ConvertVectorToColumnStats(database_id, table_id, column_id, @@ -211,8 +210,8 @@ std::shared_ptr StatsStorage::ConvertVectorToColumnStats( std::shared_ptr StatsStorage::GetTableStats( oid_t database_id, oid_t table_id, concurrency::TransactionContext *txn) { auto pg_column_stats = catalog::Catalog::GetInstance() - ->GetSystemCatalogs(database_id) - ->GetColumnStatsCatalog(); + ->GetSystemCatalogs(database_id) + ->GetColumnStatsCatalog(); std::map>> column_stats_map; pg_column_stats->GetTableStats(table_id, txn, column_stats_map); @@ -236,8 +235,8 @@ std::shared_ptr StatsStorage::GetTableStats( oid_t database_id, oid_t table_id, std::vector column_ids, concurrency::TransactionContext *txn) { auto pg_column_stats = catalog::Catalog::GetInstance() - ->GetSystemCatalogs(database_id) - ->GetColumnStatsCatalog(); + ->GetSystemCatalogs(database_id) + ->GetColumnStatsCatalog(); std::map>> column_stats_map; pg_column_stats->GetTableStats(table_id, txn, column_stats_map); @@ -258,18 +257,17 @@ std::shared_ptr StatsStorage::GetTableStats( * datatables to collect their stats and store them in the column_stats_catalog. */ ResultType StatsStorage::AnalyzeStatsForAllTablesWithDatabaseOid( - oid_t database_oid, - UNUSED_ATTRIBUTE concurrency::TransactionContext *txn) { + oid_t database_oid, UNUSED_ATTRIBUTE concurrency::TransactionContext *txn) { if (txn == nullptr) { LOG_TRACE("Do not have transaction to analyze all tables' stats."); return ResultType::FAILURE; } auto storage_manager = storage::StorageManager::GetInstance(); - auto database = storage_manager->GetDatabaseWithOid(database); + auto database = storage_manager->GetDatabaseWithOid(database_oid); PELOTON_ASSERT(database != nullptr); - auto pg_database = catalog::Catalog::GetInstance() - ->GetDatabaseObject(database_oid, txn); + auto pg_database = + catalog::Catalog::GetInstance()->GetDatabaseObject(database_oid, txn); auto table_objects = pg_database->GetTableObjects(); for (auto &table_object_entry : table_objects) { auto table_oid = table_object_entry.first; @@ -280,7 +278,7 @@ ResultType StatsStorage::AnalyzeStatsForAllTablesWithDatabaseOid( LOG_TRACE("Analyzing table: %s", table_object->GetTableName().c_str()); auto table = database->GetTableWithOid(table_oid); std::unique_ptr table_stats_collector( - new TableStatsCollector(table)); + new TableStatsCollector(table)); table_stats_collector->CollectColumnStats(); InsertOrUpdateTableStats(table, table_stats_collector.get(), txn); } diff --git a/test/optimizer/stats_storage_test.cpp b/test/optimizer/stats_storage_test.cpp index 226abeabe72..baca8a58f39 100644 --- a/test/optimizer/stats_storage_test.cpp +++ b/test/optimizer/stats_storage_test.cpp @@ -93,7 +93,6 @@ void VerifyAndPrintColumnStats(storage::DataTable *data_table, } TEST_F(StatsStorageTests, InsertAndGetTableStatsTest) { - const std::string db_name = "test_db"; TestingExecutorUtil::InitializeDatabase(db_name); auto data_table = InitializeTestTable(); @@ -240,8 +239,8 @@ TEST_F(StatsStorageTests, AnalyzeStatsForAllTablesTest) { StatsStorage *stats_storage = StatsStorage::GetInstance(); // Must pass in the transaction. - ResultType result = stats_storage - ->AnalyzeStatsForAllTablesWithDatabaseOid(db_oid); + ResultType result = + stats_storage->AnalyzeStatsForAllTablesWithDatabaseOid(db_oid); EXPECT_EQ(result, ResultType::FAILURE); auto &txn_manager = concurrency::TransactionManagerFactory::GetInstance(); @@ -253,7 +252,6 @@ TEST_F(StatsStorageTests, AnalyzeStatsForAllTablesTest) { // Check the correctness of the stats. VerifyAndPrintColumnStats(data_table, 4); TestingExecutorUtil::DeleteDatabase(db_name); - } TEST_F(StatsStorageTests, GetTableStatsTest) { @@ -265,13 +263,14 @@ TEST_F(StatsStorageTests, GetTableStatsTest) { auto &txn_manager = concurrency::TransactionManagerFactory::GetInstance(); auto txn = txn_manager.BeginTransaction(); - ResultType result = stats_storage - ->AnalyzeStatsForAllTablesWithDatabaseOid(db_oid, txn); + ResultType result = + stats_storage->AnalyzeStatsForAllTablesWithDatabaseOid(db_oid, txn); + EXPECT_EQ(ResultType::SUCCESS, result); txn_manager.CommitTransaction(txn); txn = txn_manager.BeginTransaction(); - std::shared_ptr table_stats = stats_storage->GetTableStats( - db_oid, data_table->GetOid(), txn); + std::shared_ptr table_stats = + stats_storage->GetTableStats(db_oid, data_table->GetOid(), txn); txn_manager.CommitTransaction(txn); EXPECT_EQ(table_stats->num_rows, tuple_count); TestingExecutorUtil::DeleteDatabase(db_name);