Skip to content

Commit

Permalink
Refactor code in tests related to fd and ucc tests
Browse files Browse the repository at this point in the history
  • Loading branch information
vs9h committed Dec 4, 2023
1 parent 2e67bfd commit 4f04a14
Show file tree
Hide file tree
Showing 6 changed files with 140 additions and 211 deletions.
74 changes: 7 additions & 67 deletions src/tests/datasets.h
Original file line number Diff line number Diff line change
Expand Up @@ -9,13 +9,9 @@

static auto const test_data_dir = std::filesystem::current_path() / "input_data";

struct Dataset {
std::string name;
size_t hash;
char separator;
bool has_header;
};
namespace tests {

/// dataset configuration info to create an input table
struct DatasetInfo {
std::string_view name;
char separator;
Expand All @@ -30,65 +26,9 @@ struct DatasetInfo {
}
};

class LightDatasets {
public:
static inline const std::array<Dataset, 11> datasets_ = {
{{"CIPublicHighway10k.csv", 33398, ',', true},
{"neighbors10k.csv", 43368, ',', true},
{"WDC_astronomical.csv", 22281, ',', true},
{"WDC_age.csv", 19620, ',', true},
{"WDC_appearances.csv", 25827, ',', true},
{"WDC_astrology.csv", 40815, ',', true},
{"WDC_game.csv", 6418, ',', true},
{"WDC_science.csv", 19620, ',', true},
{"WDC_symbols.csv", 28289, ',', true},
{"breast_cancer.csv", 15121, ',', true},
{"WDC_kepler.csv", 63730, ',', true}}};
/// a pair consisting of a dataset and the expected hash
using DatasetHashPair = std::pair<DatasetInfo, size_t>;
/// a pair consisting of a vector of datasets and an expected hash
using DatasetsHashPair = std::pair<std::vector<DatasetInfo>, size_t>;

// DEPRECATED -- just use
// for (auto const& dataset : LightDatasets::datasets_) { ... }
static size_t DatasetQuantity() {
return datasets_.size();
}
static std::string DatasetName(size_t i) {
return datasets_[i].name;
}
static char Separator(size_t i) {
return datasets_[i].separator;
}
static bool HasHeader(size_t i) {
return datasets_[i].has_header;
}
static unsigned int Hash(size_t i) {
return datasets_[i].hash;
}
};

class HeavyDatasets {
public:
static inline const std::array<Dataset, 6> datasets_ = {
{{"adult.csv", 23075, ';', false},
{"CIPublicHighway.csv", 13035, ',', true},
{"EpicMeds.csv", 50218, '|', true},
{"EpicVitals.csv", 2083, '|', true},
{"iowa1kk.csv", 28573, ',', true},
{"LegacyPayors.csv", 43612, '|', true}}};

// DEPRECATED -- just use
// for (auto const& dataset : HeavyDatasets::datasets_) { ... }
static size_t DatasetQuantity() {
return datasets_.size();
}
static std::string DatasetName(size_t i) {
return datasets_[i].name;
}
static char Separator(size_t i) {
return datasets_[i].separator;
}
static bool HasHeader(size_t i) {
return datasets_[i].has_header;
}
static unsigned int Hash(size_t i) {
return datasets_[i].hash;
}
};
} // namespace tests
44 changes: 17 additions & 27 deletions src/tests/test_algorithm.cpp → src/tests/test_fd_algorithm.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@
#include "algorithms/fd/tane/tane.h"
#include "datasets.h"
#include "model/table/relational_schema.h"
#include "testing_utils.h"
#include "test_fd_util.h"

using std::string, std::vector;
using ::testing::ContainerEq, ::testing::Eq;
Expand Down Expand Up @@ -76,20 +76,20 @@ std::set<std::pair<std::vector<unsigned int>, unsigned int>> FDsToSet(std::list<
TYPED_TEST_SUITE_P(AlgorithmTest);

TYPED_TEST_P(AlgorithmTest, ThrowsOnEmpty) {
auto algorithm = TestFixture::CreateAndConfToLoad(test_data_dir / "TestEmpty.csv", ',', true);
auto algorithm = TestFixture::CreateAndConfToLoad(tests::kTestEmpty);
ASSERT_THROW(algorithm->LoadData(), std::runtime_error);
}

TYPED_TEST_P(AlgorithmTest, ReturnsEmptyOnSingleNonKey) {
auto algorithm = TestFixture::CreateAlgorithmInstance("TestSingleColumn.csv", ',', true);
auto algorithm = TestFixture::CreateAlgorithmInstance(tests::kTestSingleColumn);
algorithm->Execute();
ASSERT_TRUE(algorithm->FdList().empty());
}

TYPED_TEST_P(AlgorithmTest, WorksOnLongDataset) {
std::set<std::pair<std::vector<unsigned int>, unsigned int>> true_fd_collection{{{2}, 1}};

auto algorithm = TestFixture::CreateAlgorithmInstance("TestLong.csv", ',', true);
auto algorithm = TestFixture::CreateAlgorithmInstance(tests::kTestLong);
algorithm->Execute();
ASSERT_TRUE(CheckFdListEquality(true_fd_collection, algorithm->FdList()));
}
Expand All @@ -98,19 +98,18 @@ TYPED_TEST_P(AlgorithmTest, WorksOnWideDataset) {
std::set<std::pair<std::vector<unsigned int>, unsigned int>> true_fd_collection{
{{0}, 2}, {{0}, 4}, {{2}, 0}, {{2}, 4}, {{4}, 0}, {{4}, 2}, {{}, 1}, {{}, 3}};

auto algorithm = TestFixture::CreateAlgorithmInstance("TestWide.csv", ',', true);
auto algorithm = TestFixture::CreateAlgorithmInstance(tests::kTestWide);
algorithm->Execute();
ASSERT_TRUE(CheckFdListEquality(true_fd_collection, algorithm->FdList()));
}

TYPED_TEST_P(AlgorithmTest, LightDatasetsConsistentHash) {
template <typename T>
void PerformConsistentHashTestOn(std::vector<tests::DatasetHashPair> const& datasets) {
try {
for (auto const& dataset : LightDatasets::datasets_) {
auto algorithm = TestFixture::CreateAlgorithmInstance(dataset.name, dataset.separator,
dataset.has_header);
for (auto const& [dataset, hash] : datasets) {
auto algorithm = T::CreateAlgorithmInstance(dataset);
algorithm->Execute();
std::cout << dataset.name << std::endl;
EXPECT_EQ(algorithm->Fletcher16(), dataset.hash)
EXPECT_EQ(algorithm->Fletcher16(), hash)
<< "FD collection hash changed for " << dataset.name;
}
} catch (std::runtime_error& e) {
Expand All @@ -120,29 +119,20 @@ TYPED_TEST_P(AlgorithmTest, LightDatasetsConsistentHash) {
SUCCEED();
}

TYPED_TEST_P(AlgorithmTest, LightDatasetsConsistentHash) {
PerformConsistentHashTestOn<TestFixture>(TestFixture::light_datasets_);
}

TYPED_TEST_P(AlgorithmTest, HeavyDatasetsConsistentHash) {
try {
for (auto const& dataset : HeavyDatasets::datasets_) {
auto algorithm = TestFixture::CreateAlgorithmInstance(dataset.name, dataset.separator,
dataset.has_header);
algorithm->Execute();
EXPECT_EQ(algorithm->Fletcher16(), dataset.hash)
<< "The new algorithm and Pyro yield different results at " << dataset.name;
}
} catch (std::runtime_error& e) {
std::cout << "Exception raised in test: " << e.what() << std::endl;
FAIL();
}
SUCCEED();
PerformConsistentHashTestOn<TestFixture>(TestFixture::heavy_datasets_);
}

TYPED_TEST_P(AlgorithmTest, ConsistentRepeatedExecution) {
auto const path = test_data_dir / "WDC_astronomical.csv";
auto algorithm = TestFixture::CreateAlgorithmInstance(path, ',', true);
auto algorithm = TestFixture::CreateAlgorithmInstance(tests::kWDC_astronomical);
algorithm->Execute();
auto first_res = FDsToSet(algorithm->FdList());
for (int i = 0; i < 3; ++i) {
algos::ConfigureFromMap(*algorithm, TestFixture::GetParamMap(path, ',', true));
algos::ConfigureFromMap(*algorithm, TestFixture::GetParamMap(tests::kWDC_astronomical));
algorithm->Execute();
ASSERT_TRUE(CheckFdListEquality(first_res, algorithm->FdList()));
}
Expand Down
42 changes: 16 additions & 26 deletions src/tests/test_fd_mine.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
#include "config/names.h"
#include "datasets.h"
#include "model/table/relational_schema.h"
#include "test_fd_util.h"

using ::testing::ContainerEq, ::testing::Eq;

Expand All @@ -23,26 +24,21 @@ using std::string, std::vector;

namespace onam = config::names;

StdParamsMap FD_MineGetParamMap(const std::filesystem::path& path, char separator = ',',
bool has_header = true) {
InputTable parser = std::make_unique<CSVParser>(path, separator, has_header);
return {{config::names::kTable, parser}};
StdParamsMap FD_MineGetParamMap(tests::DatasetInfo const& info) {
return {{config::names::kTable, info.MakeInputTable()}};
}

std::unique_ptr<FDAlgorithm> ConfToLoadFD_Mine(std::string const& path, char separator = ',',
bool has_header = true) {
std::unique_ptr<FDAlgorithm> ConfToLoadFD_Mine(tests::DatasetInfo const& info) {
std::unique_ptr<FDAlgorithm> algorithm = std::make_unique<Fd_mine>();
algos::ConfigureFromMap(*algorithm, FD_MineGetParamMap(path, separator, has_header));
algos::ConfigureFromMap(*algorithm, FD_MineGetParamMap(info));
return algorithm;
}

std::unique_ptr<FDAlgorithm> CreateFD_MineAlgorithmInstance(std::string const& path,
char separator = ',',
bool has_header = true) {
return algos::CreateAndLoadAlgorithm<Fd_mine>(FD_MineGetParamMap(path, separator, has_header));
std::unique_ptr<FDAlgorithm> CreateFD_MineAlgorithmInstance(tests::DatasetInfo const& info) {
return algos::CreateAndLoadAlgorithm<Fd_mine>(FD_MineGetParamMap(info));
}

class AlgorithmTest : public LightDatasets, public HeavyDatasets, public ::testing::Test {};
using FDMineAlgorithmTest = AlgorithmTest<Fd_mine>;

std::vector<unsigned int> FD_MineBitsetToIndexVector(boost::dynamic_bitset<> const& bitset) {
std::vector<unsigned int> res;
Expand Down Expand Up @@ -85,24 +81,20 @@ std::set<std::pair<std::vector<unsigned int>, unsigned int>> FD_MineFDsToSet(
}

TEST(AlgorithmSyntheticTest, FD_Mine_ThrowsOnEmpty) {
auto path = test_data_dir / "TestEmpty.csv";
auto algorithm = ConfToLoadFD_Mine(test_data_dir / "TestEmpty.csv", ',', true);
auto algorithm = ConfToLoadFD_Mine(tests::kTestEmpty);
ASSERT_THROW(algorithm->LoadData(), std::runtime_error);
}

TEST(AlgorithmSyntheticTest, FD_Mine_ReturnsEmptyOnSingleNonKey) {
auto path = test_data_dir / "TestSingleColumn.csv";
auto algorithm = CreateFD_MineAlgorithmInstance(path, ',', true);
auto algorithm = CreateFD_MineAlgorithmInstance(tests::kTestSingleColumn);
algorithm->Execute();
ASSERT_TRUE(algorithm->FdList().empty());
}

TEST(AlgorithmSyntheticTest, FD_Mine_WorksOnLongDataset) {
auto path = test_data_dir / "TestLong.csv";

std::set<std::pair<std::vector<unsigned int>, unsigned int>> true_fd_collection{{{2}, 1}};

auto algorithm = CreateFD_MineAlgorithmInstance(path, ',', true);
auto algorithm = CreateFD_MineAlgorithmInstance(tests::kTestLong);
algorithm->Execute();
ASSERT_TRUE(FD_Mine_CheckFDListEquality(true_fd_collection, algorithm->FdList()));
}
Expand Down Expand Up @@ -148,20 +140,18 @@ void MinimizeFDs(std::list<FD>& fd_collection) {
}
}

TEST_F(AlgorithmTest, FD_Mine_ReturnsSameAsPyro) {
TEST_F(FDMineAlgorithmTest, FD_Mine_ReturnsSameAsPyro) {
namespace onam = config::names;

try {
for (Dataset const& dataset : LightDatasets::datasets_) {
for (auto const& [dataset, hash] : FDMineAlgorithmTest::light_datasets_) {
// TODO: change this hotfix
if (dataset.name == "breast_cancer.csv") {
if (dataset.name == tests::kbreast_cancer.name) {
continue;
}
auto path = test_data_dir / dataset.name;
auto algorithm =
CreateFD_MineAlgorithmInstance(path, dataset.separator, dataset.has_header);
auto algorithm = CreateFD_MineAlgorithmInstance(dataset);

StdParamsMap params_map{{onam::kCsvPath, path},
StdParamsMap params_map{{onam::kCsvPath, dataset.GetPath()},
{onam::kSeparator, dataset.separator},
{onam::kHasHeader, dataset.has_header},
{onam::kSeed, decltype(pyro::Parameters::seed){0}},
Expand Down
59 changes: 59 additions & 0 deletions src/tests/test_fd_util.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,59 @@
#pragma once

#include <filesystem>

#include <gtest/gtest.h>

#include "algorithms/algo_factory.h"
#include "algorithms/fd/fd_algorithm.h"
#include "all_datasets_info.h"
#include "config/error/type.h"
#include "config/names.h"
#include "datasets.h"

/// Typed googletest fixture shared by the FD algorithm tests.
///
/// `T` is the concrete algorithm type under test. Every helper is static, so the fixture can
/// also be used without a test instance (e.g. `AlgorithmTest<T>::CreateAlgorithmInstance(...)`).
/// Besides the factory helpers, the fixture carries the tables that pair each dataset with the
/// hash value the tests compare against.
template <typename T>
class AlgorithmTest : public ::testing::Test {
public:
    /// Creates an instance of `T` through `algos::CreateAndLoadAlgorithm`, configured with the
    /// parameters returned by `GetParamMap(info)`.
    /// NOTE(review): judging by its name the factory presumably also loads the data -- confirm
    /// in algo_factory.h.
    static std::unique_ptr<algos::FDAlgorithm> CreateAlgorithmInstance(
            tests::DatasetInfo const& info) {
        return algos::CreateAndLoadAlgorithm<T>(GetParamMap(info));
    }

    /// Datasets of the "light" group, each paired with its expected hash.
    static inline std::vector<tests::DatasetHashPair> const light_datasets_ = {
            {{tests::kCIPublicHighway10k, 33398},
             {tests::kneighbors10k, 43368},
             {tests::kWDC_astronomical, 22281},
             {tests::kWDC_age, 19620},
             {tests::kWDC_appearances, 25827},
             {tests::kWDC_astrology, 40815},
             {tests::kWDC_game, 6418},
             {tests::kWDC_science, 19620},
             {tests::kWDC_symbols, 28289},
             {tests::kbreast_cancer, 15121},
             {tests::kWDC_kepler, 63730}}};

    /// Datasets of the "heavy" group, each paired with its expected hash.
    static inline std::vector<tests::DatasetHashPair> const heavy_datasets_ = {
            {{tests::kadult, 23075},
             {tests::kCIPublicHighway, 13035},
             {tests::kEpicMeds, 50218},
             {tests::kEpicVitals, 2083},
             {tests::kiowa1kk, 28573},
             {tests::kLegacyPayors, 43612}}};

protected:
    /// Default-constructs a `T` and configures it with a single option: the input table built
    /// from `info`. No other step is performed here, so the caller decides when to call
    /// `LoadData()` on the returned algorithm.
    static std::unique_ptr<algos::FDAlgorithm> CreateAndConfToLoad(tests::DatasetInfo const& info) {
        std::unique_ptr<algos::FDAlgorithm> instance = std::make_unique<T>();
        algos::ConfigureFromMap(
                *instance, algos::StdParamsMap{{config::names::kTable, info.MakeInputTable()}});
        return instance;
    }

    /// Builds the parameter map used by the tests: the input table created from `info`,
    /// an error value of 0.0 and a seed of 0.
    static algos::StdParamsMap GetParamMap(tests::DatasetInfo const& info) {
        namespace names = config::names;
        return algos::StdParamsMap{
                {names::kTable, info.MakeInputTable()},
                {names::kError, config::ErrorType{0.0}},
                // decltype keeps the literal's type in sync with the declared type of the seed.
                {names::kSeed, decltype(pyro::Parameters::seed){0}},
        };
    }
};
Loading

0 comments on commit 4f04a14

Please sign in to comment.