forked from Desbordante/desbordante-core
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Generalize TANE and PFDTANE algorithms
Generalize TANE-based algorithms, add additional pFD mining tests.
- Loading branch information
Showing
19 changed files
with
207 additions
and
457 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file was deleted.
Oops, something went wrong.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,88 @@ | ||
#include "pfdtane.h" | ||
|
||
#include <memory> | ||
|
||
#include <easylogging++.h> | ||
|
||
#include "config/error/option.h" | ||
#include "config/error_measure/option.h" | ||
#include "enums.h" | ||
#include "fd/pli_based_fd_algorithm.h" | ||
#include "model/table/column_data.h" | ||
#include "model/table/column_layout_relation_data.h" | ||
|
||
namespace algos { | ||
using boost::dynamic_bitset; | ||
using Cluster = model::PositionListIndex::Cluster; | ||
|
||
// Builds the algorithm object: the (possibly empty) relation manager is handed to the
// TaneCommon base, after which the options owned by this class are registered.
PFDTane::PFDTane(std::optional<ColumnLayoutRelationDataManager> relation_manager)
    : tane::TaneCommon(relation_manager) {
    this->RegisterOptions();
}
|
||
// Computes the pFD error of a dependency with an empty left-hand side, `{} -> rhs`.
//
// The result is `1 - m / n`, where `m` is the size of the largest cluster stored in the
// position list index of `rhs` (never taken to be less than 1) and `n` is the relation
// size reported by that index.
//
// rhs: column whose position list index is inspected; must not be null.
// The second parameter (relation data) is not used by this implementation.
//
// Returns 0 for an empty relation; otherwise the error described above.
config::ErrorType PFDTane::CalculateZeroAryPFDError(ColumnData const* rhs,
                                                    ColumnLayoutRelationData const*) {
    model::PositionListIndex const* rhs_pli = rhs->GetPositionListIndex();
    auto const relation_size = rhs_pli->GetRelationSize();
    if (relation_size == 0) {
        // Without rows nothing can violate the dependency. Returning early also avoids
        // the division by zero that would otherwise produce -inf.
        return 0.0;
    }

    // Start at 1: presumably values occurring only once are not stored as clusters, so a
    // non-empty relation with no clusters still has a most frequent value of frequency 1
    // (TODO confirm against PositionListIndex).
    std::size_t largest_cluster = 1;
    for (Cluster const& cluster : rhs_pli->GetIndex()) {
        largest_cluster = std::max(largest_cluster, cluster.size());
    }
    return 1.0 - static_cast<double>(largest_cluster) / relation_size;
}
|
||
// Computes the pFD error of `X -> A` from the position list indexes of X and of X ∪ {A}.
//
// For every cluster of `x_pli` the largest cluster of `xa_pli` that starts inside it is
// found (taken as 1 if there is none). With the per_tuple measure each X cluster
// contributes that size; with any other measure it contributes that size divided by the
// X cluster's size. Rows that belong to no X cluster contribute 1 each. The sum is
// divided by the relation size (per_tuple) or by the number of X clusters plus the
// number of such rows (otherwise), and the result is returned as `1 - probability`.
//
// x_pli:   position list index of the left-hand side; must not be null.
// xa_pli:  position list index of the left-hand side joined with the right-hand side;
//          must not be null.
// measure: selects between the per_tuple computation and the alternative one.
// The last parameter (relation data) is not used by this implementation.
//
// Returns 0 for an empty relation; otherwise the error described above.
config::ErrorType PFDTane::CalculatePFDError(model::PositionListIndex const* x_pli,
                                             model::PositionListIndex const* xa_pli,
                                             ErrorMeasure measure,
                                             ColumnLayoutRelationData const*) {
    auto const relation_size = x_pli->GetRelationSize();
    if (relation_size == 0) {
        // Nothing to violate the dependency; also avoids 0/0 in the final division.
        return 0.0;
    }

    // Work on a copy: the clusters are reordered below and the PLI itself must stay intact.
    std::deque<Cluster> xa_index = xa_pli->GetIndex();
    std::shared_ptr<Cluster const> probing_table = x_pli->CalculateAndGetProbingTable();
    // Order XA clusters by the probing-table entry of their first row — presumably the id
    // of the enclosing X cluster, growing in x_index order (TODO confirm against
    // PositionListIndex). Ties are broken by the first row itself: std::sort is not
    // stable, and the merge below consumes the XA clusters of one X cluster in ascending
    // first-row order. Without the tie-breaker a reordering inside a group would stall
    // the iterator for the rest of the pass.
    std::sort(xa_index.begin(), xa_index.end(),
              [&probing_table](Cluster const& a, Cluster const& b) {
                  auto const a_key = probing_table->at(a.front());
                  auto const b_key = probing_table->at(b.front());
                  if (a_key != b_key) {
                      return a_key < b_key;
                  }
                  return a.front() < b.front();
              });

    bool const per_tuple = measure == +ErrorMeasure::per_tuple;
    double sum = 0.0;
    std::size_t cluster_rows_count = 0;
    std::deque<Cluster> const& x_index = x_pli->GetIndex();
    auto xa_cluster_it = xa_index.begin();

    for (Cluster const& x_cluster : x_index) {
        // Size of the largest XA cluster that starts at a row of this X cluster.
        std::size_t largest_xa_cluster = 1;
        for (int x_row : x_cluster) {
            if (xa_cluster_it == xa_index.end()) {
                break;
            }
            if (x_row == xa_cluster_it->at(0)) {
                largest_xa_cluster = std::max(largest_xa_cluster, xa_cluster_it->size());
                ++xa_cluster_it;
            }
        }
        sum += per_tuple ? static_cast<double>(largest_xa_cluster)
                         : static_cast<double>(largest_xa_cluster) / x_cluster.size();
        cluster_rows_count += x_cluster.size();
    }

    // Rows that are not part of any X cluster; each of them counts as fully satisfied.
    std::size_t const unique_rows = static_cast<std::size_t>(relation_size) - cluster_rows_count;
    double const denominator = per_tuple
                                       ? static_cast<double>(relation_size)
                                       : static_cast<double>(x_index.size() + unique_rows);
    double const probability = (sum + static_cast<double>(unique_rows)) / denominator;
    return 1.0 - probability;
}
|
||
// Override used by the TaneCommon base: delegates to the static zero-ary pFD error
// computation, passing the relation held by this object.
config::ErrorType PFDTane::CalculateZeroAryFdError(ColumnData const* rhs) {
    auto const* relation = relation_.get();
    return PFDTane::CalculateZeroAryPFDError(rhs, relation);
}
|
||
// Override used by the TaneCommon base: delegates to the static pFD error computation
// with the configured error measure and the relation held by this object.
config::ErrorType PFDTane::CalculateFdError(model::PositionListIndex const* lhs_pli,
                                            model::PositionListIndex const* joint_pli) {
    auto const* relation = relation_.get();
    return PFDTane::CalculatePFDError(lhs_pli, joint_pli, error_measure_, relation);
}
|
||
void PFDTane::RegisterOptions() { | ||
RegisterOption(config::kErrorMeasureOpt(&error_measure_)); | ||
} | ||
|
||
void PFDTane::MakeExecuteOptsAvailableFDInternal() { | ||
MakeOptionsAvailable({config::kErrorOpt.GetName(), config::kErrorMeasureOpt.GetName()}); | ||
} | ||
|
||
} // namespace algos |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,32 @@ | ||
#pragma once | ||
|
||
#include "algorithms/fd/pli_based_fd_algorithm.h" | ||
#include "config/error/type.h" | ||
#include "enums.h" | ||
#include "model/table/column_data.h" | ||
#include "model/table/column_layout_relation_data.h" | ||
#include "model/table/position_list_index.h" | ||
#include "tane_common.h" | ||
|
||
namespace algos { | ||
|
||
class PFDTane : public tane::TaneCommon { | ||
private: | ||
ErrorMeasure error_measure_ = +ErrorMeasure::per_tuple; | ||
void RegisterOptions(); | ||
void MakeExecuteOptsAvailableFDInternal() final; | ||
config::ErrorType CalculateZeroAryFdError(ColumnData const* rhs) override; | ||
config::ErrorType CalculateFdError(model::PositionListIndex const* lhs_pli, | ||
model::PositionListIndex const* joint_pli) override; | ||
|
||
public: | ||
PFDTane(std::optional<ColumnLayoutRelationDataManager> relation_manager = std::nullopt); | ||
static config::ErrorType CalculateZeroAryPFDError( | ||
ColumnData const* rhs, ColumnLayoutRelationData const* relation_data); | ||
static config::ErrorType CalculatePFDError(model::PositionListIndex const* x_pli, | ||
model::PositionListIndex const* xa_pli, | ||
ErrorMeasure error_measure, | ||
ColumnLayoutRelationData const* relation_data); | ||
}; | ||
|
||
} // namespace algos |
Oops, something went wrong.