From dc3cf18eff365ff257d7ee5f707534d337b15e26 Mon Sep 17 00:00:00 2001 From: Johann Dreo Date: Mon, 30 Sep 2024 21:44:11 +0200 Subject: [PATCH 1/9] feat(search): allows to read an initial solution from standard input --- README.md | 39 +++++++++++++++++++++++++++ app/search.cpp | 34 +++++++++++++++++------- external/paradiseo | 2 +- src/include/moCombination.hpp | 50 +++++++++++++++++------------------ 4 files changed, 89 insertions(+), 36 deletions(-) diff --git a/README.md b/README.md index 37b15b1..7ca5f3f 100644 --- a/README.md +++ b/README.md @@ -26,3 +26,42 @@ To run the tests, use the following commands: mkdir -p build && cd build && cmake -CMAKE_BUILD_TYPE=Debug .. && make && make test ``` + +## Command Line Interface + +The `search` executable is the main interface to run the optimization solver. +It takes options of the form `--long[=<value>]` or `-s[=<value>]`. +The equal sign is mandatory, spaces between option flag and value are not supported. +A file holding a set of options can be passed using `@<file>`. +Running the executable once produces a `search.status` file with the last setup, +and can be called back using, e.g.: `./search @search.status`. + +It allows running: + +- a mono-objective hill-climbing, with `--algo=HC` (the default), +- a mono-objective simulated annealing, with `--algo=SA`, +- a bi-objective evolutionary algorithm, with `--algo=NSGA2`. + +Solutions are encoded as a vector of indices toward a set of pre-determined +parametrized hash function operators. +The parameters domain can be set with `--shift-*` and `--mult-*` options. + +One can pass an initial solution to HC and SA algorithms by using standard +input, for example: +```sh +echo "0 3 1 2 3 101" | ./search --func-len=3 --init-sol=1 +``` +will initialize the first solution with `𓉘r2𐙤a2𐙤l3𐙤m31𓉝` +(`m31` being added automatically to complete the forward hash). 
+ +The solution encoding reads as: +``` +┌ Fitness (here a mono-objective one) +│ ┌ func-len +│ │ ┌ Operators indices +│ │ ┌─┴─┐ ┌ Total number of operators (i.e. past-the-max index) +0 3 1 2 3 101 +``` +Beware that you are responsible for aligning `--func-len` and the size of the +encoded solution. + diff --git a/app/search.cpp b/app/search.cpp index 47920f5..15b5931 100644 --- a/app/search.cpp +++ b/app/search.cpp @@ -205,7 +205,7 @@ int main(int argc, char* argv[]) "Increment step for multipliers (note: only odd multipliers will be allowed)", 'u', "Search domain").value(); Range mult_range(mult_min, mult_max, mult_step); - /***** Search domain arguments *****/ + /***** Solver arguments *****/ std::map algorithms; algorithms["HC"] = "Hill-Climbing [mono-objective]"; @@ -219,6 +219,10 @@ int main(int argc, char* argv[]) const std::string algo = argparser.createParam("HC", "algo", "Search metaheuristic"+msg.str(), 'a', "Algorithm").value(); + const bool init_sol = argparser.createParam(false, "init-sol", + "Read initial solution from standard input", 'I', "Algorithm").value(); + + // make_verbose(argparser); make_help(argparser); @@ -299,15 +303,27 @@ int main(int argc, char* argv[]) moLocalSearch& search = *palgo; CLUTCHLOG(note, "OK"); - CLUTCHLOG(progress, "Pick a random solution..."); - std::vector v; - v.reserve(func_len); - std::mt19937 rng(seed); - std::uniform_int_distribution uni(0, forge.size()-1); - for(size_t i=0; i v; + v.reserve(func_len); + std::mt19937 rng(seed); + std::uniform_int_distribution uni(0, forge.size()-1); + for(size_t i=0; i (*this)[index] = value; } - /** Accessor to the sequence. */ // FIXME remove - // ContainerType& get() { - // return *this; - // } - - // /** Accessor to an item of the sequence. */ - // AtomType& at(size_t index) { - // return _combination.at(index); - // } - - // /** Accessor to an item of the sequence. 
*/ - // AtomType& operator[](size_t index) { - // return _combination[index]; - // } - - // /** The current size of the sequence. */ - // size_t size() const { - // return _combination.size(); - // } - /** The size of the possible indices. */ size_t nb_options() const { return _nb_options; } - /** Fancy print. */ + /** Serialize */ virtual void printOn(std::ostream& out) const override { assert(_is_init); - EO::printOn(out); // Fitness. - out << this->size(); - for(const auto i : *this) { - out << " " << i; - } + eoVector::printOn(out); + out << " " << this->nb_options(); + } + + /** Deserialize */ + virtual void readFrom(std::istream& in) override { + eoVector vec; // Fitness & vector + in >> vec; + this->clear(); + this->reserve(vec.size()); + std::copy(std::begin(vec), std::end(vec), std::back_inserter(*this)); + this->fitness(vec.fitness()); + + size_t nbo; + in >> nbo; + this->_nb_options = nbo; + + #ifndef NDEBUG + for(auto i : *this) { + assert(0 <= i and i < _nb_options); + } + this->_is_init = true; + #endif } //! Class name for state management. 
From 742e61d0bcee1fb8dca9bf6d696bb2329c4a151a Mon Sep 17 00:00:00 2001 From: Johann Dreo Date: Tue, 1 Oct 2024 09:49:38 +0200 Subject: [PATCH 2/9] feat(search): adds an option to allow some of the operators --- app/search.cpp | 87 +++++++++++++++++++++++++++++++++++----------- external/clutchlog | 2 +- 2 files changed, 68 insertions(+), 21 deletions(-) diff --git a/app/search.cpp b/app/search.cpp index 15b5931..a079f1b 100644 --- a/app/search.cpp +++ b/app/search.cpp @@ -64,32 +64,55 @@ struct Range { size_t step; }; -void make_domain(eoForgeVector< Operator >& forge, size_t value_size, Range shift_range, Range mult_range) +void make_domain(eoForgeVector< Operator >& forge, size_t value_size, Range shift_range, Range mult_range, std::set allowed_operators={"XorLeftShift","XorRightShift","AddShift","Multiply"} ) { + std::map op_count; + op_count["XorLeftShift"] = 0; + op_count["XorRightShift"] = 0; + op_count["AddShift"] = 0; + op_count["Multiply"] = 0; + for(size_t i = shift_range.min; i < shift_range.max; i+=shift_range.step) { - forge.add< XorLeftShift >(i, value_size); - CLUTCHLOG(xdebug, "XorLeftShift << " << i); + if(allowed_operators.contains("XorLeftShift")) { + forge.add< XorLeftShift >(i, value_size); + CLUTCHLOG(xdebug, "XorLeftShift << " << i); + op_count["XorLeftShift"] += 1; + } - forge.add< XorRightShift >(i, value_size); - CLUTCHLOG(xdebug, "XorRightShift << " << i); + if(allowed_operators.contains("XorRightShift")) { + forge.add< XorRightShift >(i, value_size); + CLUTCHLOG(xdebug, "XorRightShift << " << i); + op_count["XorRightShift"] += 1; + } - forge.add< AddShift >(i, value_size); - CLUTCHLOG(xdebug, "AddShift << " << i); + if(allowed_operators.contains("AddShift")) { + forge.add< AddShift >(i, value_size); + CLUTCHLOG(xdebug, "AddShift << " << i); + op_count["AddShift"] += 1; + } } - #ifndef NDEBUG - size_t nb_multipliers = 0; - #endif - for(size_t i = mult_range.min; i < shift_range.max; i+=mult_range.step) { - if(i % 2 == 1) { // Only odd 
multipliers are allowed. - forge.add< Multiply >(i, value_size); - CLUTCHLOG(xdebug, "Multiply * " << i); - #ifndef NDEBUG - nb_multipliers += 1; - #endif + if(allowed_operators.contains("Multiply")) { + size_t nb_multipliers = 0; + for(size_t i = mult_range.min; i < shift_range.max; i+=mult_range.step) { + if(i % 2 == 1) { // Only odd multipliers are allowed. + forge.add< Multiply >(i, value_size); + CLUTCHLOG(xdebug, "Multiply * " << i); + #ifndef NDEBUG + nb_multipliers += 1; + #endif + } } + ASSERT(nb_multipliers > 0); + op_count["Multiply"] = nb_multipliers; } - ASSERT(nb_multipliers > 0); + + CLUTCHLOG(note, "Domain contains " << forge.size() << " operator instances:"); + CLUTCHCODE(note, + for(auto kv : op_count) { + CLUTCHLOGD(note, kv.second << " operator " << kv.first, 1); + } + ); } std::string format_hashfunc(HashFunctionPair& hf, std::string indent = " ") @@ -159,6 +182,21 @@ std::string format_solution(const CombiMO& sol, const size_t value_size, eoForge return out.str(); } +std::set split_in_set(std::string str, const std::string sep = ",") +{ + std::set items; + size_t pos = 0; + std::string substr; + while((pos = str.find(sep)) != std::string::npos) { + substr = str.substr(0, pos); + items.insert(substr); + str.erase(0, pos + sep.length()); + } + items.insert(str); + + return items; +} + int main(int argc, char* argv[]) { CLUTCHLOG(progress, "Set config..."); @@ -205,6 +243,16 @@ int main(int argc, char* argv[]) "Increment step for multipliers (note: only odd multipliers will be allowed)", 'u', "Search domain").value(); Range mult_range(mult_min, mult_max, mult_step); + const std::string allowed_ops = argparser.createParam("XorLeftShift,XorRightShift,AddShift,Multiply", "operators", + "Operators allowed in the domain, as a comma-separated list", 'o', "Search domain").value(); + std::set allowed_operators = split_in_set(allowed_ops, ","); + const std::set all_operators = {"XorLeftShift","XorRightShift","AddShift","Multiply"}; + for(const 
std::string& s : allowed_operators) { + if(not all_operators.contains(s)) { + EXIT_ON_ERROR(Invalid_Argument, "Operator \"" << s << "\" is unknown"); + } + } + /***** Solver arguments *****/ std::map algorithms; @@ -269,8 +317,7 @@ int main(int argc, char* argv[]) CLUTCHLOG(progress, "Create the search domain..."); eoForgeVector< Operator > forge(/*always_reinstantiate*/true); - make_domain(forge, value_size, shift_range, mult_range); - CLUTCHLOG(info, forge.size() << " operators"); + make_domain(forge, value_size, shift_range, mult_range, allowed_operators); ASSERT(forge.size() > 0); CLUTCHLOG(note, "OK"); diff --git a/external/clutchlog b/external/clutchlog index 28f50d0..c8d148c 160000 --- a/external/clutchlog +++ b/external/clutchlog @@ -1 +1 @@ -Subproject commit 28f50d0badd8471e8ae3221ecd2ac9338ffa9bd3 +Subproject commit c8d148cf97eb8921a4f3e6df1a4fe45ca87b2f76 From 9af7ce7ab0563fc2f8b6fef835ed26b550520328 Mon Sep 17 00:00:00 2001 From: Johann Dreo Date: Thu, 3 Oct 2024 10:04:00 +0200 Subject: [PATCH 3/9] feat(search): add CMA-ES for optimizing parameters only --- CMakeLists.txt | 10 ++ app/example.cpp | 4 +- app/search.cpp | 293 ++++++++++++++++++++++++++------------- external/paradiseo | 2 +- src/include/EvalFunc.hpp | 149 +++++++++++++++++++- test/EvalFull_T.cpp | 4 +- test/EvalMO_T.cpp | 8 +- 7 files changed, 361 insertions(+), 109 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index beec0b5..8cd87e8 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -44,6 +44,16 @@ if(USE_LOCAL_PARADISEO) include_directories(${PARADISEO_ROOT}/eo/src) include_directories(${PARADISEO_ROOT}/mo/src) include_directories(${PARADISEO_ROOT}/moeo/src) + include_directories(${PARADISEO_ROOT}/edo/src) + # FIXME supposed to be set by Paradiseo, but failed to propagate here. 
+ find_package(Eigen3) + if(EIGEN3_FOUND) + include_directories( ${EIGEN3_INCLUDE_DIR} ) + add_compile_definitions( WITH_EIGEN ) + else() + message(FATAL_ERROR "\n\nERROR: Eigen3 must be installed, e.g. `sudo apt install libeigen3-dev`.\n" ) + endif() + link_directories(${PARADISEO_BUILD}/lib) else() include_directories($ENV{PARADISEO_ROOT}/include/paradiseo/eo) diff --git a/app/example.cpp b/app/example.cpp index e309580..139af49 100644 --- a/app/example.cpp +++ b/app/example.cpp @@ -96,7 +96,7 @@ int main() using Min = eoMinimizingFitness; using Combi = moCombination; - eoForgeVector< EvalFull::OpItf > forge(/*always_reinstantiate*/true); + eoForgeVector< combi::EvalFull::OpItf > forge(/*always_reinstantiate*/true); forge.add< Multiply >( 9, value_size); forge.add< XorLeftShift >(17, value_size); forge.add< XorLeftShift >( 5, value_size); @@ -119,7 +119,7 @@ int main() CLUTCHLOG(debug, "Solution: " << sol); CLUTCHLOG(note, "Evaluate"); - EvalFull eval(value_size, forge); + combi::EvalFull eval(value_size, forge); eval(sol); diff --git a/app/search.cpp b/app/search.cpp index a079f1b..2f4d512 100644 --- a/app/search.cpp +++ b/app/search.cpp @@ -6,8 +6,13 @@ #include #include +#include #include #include +#include +#include +#include +#include #include "HashFunction.hpp" #include "Operator.hpp" @@ -26,7 +31,7 @@ using myuint = uint32_t; using Min = eoMinimizingFitness; using Combi = moCombination; -using CombiMO = moeoIntVector; +using CombiMO = moeoIntVector; //! Error codes returned on exit. 
enum class Error : unsigned char { @@ -145,7 +150,7 @@ std::string format_solution(const Combi& sol, const size_t value_size, eoForgeVe { CLUTCHLOG(progress, "Optimized solution:"); CLUTCHLOG(note, sol ); - auto hf = make_hashfuncs(sol, value_size, forge); + auto hf = combi::make_hashfuncs(sol, value_size, forge); CLUTCHLOG(progress, "Output optimized hash function:"); @@ -165,7 +170,7 @@ std::string format_solution(const CombiMO& sol, const size_t value_size, eoForge { CLUTCHLOG(progress, "Optimized solution:"); CLUTCHLOG(note, sol ); - auto hf = make_hashfuncs(sol, value_size, forge); + auto hf = combi::make_hashfuncs(sol, value_size, forge); CLUTCHLOG(progress, "Output optimized hash function:"); @@ -197,6 +202,21 @@ std::set split_in_set(std::string str, const std::string sep = ",") return items; } +std::vector split_in_vec(std::string str, const std::string sep = ",") +{ + std::vector items; + size_t pos = 0; + std::string substr; + while((pos = str.find(sep)) != std::string::npos) { + substr = str.substr(0, pos); + items.push_back(substr); + str.erase(0, pos + sep.length()); + } + items.push_back(str); + + return items; +} + int main(int argc, char* argv[]) { CLUTCHLOG(progress, "Set config..."); @@ -245,7 +265,7 @@ int main(int argc, char* argv[]) const std::string allowed_ops = argparser.createParam("XorLeftShift,XorRightShift,AddShift,Multiply", "operators", "Operators allowed in the domain, as a comma-separated list", 'o', "Search domain").value(); - std::set allowed_operators = split_in_set(allowed_ops, ","); + const std::set allowed_operators = split_in_set(allowed_ops, ","); const std::set all_operators = {"XorLeftShift","XorRightShift","AddShift","Multiply"}; for(const std::string& s : allowed_operators) { if(not all_operators.contains(s)) { @@ -256,9 +276,10 @@ int main(int argc, char* argv[]) /***** Solver arguments *****/ std::map algorithms; - algorithms["HC"] = "Hill-Climbing [mono-objective]"; - algorithms["SA"] = "Simulated-Annealing 
[mono-objective]"; - algorithms["NSGA2"] = "NSGAII [bi-objective]"; + algorithms["HC"] = "Hill-Climbing [mono-objective, fixed-size-combination]"; + algorithms["SA"] = "Simulated-Annealing [mono-objective, fixed-size-combination]"; + algorithms["NSGA2"] = "NSGAII [bi-objective, fixed-size-combination]"; + algorithms["CMAES"] = "CMA-ES [mono-objective, parametrize]"; std::ostringstream msg; msg << " ("; for(auto& kv : algorithms) { msg << kv.first << ":" << kv.second << ", "; @@ -270,6 +291,12 @@ int main(int argc, char* argv[]) const bool init_sol = argparser.createParam(false, "init-sol", "Read initial solution from standard input", 'I', "Algorithm").value(); + const bool parametrize = argparser.createParam(false, "parametrize", + "Only tune the parameters and do not change the operators." + "This will interpret --operators= as a sequence of operators to be parametrized.", 'P', "Algorithm").value(); + + const size_t pop_size = argparser.createParam(100, "pop-size", + "Population size for evolutionary algorithms", 'p', "Algorithm").value(); // make_verbose(argparser); make_help(argparser); @@ -324,123 +351,193 @@ int main(int argc, char* argv[]) CLUTCHLOG(progress, "Instantiate solver..."); eo::rng.reseed(seed); - if( algo == "HC" or algo == "SA" ) { + if( not parametrize ) { + if( algo == "HC" or algo == "SA" ) { - using Nb = moCombinationNeighbor; - using NbHood = moCombinationNeighborhood; + using Nb = moCombinationNeighbor; + using NbHood = moCombinationNeighborhood; - EvalFull feval(value_size, forge); - EvalTest peval(feval); + combi::EvalFull feval(value_size, forge); + combi::EvalTest peval(feval); - NbHood neighborhood; + NbHood neighborhood; - // Continue search until exhaustion of the neighborhood. - moTrueContinuator until_end; - moCheckpoint check(until_end); - moBestFitnessStat best; - check.add(best); // Update the best state. + // Continue search until exhaustion of the neighborhood. 
+ moTrueContinuator until_end; + moCheckpoint check(until_end); + moBestFitnessStat best; + check.add(best); // Update the best state. - // Hill climber, selecting a random solution among the equal-best ones. - std::unique_ptr< moLocalSearch > palgo; - if( algo == "HC" ) { - palgo = std::make_unique< moRandomBestHC >(neighborhood, feval, peval, check); - } else if( algo == "SA" ) { - palgo = std::make_unique< moSA >(neighborhood, feval, peval, check); - } - moLocalSearch& search = *palgo; - CLUTCHLOG(note, "OK"); - - Combi sol; - if( init_sol ) { - CLUTCHLOG(progress, "Read solution from standard input..."); - sol.readFrom(std::cin); - CLUTCHLOG(info, "Read solution: " << sol); - sol.invalidate(); // Always invalidate, in case fitness input is wrong.. - ASSERT(sol.size() == func_len); - ASSERT(sol.nb_options() == forge.size()); - - } else { - CLUTCHLOG(progress, "Pick a random solution..."); - std::vector v; - v.reserve(func_len); - std::mt19937 rng(seed); - std::uniform_int_distribution uni(0, forge.size()-1); - for(size_t i=0; i > palgo; + if( algo == "HC" ) { + palgo = std::make_unique< moRandomBestHC >(neighborhood, feval, peval, check); + } else if( algo == "SA" ) { + palgo = std::make_unique< moSA >(neighborhood, feval, peval, check); } - sol = Combi(v, forge.size()); - } + moLocalSearch& search = *palgo; + CLUTCHLOG(note, "OK"); + + Combi sol; + if( init_sol ) { + CLUTCHLOG(progress, "Read solution from standard input..."); + sol.readFrom(std::cin); + CLUTCHLOG(info, "Read solution: " << sol); + sol.invalidate(); // Always invalidate, in case fitness input is wrong.. 
+ ASSERT(sol.size() == func_len); + ASSERT(sol.nb_options() == forge.size()); + + } else { + CLUTCHLOG(progress, "Pick a random solution..."); + std::vector v; + v.reserve(func_len); + std::mt19937 rng(seed); + std::uniform_int_distribution uni(0, forge.size()-1); + for(size_t i=0; i; - } else if( algo == "NSGA2" ) { + combi::EvalMO eval(value_size, forge); + eoPopLoopEval popEval(eval); - using ReVec = moeoRealVector; + // Crossover + eoQuadCloneOp xover; // TODO use a real crossover - EvalMO eval(value_size, forge); - eoPopLoopEval popEval(eval); + // Mutation + using MutWrapper = eoRealToIntMonOp; + eoDetUniformMutation mutreal(/*range*/1.5, /*nb*/1); // TODO tune + eoIntInterval bounds(0,forge.size()-1); + MutWrapper mutation(mutreal, bounds); - // Crossover - eoQuadCloneOp xover; // TODO use a real crossover + using InitWrapper = eoRealToIntInit; + eoRealVectorBounds rebounds(func_len, 0, forge.size()-1); + eoRealInitBounded initreal(rebounds); + InitWrapper init(initreal, bounds); - // Mutation - using MutWrapper = eoRealToIntMonOp; - eoDetUniformMutation mutreal(/*range*/1.5, /*nb*/1); // TODO tune - eoIntInterval bounds(0,forge.size()-1); - MutWrapper mutation(mutreal, bounds); + eoQuadGenOp genOp(xover); + eoSGATransform transform(xover, 0.1, mutation, 0.1); + eoGenContinue continuator(10); - using InitWrapper = eoRealToIntInit; - eoRealVectorBounds rebounds(func_len, 0, forge.size()-1); - eoRealInitBounded initreal(rebounds); - InitWrapper init(initreal, bounds); + // build NSGA-II + moeoNSGAII algo(20, eval, xover, 1.0, mutation, 1.0); + CLUTCHLOG(note, "OK"); - eoQuadGenOp genOp(xover); - eoSGATransform transform(xover, 0.1, mutation, 0.1); - eoGenContinue continuator(10); + CLUTCHLOG(progress, "Initialize population..."); + eoPop pop(20, init); + CLUTCHLOG(note, "OK"); - // build NSGA-II - moeoNSGAII algo(20, eval, xover, 1.0, mutation, 1.0); - CLUTCHLOG(note, "OK"); + CLUTCHLOG(progress, "Solver run..."); + algo(pop); + CLUTCHLOG(note, "OK"); - 
CLUTCHLOG(progress, "Initialize population..."); - eoPop pop(20, init); - CLUTCHLOG(note, "OK"); + auto sol = pop.best_element(); - CLUTCHLOG(progress, "Solver run..."); - algo(pop); - CLUTCHLOG(note, "OK"); + const std::string out = format_solution(sol, value_size, forge); + std::cout << out << std::endl; - auto sol = pop.best_element(); - const std::string out = format_solution(sol, value_size, forge); - std::cout << out << std::endl; + } else { // Unknown algo + std::ostringstream msg; + msg << "Unknown algorithm: " << algo << ", valid candidates are"; + for( auto& kv : algorithms) { + msg << ", " << kv.first << " (" << kv.second << ")"; + } + EXIT_ON_ERROR( Invalid_Argument, msg.str() ); + } + + } else { // parametrize == true + + if( algo == "CMAES" ) { + std::vector operators = split_in_vec(allowed_ops, ","); + size_t dim = operators.size(); + size_t max_eval = 100; + + using R = eoReal; + using CMA = edoNormalAdaptive; + + edoNormalAdaptive gaussian(dim); + + eoState state; + auto& obj_func = state.pack< param::EvalFull >(value_size, operators); + auto& eval = state.pack< eoEvalCounterThrowException >(obj_func, max_eval); + auto& pop_eval = state.pack< eoPopLoopEval >(eval); + + // TODO get rid of do_make* stuff + auto& eo_continue = do_make_continue( argparser, state, eval); + auto& pop_continue = do_make_checkpoint(argparser, state, eval, eo_continue); + + auto& best = state.pack< eoBestIndividualStat >(); + pop_continue.add( best ); + auto& distrib_continue = state.pack< edoContAdaptiveFinite >(); + // FIXME implement constraints: different bounds for shifts and multiply + odd multipliers + double bmin = std::min(shift_min, mult_min); + double bmax = std::max(shift_max, mult_max); + auto& gen = state.pack< eoUniformGenerator >(bmin, bmax); + auto& bounder = state.pack< edoBounderRng >(R(dim, bmin), R(dim, bmax), gen); - } else { - std::ostringstream msg; - msg << "Unknown algorithm: " << algo << ", valid candidates are"; - for( auto& kv : algorithms) { - 
msg << ", " << kv.first << " (" << kv.second << ")"; + auto& init = state.pack< eoInitFixedLength >(dim, gen); + + auto& selector = state.pack< eoRankMuSelect >(dim/2); + auto& estimator = state.pack< edoEstimatorNormalAdaptive >(gaussian); + + auto& sampler = state.pack< edoSamplerNormalAdaptive >(bounder); + auto& replacor = state.pack< eoCommaReplacement >(); + + auto& algo = state.pack< edoAlgoAdaptive >( + gaussian , pop_eval, selector, + estimator, sampler , replacor, + pop_continue, distrib_continue); + + CLUTCHLOG(progress, "Initialize population..."); + // auto& pop = do_make_pop(argparser, state, init); + eoPop pop; + pop.append(pop_size, init); + pop_eval(pop,pop); + CLUTCHLOG(note, "OK"); + + CLUTCHLOG(progress, "Solver run..."); + try { + algo(pop); + } catch (eoMaxEvalException& e) { + eo::log << eo::progress << "STOP" << std::endl; + } + CLUTCHLOG(note, "OK"); + + } else { // Unknown algo + std::ostringstream msg; + msg << "Unknown algorithm: " << algo << ", valid candidates are"; + for( auto& kv : algorithms) { + msg << ", " << kv.first << " (" << kv.second << ")"; + } + EXIT_ON_ERROR( Invalid_Argument, msg.str() ); } - EXIT_ON_ERROR( Invalid_Argument, msg.str() ); } } diff --git a/external/paradiseo b/external/paradiseo index 190a304..d3a2ab5 160000 --- a/external/paradiseo +++ b/external/paradiseo @@ -1 +1 @@ -Subproject commit 190a30495e2d9af10a6b4a4f70bc51c68847ad1a +Subproject commit d3a2ab5e843c22c864c6ed25119ca057c300240e diff --git a/src/include/EvalFunc.hpp b/src/include/EvalFunc.hpp index 7dba965..ae3eae0 100644 --- a/src/include/EvalFunc.hpp +++ b/src/include/EvalFunc.hpp @@ -16,6 +16,16 @@ class HashFunctionPair HashFunction reverse; }; +/********************************************************************************/ + +/** Combinatorial encoding + * + * i.e. A solution is a sequence of indices depending on a pre-pseudo-instantiated + * forge of operators. 
+ */ + +namespace combi { + /** Instantiate the forward and reverse HashFunc from the given solution. * * @param sol the Paradiseo solution representing a hash function (i.e. a sequence of indices) @@ -59,6 +69,7 @@ HashFunctionPair make_hashfuncs( EOT& sol, size_t value_size, eoForgeVec return HashFunctionPair(hff, hfr); } + //! Evaluates a mono-objective solution from scratch. template class EvalFull : public eoEvalFunc< EOT > @@ -85,7 +96,7 @@ class EvalFull : public eoEvalFunc< EOT > virtual void operator()(EOT& sol) { CLUTCHLOG(xdebug, "Evaluate solution: " << sol); - auto hffr = make_hashfuncs(sol, m_value_size, m_forge); + auto hffr = make_hashfuncs(sol, m_value_size, m_forge); HashFunction hff = hffr.forward; HashFunction hfr = hffr.reverse; @@ -209,7 +220,7 @@ class EvalMO : public moeoEvalFunc { CLUTCHLOG(xdebug, "Evaluate solution: " << sol); - auto hffr = make_hashfuncs(sol, m_value_size, m_forge); + auto hffr = make_hashfuncs(sol, m_value_size, m_forge); HashFunction hff = hffr.forward; HashFunction hfr = hffr.reverse; @@ -227,5 +238,139 @@ class EvalMO : public moeoEvalFunc } }; +} // namespace combi + + +/********************************************************************************/ + +/** Parametrization encoding + * + * i.e. A solution is a sequence of parameter values, + * depending on a sequence of operators fixed at initialization. + */ +namespace param { + +/** Instantiate the forward and reverse HashFunc from the given solution. + * + * @param sol the Paradiseo solution representing a hash function (i.e. a sequence of parameter values) + * @param value_size The size (in bits) of the values to manipulate + * @param operators The sequence of operator types that forms the hash func to parametrize. 
+ */ +template +HashFunctionPair make_hashfuncs( EOT& sol, size_t value_size, const std::vector& operators ) +{ + ASSERT(sol.size() > 0); + ASSERT(sol.size() == operators.size()); + + HashFunction hff(value_size); + + CLUTCHLOG(xdebug, "Instantiate " << sol.size() << " operators:"); + for(size_t i = 0; i < sol.size(); ++i) { + // CLUTCHLOG(xdebug, "Instantiate " << i << "th operator"); + + size_t param = static_cast(std::round(sol[i])); + + if(operators[i] == "XorLeftShift") { + hff.add_operator(std::make_shared< XorLeftShift >(param, value_size)); + CLUTCHLOGD(xdebug, "XorLeftShift << " << param << "(" << sol[i] << ")", 1); + + } else if(operators[i] == "XorRightShift") { + hff.add_operator(std::make_shared< XorRightShift >(param, value_size)); + CLUTCHLOGD(xdebug, "XorRightShift << " << param << "(" << sol[i] << ")", 1); + + } else if(operators[i] == "AddShift") { + hff.add_operator(std::make_shared< AddShift >(param, value_size)); + CLUTCHLOGD(xdebug, "AddShift << " << param << "(" << sol[i] << ")", 1); + + } else if(operators[i] == "Multiply") { + // Only odd multipliers are allowed. 
+ if(param % 2 == 0) { // if even + if(param < sol[i]) { + param -= 1; + } else { + param += 1; + } + } + hff.add_operator(std::make_shared< Multiply >(param, value_size)); + CLUTCHLOGD(xdebug, "AddShift << " << param << "(" << sol[i] << ")", 1); + + } else { + std::ostringstream msg; + msg << "Unknown operator: " << operators[i]; + CLUTCHLOG(error, msg.str()); + throw std::runtime_error(msg.str()); + } + } + CLUTCHLOG(xdebug, hff.size() << "/" << sol.size() << " operators"); + ASSERT(hff.size() == sol.size()); + CLUTCHLOG(xdebug, "Incomplete hash function: " << hff.get_name()); + hff.complete_with_masks(); + CLUTCHLOG(xdebug, "Complete hash function: " << hff.get_name()); + + HashFunction hfr{hff.invert()}; + CLUTCHLOG(xdebug, "Inverted hash function: " << hfr.get_name()); + + #ifndef NDEBUG + // Set up the random number generator + std::random_device rd; // Obtain a random number from hardware + auto const seed{rd()}; + std::mt19937 gen(seed); // Seed the generator + std::uniform_int_distribution value_dist(0, (1U << 31) - 1); + for(size_t i=0; i<10; ++i) { + myuint value = static_cast(value_dist(gen)); + CLUTCHLOG(xdebug, "Check inversion on " << value); + myuint hashed {hff.apply(value)}; + myuint recovered {hfr.apply(hashed)}; + ASSERT(value == recovered); + } + #endif + + return HashFunctionPair(hff, hfr); +} + +//! Evaluates a mono-objective solution from scratch. +template +class EvalFull : public eoEvalFunc< EOT > +{ +public: + using OpItf = Operator; + +protected: + const size_t m_value_size; + const std::vector m_operators; + +public: + /** Constructor + * + * @param value_size The size (in bits) of the values to manipulate + * @param operators The sequence of operator names to be parametrized. + */ + EvalFull(size_t value_size, const std::vector& operators) : + m_value_size(value_size), + m_operators(operators) + { } + + //! Call interface. 
+ virtual void operator()(EOT& sol) { + CLUTCHLOG(xdebug, "Evaluate solution: " << sol); + + auto hffr = make_hashfuncs(sol, m_value_size, m_operators); + + HashFunction hff = hffr.forward; + HashFunction hfr = hffr.reverse; + + // TODO: have a real objective function. + const double quality = hff.size() + hfr.size(); + + sol.fitness( quality ); + CLUTCHLOG(xdebug, "Evaluated solution: " << sol); + CLUTCHLOG(xdebug, "Evaluated hash function: " << hff.get_name()); + + ASSERT(not sol.invalid()); + } + +}; + +} // namespace param #endif // EVALFUNC_HPP diff --git a/test/EvalFull_T.cpp b/test/EvalFull_T.cpp index 61d6254..75d494a 100644 --- a/test/EvalFull_T.cpp +++ b/test/EvalFull_T.cpp @@ -20,7 +20,7 @@ TEST(EvalFull, LargeNeighborhood) using Min = eoMinimizingFitness; using Combi = moCombination; - eoForgeVector< EvalFull::OpItf > forge(/*always_reinstantiate*/true); + eoForgeVector< combi::EvalFull::OpItf > forge(/*always_reinstantiate*/true); forge.add< Multiply >( 9, value_size); forge.add< XorLeftShift >(17, value_size); forge.add< XorLeftShift >( 5, value_size); @@ -35,7 +35,7 @@ TEST(EvalFull, LargeNeighborhood) hood.init(sol, to); // {0,0} EXPECT_TRUE(hood.hasNeighbor(sol)); - EvalFull eval(value_size, forge); + combi::EvalFull eval(value_size, forge); while( hood.cont(sol) ) { eval(sol); diff --git a/test/EvalMO_T.cpp b/test/EvalMO_T.cpp index f88e4e7..f4bb3cd 100644 --- a/test/EvalMO_T.cpp +++ b/test/EvalMO_T.cpp @@ -22,9 +22,9 @@ TEST(EvalMO, LargeNeighborhood) const size_t length = 5; const size_t value_size = 31; using myuint = uint32_t; - using Combi = moeoIntVector; + using Combi = moeoIntVector; - eoForgeVector< EvalMO::OpItf > forge(/*always_reinstantiate*/true); + eoForgeVector< combi::EvalMO::OpItf > forge(/*always_reinstantiate*/true); forge.add< Multiply >( 9, value_size); forge.add< XorLeftShift >(17, value_size); forge.add< XorLeftShift >( 5, value_size); @@ -34,9 +34,9 @@ TEST(EvalMO, LargeNeighborhood) Combi sol(length, 0); - EvalMO 
eval(value_size, forge); + combi::EvalMO eval(value_size, forge); - using MutWrapper = eoRealToIntMonOp>; + using MutWrapper = eoRealToIntMonOp>; eoDetUniformMutation< typename MutWrapper::EOTreal > mutreal(/*range*/forge.size(), /*nb*/length); eoIntInterval bounds(0,forge.size()-1); MutWrapper mutint(mutreal, bounds); From 93d509774ff581299a4f23850c57539e52e644e0 Mon Sep 17 00:00:00 2001 From: Johann Dreo Date: Fri, 4 Oct 2024 16:08:24 +0200 Subject: [PATCH 4/9] feat(operators): add bounds asserts to value_size + add get_shortname - assert that value_size < sizeof(myuint) - assert that shifts < value_size --- src/include/HashFunction.hpp | 38 +++++++++++++++++++++-------------- src/include/XorLeftShift.hpp | 6 +++++- src/include/XorRightShift.hpp | 6 +++++- 3 files changed, 33 insertions(+), 17 deletions(-) diff --git a/src/include/HashFunction.hpp b/src/include/HashFunction.hpp index b39e569..caaeb95 100644 --- a/src/include/HashFunction.hpp +++ b/src/include/HashFunction.hpp @@ -37,7 +37,9 @@ class HashFunction HashFunction(size_t value_size, std::string const & function_name = "") : m_function_name(function_name), m_value_size(value_size) - {}; + { + assert(value_size < sizeof(myuint)*CHAR_BIT); + }; //! Copy constructor. HashFunction(const HashFunction & other) : @@ -67,25 +69,31 @@ class HashFunction std::string get_name() const { if(m_function_name.size() == 0) { - // Symbols from unicode codepages allowed in identifiers: 𓃊ㄍ𓉘𓉝𐙤 - // Allows to copy-paste the name as a legit C++ function name - // while still being readable. 
- const std::string sep = "𐙤"; - std::ostringstream os; - os << "hash𓐅" << m_operators.size() << "𓉘"; - if(m_operators.size() > 0) { - os << m_operators[0]->to_short(); - for(size_t i=1; ito_short(); - } - } - os << "𓉝"; - return os.str(); + return get_shortname(); } else { return m_function_name; } } + std::string get_shortname() const + { + // Symbols from unicode codepages allowed in identifiers: 𓃊ㄍ𓉘𓉝𐙤 + // Allows to copy-paste the name as a legit C++ function name + // while still being readable. + const std::string sep = "𐙤"; + std::ostringstream os; + os << "hash𓐅" << m_operators.size() << "𓉘"; + if(m_operators.size() > 0) { + os << m_operators[0]->to_short(); + for(size_t i=1; ito_short(); + } + } + os << "𓉝"; + return os.str(); + + } + /** Add an operator to the hash function * @param op The operator to add */ diff --git a/src/include/XorLeftShift.hpp b/src/include/XorLeftShift.hpp index bbc7e3a..7e86551 100644 --- a/src/include/XorLeftShift.hpp +++ b/src/include/XorLeftShift.hpp @@ -20,7 +20,11 @@ class XorLeftShift : public Operator size_t m_value_size; public: - XorLeftShift(size_t shifts, size_t value_size) : m_shifts(shifts), m_value_size(value_size) {} + XorLeftShift(size_t shifts, size_t value_size) : m_shifts(shifts), m_value_size(value_size) + { + assert(shifts < value_size); + } + XorLeftShift(XorLeftShift const & other) : XorLeftShift(other.m_shifts, other.m_value_size) {} ~XorLeftShift() {} diff --git a/src/include/XorRightShift.hpp b/src/include/XorRightShift.hpp index f2b1780..7f9ceeb 100644 --- a/src/include/XorRightShift.hpp +++ b/src/include/XorRightShift.hpp @@ -20,7 +20,11 @@ class XorRightShift : public Operator size_t m_value_size; public: - XorRightShift(size_t shifts, size_t value_size) : m_shifts(shifts), m_value_size(value_size) {} + XorRightShift(size_t shifts, size_t value_size) : m_shifts(shifts), m_value_size(value_size) + { + assert(shifts < value_size); + } + XorRightShift(XorRightShift const & other) : 
XorRightShift(other.m_shifts, other.m_value_size) {} ~XorRightShift() {} From ae9a023db3a487a21f2944c27bc941b9f19efd48 Mon Sep 17 00:00:00 2001 From: Johann Dreo Date: Fri, 4 Oct 2024 16:09:55 +0200 Subject: [PATCH 5/9] fix(eval): enforce shifts bounds in parametrization --- src/include/EvalFunc.hpp | 29 ++++++++++++++++++----------- 1 file changed, 18 insertions(+), 11 deletions(-) diff --git a/src/include/EvalFunc.hpp b/src/include/EvalFunc.hpp index ae3eae0..90aeee3 100644 --- a/src/include/EvalFunc.hpp +++ b/src/include/EvalFunc.hpp @@ -271,16 +271,19 @@ HashFunctionPair make_hashfuncs( EOT& sol, size_t value_size, const std: size_t param = static_cast(std::round(sol[i])); if(operators[i] == "XorLeftShift") { - hff.add_operator(std::make_shared< XorLeftShift >(param, value_size)); + param = std::min(param, value_size-1); CLUTCHLOGD(xdebug, "XorLeftShift << " << param << "(" << sol[i] << ")", 1); + hff.add_operator(std::make_shared< XorLeftShift >(param, value_size)); } else if(operators[i] == "XorRightShift") { - hff.add_operator(std::make_shared< XorRightShift >(param, value_size)); + param = std::min(param, value_size-1); CLUTCHLOGD(xdebug, "XorRightShift << " << param << "(" << sol[i] << ")", 1); + hff.add_operator(std::make_shared< XorRightShift >(param, value_size)); } else if(operators[i] == "AddShift") { - hff.add_operator(std::make_shared< AddShift >(param, value_size)); + param = std::min(param, value_size-1); CLUTCHLOGD(xdebug, "AddShift << " << param << "(" << sol[i] << ")", 1); + hff.add_operator(std::make_shared< AddShift >(param, value_size)); } else if(operators[i] == "Multiply") { // Only odd multipliers are allowed. 
@@ -291,8 +294,8 @@ HashFunctionPair make_hashfuncs( EOT& sol, size_t value_size, const std: param += 1; } } - hff.add_operator(std::make_shared< Multiply >(param, value_size)); CLUTCHLOGD(xdebug, "AddShift << " << param << "(" << sol[i] << ")", 1); + hff.add_operator(std::make_shared< Multiply >(param, value_size)); } else { std::ostringstream msg; @@ -303,24 +306,28 @@ HashFunctionPair make_hashfuncs( EOT& sol, size_t value_size, const std: } CLUTCHLOG(xdebug, hff.size() << "/" << sol.size() << " operators"); ASSERT(hff.size() == sol.size()); - CLUTCHLOG(xdebug, "Incomplete hash function: " << hff.get_name()); + CLUTCHLOG(xdebug, "Partial hash function: " << hff.get_name()); hff.complete_with_masks(); CLUTCHLOG(xdebug, "Complete hash function: " << hff.get_name()); HashFunction hfr{hff.invert()}; - CLUTCHLOG(xdebug, "Inverted hash function: " << hfr.get_name()); + CLUTCHLOG(xdebug, "Inverted hash function: " << hfr.get_shortname()); #ifndef NDEBUG - // Set up the random number generator - std::random_device rd; // Obtain a random number from hardware + std::random_device rd; auto const seed{rd()}; - std::mt19937 gen(seed); // Seed the generator + std::mt19937 gen(seed); std::uniform_int_distribution value_dist(0, (1U << 31) - 1); for(size_t i=0; i<10; ++i) { myuint value = static_cast(value_dist(gen)); CLUTCHLOG(xdebug, "Check inversion on " << value); - myuint hashed {hff.apply(value)}; - myuint recovered {hfr.apply(hashed)}; + myuint hashed = hff.apply(value); + myuint recovered = hfr.apply(hashed); + if( value != recovered ) { + std::clog << hff.to_string() << std::endl; + std::clog << hfr.to_string() << std::endl; + CLUTCHLOG(error, value << " != " << recovered); + } ASSERT(value == recovered); } #endif From d961307ae731db727a4c45dad40b3964e0e34a5b Mon Sep 17 00:00:00 2001 From: Johann Dreo Date: Fri, 4 Oct 2024 16:11:17 +0200 Subject: [PATCH 6/9] fix(search): enforce bounds, eval in try, display result code --- app/search.cpp | 18 +++++++++++++----- 1 file 
changed, 13 insertions(+), 5 deletions(-) diff --git a/app/search.cpp b/app/search.cpp index 2f4d512..a0d0aa1 100644 --- a/app/search.cpp +++ b/app/search.cpp @@ -243,14 +243,16 @@ int main(int argc, char* argv[]) const size_t value_size = argparser.createParam(31, "value-size", "Value size (in bits)", 'v', "Search domain").value(); + ASSERT(value_size < sizeof(myuint)*CHAR_BIT); const size_t func_len = argparser.createParam(3, "func-len", "Number of operations in the hash function", 'n', "Search domain").value(); const size_t shift_min = argparser.createParam(2, "shift-min", "Minimum number of shifts", 't', "Search domain").value(); - const size_t shift_max = argparser.createParam(31, "shift-max", + const size_t shift_max = argparser.createParam(30, "shift-max", "Maximum number of shifts", 'T', "Search domain").value(); + ASSERT(shift_max < value_size); const size_t shift_step = argparser.createParam(1, "shift-step", "Increment step for number of shifts", 'i', "Search domain").value(); Range shift_range(shift_min, shift_max, shift_step); @@ -330,8 +332,8 @@ int main(int argc, char* argv[]) if(shift_min == 0) { EXIT_ON_ERROR(InconsistentDomain, "It makes no sense to set `--shift-min` to zero."); } - if(shift_max < value_size) { - EXIT_ON_ERROR(InconsistentDomain, "It makes no sense to set --shift-max=" << shift_max << " < --value-size=" << value_size <<""); + if(shift_max >= value_size) { + EXIT_ON_ERROR(InconsistentDomain, "It makes no sense to set --shift-max=" << shift_max << " >= --value-size=" << value_size <<""); } if(mult_min < 3) { CLUTCHLOG(warning, "It is probably wrong that `--mult-min` is less than 3."); @@ -477,7 +479,7 @@ int main(int argc, char* argv[]) if( algo == "CMAES" ) { std::vector operators = split_in_vec(allowed_ops, ","); size_t dim = operators.size(); - size_t max_eval = 100; + size_t max_eval = 100000; using R = eoReal; using CMA = edoNormalAdaptive; @@ -520,17 +522,23 @@ int main(int argc, char* argv[]) // auto& pop = 
do_make_pop(argparser, state, init); eoPop pop; pop.append(pop_size, init); - pop_eval(pop,pop); CLUTCHLOG(note, "OK"); CLUTCHLOG(progress, "Solver run..."); try { + pop_eval(pop,pop); algo(pop); } catch (eoMaxEvalException& e) { eo::log << eo::progress << "STOP" << std::endl; } CLUTCHLOG(note, "OK"); + auto sol = pop.best_element(); + + auto hf = param::make_hashfuncs(sol, value_size, operators); + std::cout << format_hashfunc(hf) << std::endl; + + } else { // Unknown algo std::ostringstream msg; msg << "Unknown algorithm: " << algo << ", valid candidates are"; From 4a478f1422d113d3c9692ab93221b2f0694b0c66 Mon Sep 17 00:00:00 2001 From: Johann Dreo Date: Fri, 11 Oct 2024 10:15:01 +0200 Subject: [PATCH 7/9] refactor(objfunc): use AvalancheTest to assess quality - Refactor AvalancheTest to have a common interface compatible with composition. - Allows letting the user decide whether to use strict or soft instances. - Allows instantiating the test object only once. - Allows indicating a default number of tests. - Uses the `operator()` interface instead of `run`, to stay consistent with ParadisEO. - Use tests to assess quality in evaluators. - Use StrictAvalancheTest in app/search.
--- app/example.cpp | 20 ++++---- app/search.cpp | 16 ++++-- external/googletest | 2 +- src/include/AvalancheTest.hpp | 94 +++++++++++++++++++++-------------- src/include/EvalFunc.hpp | 43 ++++++++++------ test/EvalFull_T.cpp | 3 +- test/EvalMO_T.cpp | 3 +- 7 files changed, 112 insertions(+), 69 deletions(-) diff --git a/app/example.cpp b/app/example.cpp index 139af49..99bd380 100644 --- a/app/example.cpp +++ b/app/example.cpp @@ -50,29 +50,29 @@ int main() // Evaluates the hash function // SoftAvalancheTest soft_test{hashFunc}; // CLUTCHLOG(progress, "Run SoftAvalancheTest"); - // CLUTCHLOG(note, " 1 000 000 iterations:\t" << soft_test.run(value_size * 1000000UL)); - // CLUTCHLOG(note, " 10 000 000 iterations:\t" << soft_test.run(value_size * 10000000UL)); - // CLUTCHLOG(note, " 100 000 000 iterations:\t" << soft_test.run(value_size * 100000000UL)); - // CLUTCHLOG(note, "1 000 000 000 iterations:\t" << soft_test.run(value_size * 1000000000UL)); + // CLUTCHLOG(note, " 1 000 000 iterations:\t" << soft_test(hashFunc, value_size * 1000000UL)); + // CLUTCHLOG(note, " 10 000 000 iterations:\t" << soft_test(hashFunc, value_size * 10000000UL)); + // CLUTCHLOG(note, " 100 000 000 iterations:\t" << soft_test(hashFunc, value_size * 100000000UL)); + // CLUTCHLOG(note, "1 000 000 000 iterations:\t" << soft_test(hashFunc, value_size * 1000000000UL)); - StrictAvalancheTest strict_test{hashFunc}; + StrictAvalancheTest strict_test{hashFunc.get_value_size()}; CLUTCHLOG(progress, "Run SoftAvalancheTest"); for (size_t i = 0; i < 20; i++) { - CLUTCHLOG(note, " 10 000 iterations:\t" << strict_test.run(value_size * 10000UL)); + CLUTCHLOG(note, " 10 000 iterations:\t" << strict_test(hashFunc, value_size * 10000UL)); } std::cout << std::endl; for (size_t i = 0; i < 20; i++) { - CLUTCHLOG(note, " 100 000 iterations:\t" << strict_test.run(value_size * 100000UL)); + CLUTCHLOG(note, " 100 000 iterations:\t" << strict_test(hashFunc, value_size * 100000UL)); } std::cout << std::endl; std::cout 
<< std::endl; for (size_t i = 0; i < 20; i++) { - CLUTCHLOG(note, " 1 000 000 iterations:\t" << strict_test.run(value_size * 1000000UL)); + CLUTCHLOG(note, " 1 000 000 iterations:\t" << strict_test(hashFunc, value_size * 1000000UL)); } - CLUTCHLOG(note, "100 000 000 iterations:\t" << strict_test.run(value_size * 100000000UL)); + CLUTCHLOG(note, "100 000 000 iterations:\t" << strict_test(hashFunc, value_size * 100000000UL)); CLUTCHLOG(note, "Invert"); // Get the inverse function @@ -119,7 +119,7 @@ int main() CLUTCHLOG(debug, "Solution: " << sol); CLUTCHLOG(note, "Evaluate"); - combi::EvalFull eval(value_size, forge); + combi::EvalFull eval(value_size, forge, strict_test); eval(sol); diff --git a/app/search.cpp b/app/search.cpp index a0d0aa1..e61b603 100644 --- a/app/search.cpp +++ b/app/search.cpp @@ -300,6 +300,12 @@ int main(int argc, char* argv[]) const size_t pop_size = argparser.createParam(100, "pop-size", "Population size for evolutionary algorithms", 'p', "Algorithm").value(); + /***** Objective Functions arguments *****/ + + const size_t nb_tests = argparser.createParam(1000, "nb-tests", + "Number of tests performed to assess the quality (whether the hash function distributes closely related k-mers uniformly in binary space)", 'x', "Objective Functions").value(); + + // make_verbose(argparser); make_help(argparser); @@ -353,13 +359,15 @@ int main(int argc, char* argv[]) CLUTCHLOG(progress, "Instantiate solver..."); eo::rng.reseed(seed); + StrictAvalancheTest test(value_size, /*nb_tests*/nb_tests); + if( not parametrize ) { if( algo == "HC" or algo == "SA" ) { using Nb = moCombinationNeighbor; using NbHood = moCombinationNeighborhood; - combi::EvalFull feval(value_size, forge); + combi::EvalFull feval(value_size, forge, test); combi::EvalTest peval(feval); NbHood neighborhood; @@ -426,7 +434,7 @@ int main(int argc, char* argv[]) using ReVec = moeoRealVector; - combi::EvalMO eval(value_size, forge); + combi::EvalMO eval(value_size, forge, test); 
eoPopLoopEval popEval(eval); // Crossover @@ -487,7 +495,7 @@ int main(int argc, char* argv[]) edoNormalAdaptive gaussian(dim); eoState state; - auto& obj_func = state.pack< param::EvalFull >(value_size, operators); + auto& obj_func = state.pack< param::EvalFull >(value_size, operators, test); auto& eval = state.pack< eoEvalCounterThrowException >(obj_func, max_eval); auto& pop_eval = state.pack< eoPopLoopEval >(eval); @@ -529,7 +537,7 @@ int main(int argc, char* argv[]) pop_eval(pop,pop); algo(pop); } catch (eoMaxEvalException& e) { - eo::log << eo::progress << "STOP" << std::endl; + CLUTCHLOG(progress, "STOP, reached max number of evaluations"); } CLUTCHLOG(note, "OK"); diff --git a/external/googletest b/external/googletest index ff233bd..a1e255a 160000 --- a/external/googletest +++ b/external/googletest @@ -1 +1 @@ -Subproject commit ff233bdd4cac0a0bf6e5cd45bda3406814cb2796 +Subproject commit a1e255a582377e1006bb88a408ac3f933ba7c916 diff --git a/src/include/AvalancheTest.hpp b/src/include/AvalancheTest.hpp index 1e7004a..c06c873 100644 --- a/src/include/AvalancheTest.hpp +++ b/src/include/AvalancheTest.hpp @@ -4,26 +4,27 @@ #ifndef AVALANCHE_HPP #define AVALANCHE_HPP - template -class SoftAvalancheTest +class AvalancheTest { -private: - // The hash function to test - HashFunction m_hash_function; +protected: + const size_t m_value_size; // Random number generator std::random_device rd; - std::mt19937 gen; - std::uniform_int_distribution dis; + std::mt19937 m_gen; + std::uniform_int_distribution m_dis; + const size_t default_nb_tests; public: /** Constructor * * @param hash_function The hash function to test */ - SoftAvalancheTest(HashFunction hash_function) : - m_hash_function(hash_function), - gen(rd()), dis(0, (static_cast(1) << hash_function.get_value_size()) - 1) + AvalancheTest(const size_t value_size, const size_t nb_tests = 100) : + m_value_size(value_size), + m_gen(rd()), + m_dis(0, (static_cast(1) << value_size) - 1), + default_nb_tests(nb_tests) { } 
/** Run the test @@ -31,10 +32,38 @@ class SoftAvalancheTest * @param nb_tests The number of tests to run * @return The percentage of bits that changed */ - double run(size_t nb_tests) + virtual double operator()(HashFunction& hash_function, size_t nb_tests = 0) = 0; + + size_t get_value_size() const { + return m_value_size; + } +}; + + +template +class SoftAvalancheTest : public AvalancheTest +{ +public: + /** Constructor + * + * @param hash_function The hash function to test + */ + SoftAvalancheTest(const size_t value_size, const size_t nb_tests = 0) : + AvalancheTest(value_size, nb_tests) + { } + + /** Run the test + * + * @param nb_tests The number of tests to run + * @return The percentage of bits that changed + */ + double operator()(HashFunction& hash_function, size_t nb_tests = 0) + { + if(nb_tests == 0) {nb_tests = this->default_nb_tests;} + // Number of bits where the function is encoded - size_t const nb_bits {m_hash_function.get_value_size()}; + size_t const nb_bits {hash_function.get_value_size()}; // Total number of bit differences size_t total_diff {0}; @@ -44,12 +73,12 @@ class SoftAvalancheTest for (size_t i{0} ; im_dis(this->m_gen)}; // flip a random bit in A to get B myuint const B {A ^ (static_cast(1) << bit_position)}; // Apply the hash function to A and B - myuint const hash_A {m_hash_function.apply(A)}; - myuint const hash_B {m_hash_function.apply(B)}; + myuint const hash_A {hash_function.apply(A)}; + myuint const hash_B {hash_function.apply(B)}; // Count the number of bits that changed total_diff += __builtin_popcount(hash_A ^ hash_B); // Move to the next bit to flip in the next iteration @@ -57,33 +86,23 @@ class SoftAvalancheTest } double const expected_diff {(nb_bits * nb_tests) / 2.0}; - + // Return the percentage of bits that changed return (static_cast(total_diff) - expected_diff) / expected_diff; } }; - template -class StrictAvalancheTest +class StrictAvalancheTest : public AvalancheTest { -private: - // The hash function to test - 
HashFunction m_hash_function; - // Random number generator - std::random_device rd; - std::mt19937 gen; - std::uniform_int_distribution dis; - public: /** Constructor * * @param hash_function The hash function to test */ - StrictAvalancheTest(HashFunction hash_function) : - m_hash_function(hash_function), - gen(rd()), dis(0, (static_cast(1) << hash_function.get_value_size()) - 1) + StrictAvalancheTest(const size_t value_size, const size_t nb_tests = 0) : + AvalancheTest(value_size, nb_tests) { } /** Run the test @@ -91,10 +110,12 @@ class StrictAvalancheTest * @param nb_tests The number of tests to run * @return The percentage of bits that changed */ - double run(size_t nb_tests) + double operator()(HashFunction& hash_function, size_t nb_tests = 0) { + if(nb_tests == 0) {nb_tests = this->default_nb_tests;} + // Number of bits where the function is encoded - size_t const nb_bits {m_hash_function.get_value_size()}; + size_t const nb_bits {hash_function.get_value_size()}; // Difference matrix std::vector> diff_matrix(nb_bits, std::vector(nb_bits, 0)); @@ -104,15 +125,15 @@ class StrictAvalancheTest for (size_t i{0} ; im_dis(this->m_gen)}; + myuint const hash_A {hash_function.apply(A)}; // flip a bit in A to get B and hash it myuint const B {A ^ (static_cast(1) << bit_position)}; - myuint const hash_B {m_hash_function.apply(B)}; + myuint const hash_B {hash_function.apply(B)}; myuint diff {hash_A ^ hash_B}; - + // Register the output bits that changed for (size_t b_pos{0} ; b_pos #include "log.h" +#include "AvalancheTest.hpp" //! Structure to gather forward and reverse hash functions, as outputed by make_hashfuncs. template @@ -16,6 +17,7 @@ class HashFunctionPair HashFunction reverse; }; + /********************************************************************************/ /** Combinatorial encoding @@ -23,7 +25,6 @@ class HashFunctionPair * i.e. A solution is a sequence of indices depending on a pre-pseudo-instantiated * forge of operators. 
*/ - namespace combi { /** Instantiate the forward and reverse HashFunc from the given solution. @@ -80,6 +81,7 @@ class EvalFull : public eoEvalFunc< EOT > protected: const size_t m_value_size; eoForgeVector& m_forge; + AvalancheTest& m_test; public: /** Constructor @@ -87,10 +89,13 @@ class EvalFull : public eoEvalFunc< EOT > * @param value_size The size (in bits) of the values to manipulate * @param forge The set of possible parametrized hash operators. */ - EvalFull(size_t value_size, eoForgeVector& forge) : + EvalFull(size_t value_size, eoForgeVector& forge, AvalancheTest& test) : m_value_size(value_size), - m_forge(forge) - { } + m_forge(forge), + m_test(test) + { + ASSERT(value_size == test.get_value_size()); // FIXME we could use test.get_value_size() + } //! Call interface. virtual void operator()(EOT& sol) { @@ -101,8 +106,9 @@ class EvalFull : public eoEvalFunc< EOT > HashFunction hff = hffr.forward; HashFunction hfr = hffr.reverse; - // TODO: have a real objective function. - const double quality = hff.size() + hfr.size(); + ASSERT(hff.get_value_size() == m_value_size); + const double quality = m_test(hff); + // NOTE: do we want to aggregate runtime as well? sol.fitness( quality ); CLUTCHLOG(xdebug, "Evaluated solution: " << sol); @@ -203,6 +209,7 @@ class EvalMO : public moeoEvalFunc protected: const size_t m_value_size; eoForgeVector& m_forge; + AvalancheTest& m_test; public: /** Constructor @@ -210,10 +217,13 @@ class EvalMO : public moeoEvalFunc * @param value_size The size (in bits) of the values to manipulate * @param forge The set of possible parametrized hash operators. */ - EvalMO(size_t value_size, eoForgeVector& forge) : + EvalMO(size_t value_size, eoForgeVector& forge, AvalancheTest& test) : m_value_size(value_size), - m_forge(forge) - { } + m_forge(forge), + m_test(test) + { + ASSERT(value_size == test.get_value_size()); // FIXME we could use test.get_value_size() + } //! Call interface. 
void operator()(MOEOT& sol) @@ -225,8 +235,8 @@ class EvalMO : public moeoEvalFunc HashFunction hff = hffr.forward; HashFunction hfr = hffr.reverse; - // TODO: have a real objective function. - const QualityAndRuntime::Type quality = hff.size() * hfr.size(); + ASSERT(hff.get_value_size() == m_value_size); + const QualityAndRuntime::Type quality = m_test(hff); const QualityAndRuntime::Type runtime = hff.size() + hfr.size(); sol.objectiveVector(0, quality ); @@ -345,6 +355,7 @@ class EvalFull : public eoEvalFunc< EOT > protected: const size_t m_value_size; const std::vector m_operators; + AvalancheTest& m_test; public: /** Constructor @@ -352,9 +363,10 @@ class EvalFull : public eoEvalFunc< EOT > * @param value_size The size (in bits) of the values to manipulate * @param operators The sequence of operator names to be parametrized. */ - EvalFull(size_t value_size, const std::vector& operators) : + EvalFull(size_t value_size, const std::vector& operators, AvalancheTest& test) : m_value_size(value_size), - m_operators(operators) + m_operators(operators), + m_test(test) { } //! Call interface. @@ -366,8 +378,9 @@ class EvalFull : public eoEvalFunc< EOT > HashFunction hff = hffr.forward; HashFunction hfr = hffr.reverse; - // TODO: have a real objective function. - const double quality = hff.size() + hfr.size(); + ASSERT(hff.get_value_size() == m_value_size); + const double quality = m_test(hff); + // NOTE: do we want to aggregate runtime as well? 
sol.fitness( quality ); CLUTCHLOG(xdebug, "Evaluated solution: " << sol); diff --git a/test/EvalFull_T.cpp b/test/EvalFull_T.cpp index 75d494a..b534e99 100644 --- a/test/EvalFull_T.cpp +++ b/test/EvalFull_T.cpp @@ -35,7 +35,8 @@ TEST(EvalFull, LargeNeighborhood) hood.init(sol, to); // {0,0} EXPECT_TRUE(hood.hasNeighbor(sol)); - combi::EvalFull eval(value_size, forge); + StrictAvalancheTest test(value_size, /*nb_tests*/100); + combi::EvalFull eval(value_size, forge, test); while( hood.cont(sol) ) { eval(sol); diff --git a/test/EvalMO_T.cpp b/test/EvalMO_T.cpp index f4bb3cd..5948dfc 100644 --- a/test/EvalMO_T.cpp +++ b/test/EvalMO_T.cpp @@ -34,7 +34,8 @@ TEST(EvalMO, LargeNeighborhood) Combi sol(length, 0); - combi::EvalMO eval(value_size, forge); + StrictAvalancheTest test(value_size, /*nb_tests*/100); + combi::EvalMO eval(value_size, forge, test); using MutWrapper = eoRealToIntMonOp>; eoDetUniformMutation< typename MutWrapper::EOTreal > mutreal(/*range*/forge.size(), /*nb*/length); From f3e8ed37b73da167acdc5015a6ea18bf87f11a96 Mon Sep 17 00:00:00 2001 From: Johann Dreo Date: Fri, 11 Oct 2024 10:31:15 +0200 Subject: [PATCH 8/9] feat(search): add init-sol to parametrized search --- app/search.cpp | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/app/search.cpp b/app/search.cpp index e61b603..40e1c41 100644 --- a/app/search.cpp +++ b/app/search.cpp @@ -334,6 +334,8 @@ int main(int argc, char* argv[]) CLUTCHLOG(info, "mult-min = " << mult_min); CLUTCHLOG(info, "mult-max = " << mult_max); CLUTCHLOG(info, "mult-step = " << mult_step); + CLUTCHLOG(info, "pop-size = " << pop_size); + CLUTCHLOG(info, "nb-tests = " << nb_tests); if(shift_min == 0) { EXIT_ON_ERROR(InconsistentDomain, "It makes no sense to set `--shift-min` to zero."); @@ -530,6 +532,15 @@ int main(int argc, char* argv[]) // auto& pop = do_make_pop(argparser, state, init); eoPop pop; pop.append(pop_size, init); + R candidate; + if( init_sol ) { + CLUTCHLOG(progress, "Read solution from standard 
input..."); + candidate.readFrom(std::cin); + CLUTCHLOG(info, "Read solution: " << candidate); + candidate.invalidate(); // Always invalidate, in case fitness input is wrong.. + ASSERT(candidate.size() == func_len); + pop.push_back(candidate); + } CLUTCHLOG(note, "OK"); CLUTCHLOG(progress, "Solver run..."); From 6b4b5c68045bd99683f3477a3d3c94be6a0719e0 Mon Sep 17 00:00:00 2001 From: Johann Dreo Date: Fri, 11 Oct 2024 11:01:41 +0200 Subject: [PATCH 9/9] refactor(search): clean the logging of arguments --- app/search.cpp | 57 +++++++++++++++++++++++++++++--------------------- 1 file changed, 33 insertions(+), 24 deletions(-) diff --git a/app/search.cpp b/app/search.cpp index 40e1c41..511a4a7 100644 --- a/app/search.cpp +++ b/app/search.cpp @@ -219,7 +219,6 @@ std::vector split_in_vec(std::string str, const std::string sep = " int main(int argc, char* argv[]) { - CLUTCHLOG(progress, "Set config..."); eoParser argparser(argc, argv); /***** Classical arguments *****/ @@ -236,6 +235,16 @@ int main(int argc, char* argv[]) const size_t log_depth = argparser.createParam(std::numeric_limits::max(), "log-depth", "Maximum stack depth above which logging is not allowed (the larger, the more is displayed)", 'D', "Logging").value(); + + clutchlog_config(); // common config + auto& log = clutchlog::logger(); + ASSERT(log.levels().contains(log_level)); + log.threshold(log_level); + log.depth(log_depth); + log.file(log_file); + log.func(log_func); + CLUTCHLOG(progress, "Set config..."); + unsigned long long seed = argparser.createParam(0, "seed", "Seed of the pseudo-random generator (0 = use number of seconds since The Epoch)", 's', "Parameters").value(); @@ -309,33 +318,33 @@ int main(int argc, char* argv[]) // make_verbose(argparser); make_help(argparser); - clutchlog_config(); // common config - auto& log = clutchlog::logger(); - ASSERT(log.levels().contains(log_level)); - log.threshold(log_level); - log.depth(log_depth); - log.file(log_file); - log.func(log_func); - if(seed == 
0) { seed = std::time(nullptr); // Epoch } - CLUTCHLOG(info, "seed = " << seed); - CLUTCHLOG(info, "log-level = " << log_level); - CLUTCHLOG(info, "log-file = " << log_file); - CLUTCHLOG(info, "log-func = " << log_func); - CLUTCHLOG(info, "log-depth = " << log_depth); - CLUTCHLOG(info, "value-size = " << value_size); - CLUTCHLOG(info, "func-len = " << func_len); - CLUTCHLOG(info, "shift-min = " << shift_min); - CLUTCHLOG(info, "shift-max = " << shift_max); - CLUTCHLOG(info, "shift-step = " << shift_step); - CLUTCHLOG(info, "mult-min = " << mult_min); - CLUTCHLOG(info, "mult-max = " << mult_max); - CLUTCHLOG(info, "mult-step = " << mult_step); - CLUTCHLOG(info, "pop-size = " << pop_size); - CLUTCHLOG(info, "nb-tests = " << nb_tests); + CLUTCHLOGD(info, "seed = " << seed, 1); + CLUTCHLOGD(info, "log-level = " << log_level, 1); + CLUTCHLOGD(info, "log-file = " << log_file, 1); + CLUTCHLOGD(info, "log-func = " << log_func, 1); + CLUTCHLOGD(info, "log-depth = " << log_depth, 1); + CLUTCHLOGD(info, "value-size = " << value_size, 1); + CLUTCHLOGD(info, "func-len = " << func_len, 1); + CLUTCHLOGD(info, "shift-min = " << shift_min, 1); + CLUTCHLOGD(info, "shift-max = " << shift_max, 1); + CLUTCHLOGD(info, "shift-step = " << shift_step, 1); + CLUTCHLOGD(info, "mult-min = " << mult_min, 1); + CLUTCHLOGD(info, "mult-max = " << mult_max, 1); + CLUTCHLOGD(info, "mult-step = " << mult_step, 1); + CLUTCHLOGD(info, "parametrize= " << (parametrize? "ON" : "OFF"), 1); + CLUTCHCODE(info, + std::ostringstream msg; + for(auto op : split_in_vec(allowed_ops, ",")) {msg << " 𐙤 " << op;} + CLUTCHLOGD(info, "operators " << msg.str(), 1); + ); + CLUTCHLOGD(info, "init-sol = " << (init_sol? "ON" : "OFF"), 1); + CLUTCHLOGD(info, "algo = " << algo, 1); + CLUTCHLOGD(info, "pop-size = " << pop_size, 1); + CLUTCHLOGD(info, "nb-tests = " << nb_tests, 1); if(shift_min == 0) { EXIT_ON_ERROR(InconsistentDomain, "It makes no sense to set `--shift-min` to zero.");