From 4bf03faa715140aca9312cd7b673cf590a69ede2 Mon Sep 17 00:00:00 2001
From: aletempiac <alessandro.tempia@gmail.com>
Date: Tue, 30 Mar 2021 19:11:09 +0200
Subject: [PATCH 01/40] Technology mapper

---
 experiments/mapper.cpp                        |  110 +
 .../cut_enumeration/exact_map_cut.hpp         |  103 +
 .../cut_enumeration/tech_map_cut.hpp          |   99 +
 include/mockturtle/algorithms/mapper.hpp      | 2648 +++++++++++++++++
 include/mockturtle/io/genlib_reader.hpp       |    4 +-
 include/mockturtle/utils/tech_library.hpp     |  558 ++++
 lib/kitty/kitty/npn.hpp                       |   83 +
 test/algorithms/mapper.cpp                    |  197 ++
 test/io/genlib_reader.cpp                     |    5 +
 test/utils/tech_library.cpp                   |  164 +
 10 files changed, 3970 insertions(+), 1 deletion(-)
 create mode 100644 experiments/mapper.cpp
 create mode 100644 include/mockturtle/algorithms/cut_enumeration/exact_map_cut.hpp
 create mode 100644 include/mockturtle/algorithms/cut_enumeration/tech_map_cut.hpp
 create mode 100644 include/mockturtle/algorithms/mapper.hpp
 create mode 100644 include/mockturtle/utils/tech_library.hpp
 create mode 100644 test/algorithms/mapper.cpp
 create mode 100644 test/utils/tech_library.cpp
diff --git a/experiments/mapper.cpp b/experiments/mapper.cpp
new file mode 100644
index 000000000..bd4f89687
--- /dev/null
+++ b/experiments/mapper.cpp
@@ -0,0 +1,110 @@
+/* mockturtle: C++ logic network library
+ * Copyright (C) 2018-2021  EPFL
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use,
+ * copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following
+ * conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+ * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+ * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include <string>
+#include <vector>
+
+#include <fmt/format.h>
+#include <lorina/aiger.hpp>
+#include <lorina/genlib.hpp>
+#include <mockturtle/utils/tech_library.hpp>
+#include <mockturtle/algorithms/mapper.hpp>
+#include <mockturtle/io/aiger_reader.hpp>
+#include <mockturtle/io/genlib_reader.hpp>
+#include <mockturtle/networks/aig.hpp>
+#include <mockturtle/networks/klut.hpp>
+#include <mockturtle/views/depth_view.hpp>
+
+
+#include <experiments.hpp>
+
+std::string const mcnc_library =  "GATE   inv1    1	O=!a;		        PIN * INV 1 999 0.9 0.3 0.9 0.3\n"
+                                  "GATE   inv2	  2	O=!a;		        PIN * INV 2 999 1.0 0.1 1.0 0.1\n"
+                                  "GATE   inv3	  3	O=!a;		        PIN * INV 3 999 1.1 0.09 1.1 0.09\n"
+                                  "GATE   inv4	  4	O=!a;		        PIN * INV 4 999 1.2 0.07 1.2 0.07\n"
+                                  "GATE   nand2	  2	O=!(ab);		    PIN * INV 1 999 1.0 0.2 1.0 0.2\n"
+                                  "GATE   nand3	  3	O=!(abc);	      PIN * INV 1 999 1.1 0.3 1.1 0.3\n"
+                                  "GATE   nand4   4	O=!(abcd);	    PIN * INV 1 999 1.4 0.4 1.4 0.4\n"
+                                  "GATE   nor2	  2	O=!{ab};		    PIN * INV 1 999 1.4 0.5 1.4 0.5\n"
+                                  "GATE   nor3	  3	O=!{abc};	      PIN * INV 1 999 2.4 0.7 2.4 0.7\n"
+                                  "GATE   nor4	  4	O=!{abcd};	    PIN * INV 1 999 3.8 1.0 3.8 1.0\n"
+                                  "GATE   and2	  3	O=(ab);		      PIN * NONINV 1 999 1.9 0.3 1.9 0.3\n"
+                                  "GATE   or2		  3	O={ab};		      PIN * NONINV 1 999 2.4 0.3 2.4 0.3\n"
+                                  "GATE   xor2a	  5	O=[ab];     	  PIN * UNKNOWN 2 999 1.9 0.5 1.9 0.5\n"
+                                  "GATE   xnor2a	5	O=![ab];		    PIN * UNKNOWN 2 999 2.1 0.5 2.1 0.5\n"
+                                  "GATE   aoi21	  3	O=!{(ab)c};	    PIN * INV 1 999 1.6 0.4 1.6 0.4\n"
+                                  "GATE   aoi22	  4	O=!{(ab)(cd)};	PIN * INV 1 999 2.0 0.4 2.0 0.4\n"
+                                  "GATE   oai21	  3	O=!({ab}c);	    PIN * INV 1 999 1.6 0.4 1.6 0.4\n"
+                                  "GATE   oai22	  4	O=!({ab}{cd});	PIN * INV 1 999 2.0 0.4 2.0 0.4\n"
+                                  "GATE   buf    	2	O=a;        	  PIN * NONINV 1 999 1.0 0.0 1.0 0.0\n"
+                                  "GATE   zero	  0	O=0;\n"
+                                  "GATE   one		  0	O=1;";
+
+int main()
+{
+  using namespace experiments;
+  using namespace mockturtle;
+
+  experiment<std::string, uint32_t, double, uint32_t, float, float, bool> exp( "mapper", "benchmark", "size", "area_after", "depth", "delay_after", "runtime", "equivalent" );
+
+  fmt::print( "[i] processing technology library\n" );
+
+  /* parse the embedded genlib description; the run is aborted if it cannot be read */
+  std::vector<gate> gates;
+  std::istringstream in( mcnc_library );
+  if ( lorina::read_genlib( in, genlib_reader( gates ) ) != lorina::return_code::success )
+  {
+    std::cout << "ERROR IN" << std::endl;
+    std::abort();
+  }
+
+  tech_library<5> lib( gates );
+
+  for ( auto const& benchmark : epfl_benchmarks() )
+  {
+    fmt::print( "[i] processing {}\n", benchmark );
+    aig_network aig;
+    if ( lorina::read_aiger( benchmark_path( benchmark ), aiger_reader( aig ) ) != lorina::return_code::success )
+      continue; /* unreadable benchmark: skip it rather than map an incomplete network */
+
+    const uint32_t size_before = aig.num_gates();
+    const uint32_t depth_before = depth_view( aig ).depth();
+
+    map_params ps;
+    ps.cut_enumeration_ps.cut_size = 5;
+    map_stats st;
+
+    auto res = tech_map( aig, lib, ps, &st );
+
+    const auto cec = benchmark == "hyp" ? true : abc_cec( res, benchmark );
+
+    exp( benchmark, size_before, st.area, depth_before, st.delay, to_seconds( st.time_total ), cec );
+  }
+
+  exp.save();
+  exp.table();
+
+  return 0;
+}
diff --git a/include/mockturtle/algorithms/cut_enumeration/exact_map_cut.hpp b/include/mockturtle/algorithms/cut_enumeration/exact_map_cut.hpp
new file mode 100644
index 000000000..9a3455b98
--- /dev/null
+++ b/include/mockturtle/algorithms/cut_enumeration/exact_map_cut.hpp
@@ -0,0 +1,103 @@
+/* mockturtle: C++ logic network library
+ * Copyright (C) 2018-2021  EPFL
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use,
+ * copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following
+ * conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+ * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+ * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+/*!
+  \file exact_map_cut.hpp
+  \brief Cut enumeration for mapping with exact synthesis
+
+  \author Alessandro Tempia Calvino
+*/
+
+#pragma once
+
+#include <algorithm>
+#include <vector>
+#include <cstdint>
+#include <iomanip>
+#include <iostream>
+
+#include "../cut_enumeration.hpp"
+
+namespace mockturtle
+{
+
+/*! \brief Cut data for mapping with exact synthesis, based on ABC's giaMf.c
+
+  See <a href="https://github.com/berkeley-abc/abc/blob/master/src/aig/gia/giaMf.c">giaMf.c</a> in ABC's repository.
+*/
+struct cut_enumeration_exact_map_cut
+{
+  uint32_t delay{0}; /* 1 + largest delay among the best cuts of the leaves (see the update_cut specialization) */
+  float flow{0}; /* (1 + sum of the leaves' best-cut flows) divided by the fanout size of the root */
+  uint8_t match_index{0}; /* NOTE(review): presumably the position of the cut's match in a per-node match list -- confirm with the user of this cut type */
+  bool ignore{false}; /* NOTE(review): presumably marks cuts that must not be used for mapping -- confirm with the user of this cut type */
+};
+
+template<bool ComputeTruth>
+bool operator<( cut_type<ComputeTruth, cut_enumeration_exact_map_cut> const& c1, cut_type<ComputeTruth, cut_enumeration_exact_map_cut> const& c2 )
+{
+  constexpr auto eps{0.005f}; /* order: flow (within a tolerance of eps), then delay, then number of leaves */
+  auto const& lhs = c1->data;
+  auto const& rhs = c2->data;
+  if ( lhs.flow < rhs.flow - eps )
+    return true;
+  if ( lhs.flow > rhs.flow + eps )
+    return false;
+  if ( lhs.delay != rhs.delay )
+    return lhs.delay < rhs.delay;
+  return c1.size() < c2.size();
+}
+
+template<>
+struct cut_enumeration_update_cut<cut_enumeration_exact_map_cut>
+{
+  template<typename Cut, typename NetworkCuts, typename Ntk>
+  static void apply( Cut& cut, NetworkCuts const& cuts, Ntk const& ntk, node<Ntk> const& n )
+  {
+    /* accumulate over the best (first) cut of every leaf */
+    uint32_t worst_leaf_delay{0};
+    float flow_sum = 1.0f;
+    for ( auto leaf : cut )
+    {
+      auto const& leaf_data = cuts.cuts( leaf )[0]->data;
+      worst_leaf_delay = std::max( worst_leaf_delay, leaf_data.delay );
+      flow_sum += leaf_data.flow;
+    }
+    /* the accumulated flow is divided by the fanout size of the root; the delay grows by one level */
+    cut->data.flow = flow_sum / ntk.fanout_size( n );
+    cut->data.delay = 1 + worst_leaf_delay;
+  }
+};
+
+template<int MaxLeaves>
+std::ostream& operator<<( std::ostream& os, cut<MaxLeaves, cut_data<false, cut_enumeration_exact_map_cut>> const& c )
+{
+  os << "{ "; /* output: "{ <leaves> }, D = <delay> A = <flow>"; a plain loop avoids std::ostream_iterator, whose header <iterator> is not included */
+  for ( uint32_t const leaf : c )
+    os << leaf << " ";
+  os << "}, D = " << std::setw( 3 ) << c->data.delay << " A = " << c->data.flow;
+  return os;
+}
+
+} // namespace mockturtle
diff --git a/include/mockturtle/algorithms/cut_enumeration/tech_map_cut.hpp b/include/mockturtle/algorithms/cut_enumeration/tech_map_cut.hpp
new file mode 100644
index 000000000..61905c356
--- /dev/null
+++ b/include/mockturtle/algorithms/cut_enumeration/tech_map_cut.hpp
@@ -0,0 +1,99 @@
+/* mockturtle: C++ logic network library
+ * Copyright (C) 2018-2021  EPFL
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use,
+ * copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following
+ * conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+ * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+ * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+/*!
+  \file tech_map_cut.hpp
+  \brief Cut enumeration for technology mapping
+
+  \author Alessandro Tempia Calvino
+*/
+
+#pragma once
+
+#include <algorithm>
+#include <vector>
+#include <cstdint>
+#include <iomanip>
+#include <iostream>
+
+#include "../cut_enumeration.hpp"
+
+namespace mockturtle
+{
+
+struct cut_enumeration_tech_map_cut /* per-cut data kept during cut enumeration for technology mapping */
+{
+  uint32_t delay{0}; /* 1 + largest delay among the best cuts of the leaves (see the update_cut specialization) */
+  float flow{0}; /* (1 + sum of the leaves' best-cut flows) divided by the fanout size of the root */
+  uint8_t match_index{0}; /* NOTE(review): presumably the position of the cut's match in the mapper's per-node match list -- confirm in mapper.hpp */
+  bool ignore{false}; /* NOTE(review): presumably set by the mapper for cuts it cannot use -- confirm in mapper.hpp */
+};
+
+template<bool ComputeTruth>
+bool operator<( cut_type<ComputeTruth, cut_enumeration_tech_map_cut> const& c1, cut_type<ComputeTruth, cut_enumeration_tech_map_cut> const& c2 )
+{
+  /* order: number of leaves first, then delay,
+   * and finally flow (smaller by more than eps) */
+  constexpr auto eps{0.005f};
+  if ( c1.size() != c2.size() )
+    return c1.size() < c2.size();
+  auto const& lhs = c1->data;
+  auto const& rhs = c2->data;
+  if ( lhs.delay != rhs.delay )
+    return lhs.delay < rhs.delay;
+  return lhs.flow < rhs.flow - eps;
+}
+
+template<>
+struct cut_enumeration_update_cut<cut_enumeration_tech_map_cut>
+{
+  template<typename Cut, typename NetworkCuts, typename Ntk>
+  static void apply( Cut& cut, NetworkCuts const& cuts, Ntk const& ntk, node<Ntk> const& n )
+  {
+    /* start from one unit of flow for the root and add the best (first) cut of each leaf */
+    float total_flow = 1.0f;
+    uint32_t max_delay{0};
+    for ( auto leaf : cut )
+    {
+      auto const& leaf_best = cuts.cuts( leaf )[0];
+      total_flow += leaf_best->data.flow;
+      max_delay = std::max( max_delay, leaf_best->data.delay );
+    }
+
+    cut->data.delay = max_delay + 1;
+    cut->data.flow = total_flow / ntk.fanout_size( n );
+  }
+};
+
+template<int MaxLeaves>
+std::ostream& operator<<( std::ostream& os, cut<MaxLeaves, cut_data<false, cut_enumeration_tech_map_cut>> const& c )
+{
+  os << "{ "; /* output: "{ <leaves> }, D = <delay> A = <flow>"; written as a loop so that <iterator> (std::ostream_iterator) is not required */
+  for ( auto it = c.begin(); it != c.end(); ++it )
+    os << static_cast<uint32_t>( *it ) << " ";
+  os << "}, D = " << std::setw( 3 ) << c->data.delay << " A = " << c->data.flow;
+  return os;
+}
+
+} // namespace mockturtle
diff --git a/include/mockturtle/algorithms/mapper.hpp b/include/mockturtle/algorithms/mapper.hpp
new file mode 100644
index 000000000..1f96ea046
--- /dev/null
+++ b/include/mockturtle/algorithms/mapper.hpp
@@ -0,0 +1,2648 @@
+/* mockturtle: C++ logic network library
+ * Copyright (C) 2018-2021  EPFL
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use,
+ * copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following
+ * conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+ * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+ * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+/*!
+  \file mapper.hpp
+  \brief Mapper
+
+  \author Alessandro Tempia Calvino
+*/
+
+#pragma once
+
+#include <cstdint>
+#include <limits>
+
+#include <fmt/format.h>
+
+#include "../utils/stopwatch.hpp"
+#include "../utils/node_map.hpp"
+#include "../utils/tech_library.hpp"
+#include "../views/topo_view.hpp"
+#include "../networks/klut.hpp"
+#include "cut_enumeration.hpp"
+#include "cut_enumeration/tech_map_cut.hpp"
+#include "cut_enumeration/exact_map_cut.hpp"
+
+namespace mockturtle
+{
+
+/*! \brief Parameters for technology mapping.
+ *
+ * The data structure `map_params` holds configurable parameters
+ * with default arguments for the mapper.
+ */
+struct map_params
+{
+  map_params()
+  {
+    cut_enumeration_ps.cut_size = 4;
+    cut_enumeration_ps.cut_limit = 25;
+    cut_enumeration_ps.minimize_truth_table = true;
+  }
+
+  /*! \brief Parameters for cut enumeration
+   *
+   * The default cut size is 4, the default cut limit is 25.
+   */
+  cut_enumeration_params cut_enumeration_ps{};
+
+  /*! \brief Required time for delay optimization (0 means no explicit target). */
+  double required_time{0.0f};
+
+  /*! \brief Skip the delay-oriented mapping round. */
+  bool skip_delay_round{false};
+
+  /*! \brief Number of rounds for area flow optimization. */
+  uint32_t area_flow_rounds{1u};
+
+  /*! \brief Number of rounds for exact area optimization. */
+  uint32_t ela_rounds{2u};
+
+  /*! \brief Use structural choices. */
+  bool choices{false};
+
+  /*! \brief Be verbose. */
+  bool verbose{false};
+};
+
+/*! \brief Statistics for mapper.
+ *
+ * The data structure `map_stats` provides data collected by running
+ * the mapper.
+ */
+struct map_stats
+{
+  /*! \brief Area and delay results. */
+  double area{0};
+  double delay{0};
+
+  /*! \brief Runtime. */
+  stopwatch<>::duration time_mapping{0};
+  stopwatch<>::duration time_total{0};
+
+  /*! \brief Cut enumeration stats. */
+  cut_enumeration_stats cut_enumeration_st{};
+
+  /*! \brief Delay and area stats for each round (one preformatted line per round). */
+  std::vector<std::string> round_stats{};
+
+  /*! \brief Gates usage stats. */
+  std::string gates_usage{};
+
+  /*! \brief Mapping error (the mapping could not be completed). */
+  bool mapping_error{false};
+
+  void report() const /* prints the round stats, then area/delay, the runtimes and the gates usage to std::cout */
+  {
+    for ( auto const& stat : round_stats )
+    {
+      std::cout << stat;
+    }
+    std::cout << fmt::format( "[i] Area = {:>5.2f}; Delay = {:>5.2f}\n", area, delay );
+    std::cout << fmt::format( "[i] Mapping runtime = {:>5.2f} secs\n", to_seconds( time_mapping ) );
+    std::cout << fmt::format( "[i] Total runtime   = {:>5.2f} secs\n", to_seconds( time_total ) );
+    std::cout << "[i] Gates usage report:\n" << gates_usage;
+  }
+};
+
+namespace detail
+{
+
+template<unsigned NInputs>
+struct node_match_tech /* per-node mapping record; every member is zero-initialised so that no field is ever indeterminate */
+{
+  /* best gate match for positive and negative output phases */
+  supergate<NInputs> const* best_supergate[2] = {nullptr, nullptr};
+  /* fanin pin phases for both output phases */
+  uint8_t phase[2]{};
+  /* best cut index for both phases */
+  uint32_t best_cut[2]{};
+  /* node is mapped using only one phase */
+  bool same_match{false};
+
+  /* arrival time at node output */
+  double arrival[2]{};
+  /* required time at node output */
+  double required[2]{};
+  /* area of the best matches */
+  float area[2]{};
+
+  /* number of references in the cover 0: pos, 1: neg, 2: pos+neg */
+  uint32_t map_refs[3]{};
+  /* references estimation */
+  float est_refs[3]{};
+  /* area flow */
+  float flows[3]{};
+};
+
+
+template<class Ntk, unsigned NInputs, typename CutData>
+class tech_map_impl
+{
+public:
+  using network_cuts_t = network_cuts<Ntk, true, CutData>;
+  using cut_t = typename network_cuts_t::cut_t;
+  using supergate_t = std::array<std::vector<supergate<NInputs>> const*, 2>;
+  using klut_map = std::unordered_map<uint32_t, std::array<signal<klut_network>, 2>>;
+
+public:
+  tech_map_impl( Ntk const& ntk, tech_library<NInputs> const& library, map_params const& ps, map_stats& st )
+      : ntk( ntk ),
+        library( library ),
+        ps( ps ),
+        st( st ),
+        node_match( ntk.size() ), /* one mapping record per network node, addressed by node index */
+        matches(),
+        cuts( cut_enumeration<Ntk, true, CutData>( ntk, ps.cut_enumeration_ps, &st.cut_enumeration_st ) ) /* cut enumeration runs here, during construction */
+  {
+    std::tie( lib_inv_area, lib_inv_delay, lib_inv_id ) = library.get_inverter_info(); /* cache area, delay and id of the library inverter */
+  }
+
+  klut_network run()
+  {
+    stopwatch t( st.time_mapping );
+    /* `res` is the network that is returned; when a mapping round fails it is returned without calling finalize_cover() */
+    auto [res, old2new] = initialize_map_network();
+
+    /* compute and save topological order */
+    top_order.reserve( ntk.size() );
+    topo_view<Ntk>( ntk ).foreach_node( [this]( auto n ) {
+      top_order.push_back( n );
+    } );
+
+    /* match cuts with gates */
+    compute_matches();
+
+    /* init the data structure */
+    init_nodes();
+
+    /* compute mapping for delay (unless `skip_delay_round` is set) */
+    if ( !ps.skip_delay_round )
+    {
+      if ( !compute_mapping<false>() )
+      {
+        return res;
+      }
+    }
+
+    /* compute mapping using global area flow; `iteration` is advanced by set_mapping_refs() in every successful round */
+    while ( iteration < ps.area_flow_rounds + 1 )
+    {
+      compute_required_time();
+      if ( !compute_mapping<true>() )
+      {
+        return res;
+      }
+    }
+
+    /* compute mapping using exact area */
+    while ( iteration < ps.ela_rounds + ps.area_flow_rounds + 1 )
+    {
+      compute_required_time();
+      if ( !compute_mapping_exact_area() )
+      {
+        return res;
+      }
+    }
+
+    /* generate the output network */
+    finalize_cover( res, old2new );
+
+    return res;
+  }
+
+private:
+  void init_nodes()
+  {
+    ntk.foreach_node( [this]( auto const& n, auto ) {
+      const auto index = ntk.node_to_index( n );
+      auto& node_data = node_match[index];
+      /* initialise the reference estimates (and, for terminals, the arrival times) according to the node kind */
+      if ( ntk.is_constant( n ) )
+      {
+        /* constants start with an estimated reference count of 1.0 for every phase */
+        node_data.est_refs[0] = node_data.est_refs[1] = node_data.est_refs[2] = 1.0f;
+        node_data.arrival[0] = node_data.arrival[1] = 0.0f;
+        match_constants( index );
+      }
+      else if ( ntk.is_pi( n ) )
+      {
+        /* PIs start with an estimated reference count of 1.0 for every phase */
+        node_data.est_refs[0] = node_data.est_refs[1] = node_data.est_refs[2] = 1.0f;
+        node_data.arrival[0] = 0.0f;
+        /* PIs have the negative phase implemented with an inverter */
+        node_data.arrival[1] = lib_inv_delay;
+      }
+      else
+      {
+        node_data.est_refs[0] = node_data.est_refs[1] = 0.0f;
+        node_data.est_refs[2] = static_cast<float>( ntk.fanout_size( n ) ); /* both-phase estimate starts from the fanout size */
+        ntk.foreach_fanin( n, [&]( auto const& s ) {
+          if ( !ntk.is_pi( ntk.get_node( s ) ) ) /* per-phase estimates of non-PI fanins count the edges of each polarity */
+          {
+            const auto c_index = ntk.node_to_index( ntk.get_node( s ) );
+            if ( ntk.is_complemented( s ) )
+              node_match[c_index].est_refs[1] += 1.0f;
+            else
+              node_match[c_index].est_refs[0] += 1.0f;
+          }
+        } );
+      }
+    } );
+  }
+
+
+  void compute_matches()
+  {
+    /* For every gate, collect the library supergates that implement the function of each cut
+     * (in either output polarity) and flag the cuts that cannot be used for mapping. */
+    ntk.foreach_gate( [&]( auto const& n ) {
+      const auto index = ntk.node_to_index( n );
+      /* fill the per-node match list in place instead of copying a temporary vector into it */
+      auto& node_matches = matches[index];
+      node_matches.clear();
+
+      auto i = 0u;
+      for ( auto& cut : cuts.cuts( index ) )
+      {
+        if ( cut->size() == 1 )
+        {
+          /* Ignore single-leaf cuts */
+          ( *cut )->data.ignore = true;
+          continue;
+        }
+        const auto tt = cuts.truth_table( *cut );
+        if ( tt.num_vars() > NInputs )
+        {
+          /* Ignore cuts too big to be mapped using the library */
+          ( *cut )->data.ignore = true;
+          continue;
+        }
+        const auto fe = kitty::extend_to<NInputs>( tt );
+        auto const supergates_pos = library.get_supergates( fe );
+        auto const supergates_neg = library.get_supergates( ~fe );
+        if ( supergates_pos != nullptr || supergates_neg != nullptr )
+        {
+          /* `match_index` is the position of this cut's entry in `node_matches` */
+          node_matches.push_back( supergate_t{supergates_pos, supergates_neg} );
+          ( *cut )->data.match_index = i++;
+        }
+        else
+        {
+          /* Ignore not matched cuts */
+          ( *cut )->data.ignore = true;
+        }
+      }
+    } );
+  }
+
+  template<bool DO_AREA>
+  bool compute_mapping() /* one mapping round over all gates in `top_order`; returns the result of set_mapping_refs() */
+  {
+    for ( auto const& n : top_order )
+    {
+      if ( ntk.is_constant( n ) || ntk.is_pi( n ) )
+        continue;
+
+      /* match positive phase */
+      match_phase<DO_AREA>( n, 0u );
+
+      /* match negative phase */
+      match_phase<DO_AREA>( n, 1u );
+
+      /* try to drop one phase */
+      match_drop_phase<DO_AREA, false>( n, 0 );
+    }
+
+    double area_old = area; /* area before this round, used for the gain in the report */
+    bool success = set_mapping_refs<false>(); /* recomputes `area` and `delay`; advances `iteration` on success */
+
+    /* round stats */
+    if ( ps.verbose )
+    {
+      std::stringstream stats{};
+      float area_gain = 0.0f;
+      /* no gain is reported while `iteration` equals 1, i.e. right after the first successful round */
+      if ( iteration != 1 )
+        area_gain = float( ( area_old - area ) / area_old * 100 );
+
+      if constexpr ( DO_AREA )
+      {
+        stats << fmt::format( "[i] AreaFlow : Delay = {:>12.2f}  Area = {:>12.2f}  {:>5.2f} %\n", delay, area, area_gain );
+      }
+      else
+      {
+        stats << fmt::format( "[i] Delay    : Delay = {:>12.2f}  Area = {:>12.2f}  {:>5.2f} %\n", delay, area, area_gain );
+      }
+      st.round_stats.push_back( stats.str() );
+    }
+
+    return success;
+  }
+
+
+  bool compute_mapping_exact_area() /* one exact-area round over all gates in `top_order`; returns the result of set_mapping_refs() */
+  {
+    for ( auto const& n : top_order )
+    {
+      if ( ntk.is_constant( n ) || ntk.is_pi( n ) )
+        continue;
+
+      auto index = ntk.node_to_index( n );
+      auto& node_data = node_match[index];
+
+      /* recursively deselect the best cut shared between
+       * the two phases if in use in the cover */
+      if ( node_data.same_match && node_data.map_refs[2] != 0 )
+      {
+        if ( node_data.best_supergate[0] != nullptr )
+          cut_deref( cuts.cuts( index )[node_data.best_cut[0]], n, 0u );
+        else
+          cut_deref( cuts.cuts( index )[node_data.best_cut[1]], n, 1u );
+      }
+
+      /* match positive phase */
+      match_phase_exact( n, 0u );
+
+      /* match negative phase */
+      match_phase_exact( n, 1u );
+
+      /* try to drop one phase */
+      match_drop_phase<true, true>( n, 0 );
+    }
+
+    double area_old = area; /* area before this round, used for the gain in the report */
+    bool success = set_mapping_refs<true>(); /* ELA variant: recomputes `area` and `delay` without rebuilding `map_refs` */
+
+    /* round stats */
+    if ( ps.verbose )
+    {
+      float area_gain = float( ( area_old - area ) / area_old * 100 );
+      std::stringstream stats{};
+      stats << fmt::format( "[i] Area     : Delay = {:>12.2f}  Area = {:>12.2f}  {:>5.2f} %\n", delay, area, area_gain );
+      st.round_stats.push_back( stats.str() );
+    }
+
+    return success;
+  }
+
+  template<bool ELA>
+  bool set_mapping_refs()
+  {
+    /* Recomputes `delay` and `area` of the current cover and, unless ELA, rebuilds `map_refs` starting from the POs; returns false on a mapping error. */
+    const auto coef = 1.0f / ( 2.0f + ( iteration + 1 ) * ( iteration + 1 ) );
+    if constexpr ( !ELA )
+    {
+      for ( auto i = 0u; i < node_match.size(); ++i )
+      {
+        node_match[i].map_refs[0] = node_match[i].map_refs[1] = node_match[i].map_refs[2] = 0u;
+      }
+    }
+
+    /* compute the current worst delay and update the mapping refs */
+    delay = 0.0f;
+    ntk.foreach_po( [this]( auto s ) {
+      const auto index = ntk.node_to_index( ntk.get_node( s ) );
+
+      if ( ntk.is_complemented( s ) )
+        delay = std::max( delay, node_match[index].arrival[1] );
+      else
+        delay = std::max( delay, node_match[index].arrival[0] );
+
+      if constexpr ( !ELA )
+      {
+        node_match[index].map_refs[2]++;
+        if ( ntk.is_complemented( s ) )
+          node_match[index].map_refs[1]++;
+        else
+          node_match[index].map_refs[0]++;
+      }
+    } );
+
+    /* compute current area and update mapping refs in top-down order */
+    area = 0.0f;
+    for ( auto it = top_order.rbegin(); it != top_order.rend(); ++it )
+    {
+      const auto index = ntk.node_to_index( *it );
+      auto& node_data = node_match[index];
+
+      /* constants and PIs have no cut: only check availability and add input inverters */
+      if ( ntk.is_constant( *it ) )
+      {
+        if ( node_match[index].map_refs[2] > 0u )
+        {
+          /* if used and not available in the library launch a mapping error */
+          if ( node_data.best_supergate[0] == nullptr && node_data.best_supergate[1] == nullptr )
+          {
+            std::cerr << "[i] MAP ERROR: technology library does not contain constant gates, impossible to perform mapping" << std::endl;
+            st.mapping_error = true;
+            return false;
+          }
+        }
+        continue;
+      }
+      else if ( ntk.is_pi( *it ) )
+      {
+        if ( node_match[index].map_refs[1] > 0u )
+        {
+          /* Add inverter area over the negated fanins */
+          area += lib_inv_area;
+        }
+        continue;
+      }
+
+      /* continue if not referenced in the cover */
+      if ( node_match[index].map_refs[2] == 0u )
+        continue;
+
+      unsigned use_phase = node_data.best_supergate[0] == nullptr ? 1u : 0u;
+
+      if ( node_data.best_supergate[use_phase] == nullptr )
+      {
+        /* Library is not complete, mapping is not possible */
+        std::cerr << "[i] MAP ERROR: technology library is not complete, impossible to perform mapping" << std::endl;
+        st.mapping_error = true;
+        return false;
+      }
+
+      if ( node_data.same_match || node_data.map_refs[use_phase] > 0 )
+      {
+        if constexpr ( !ELA )
+        {
+          auto const& best_cut = cuts.cuts( index )[node_data.best_cut[use_phase]];
+          auto ctr = 0u;
+
+          for ( auto const leaf : best_cut )
+          {
+            node_match[leaf].map_refs[2]++;
+            if ( ( node_data.phase[use_phase] >> ctr++ ) & 1 )
+              node_match[leaf].map_refs[1]++;
+            else
+              node_match[leaf].map_refs[0]++;
+          }
+        }
+        area += node_data.area[use_phase];
+        if ( node_data.same_match && node_data.map_refs[use_phase ^ 1] > 0 )
+        {
+          area += lib_inv_area;
+        }
+      }
+
+      /* switch to the other phase */
+      use_phase = use_phase ^ 1;
+
+      /* if both phases are implemented and used */
+      if ( !node_data.same_match && node_data.map_refs[use_phase] > 0 )
+      {
+        if constexpr ( !ELA )
+        {
+          auto const& best_cut = cuts.cuts( index )[node_data.best_cut[use_phase]];
+          auto ctr = 0u;
+          for ( auto const leaf : best_cut )
+          {
+            node_match[leaf].map_refs[2]++;
+            if ( ( node_data.phase[use_phase] >> ctr++ ) & 1 )
+              node_match[leaf].map_refs[1]++;
+            else
+              node_match[leaf].map_refs[0]++;
+          }
+        }
+        area += node_data.area[use_phase];
+      }
+    }
+
+    /* blend estimated references: `coef` weights the previous estimate, the rest goes to the current references (at least 1) */
+    for ( auto i = 0u; i < ntk.size(); ++i )
+    {
+      node_match[i].est_refs[2] = coef * node_match[i].est_refs[2] + ( 1.0f - coef ) * std::max( 1.0f, static_cast<float>( node_match[i].map_refs[2] ) );
+      node_match[i].est_refs[1] = coef * node_match[i].est_refs[1] + ( 1.0f - coef ) * std::max( 1.0f, static_cast<float>( node_match[i].map_refs[1] ) );
+      node_match[i].est_refs[0] = coef * node_match[i].est_refs[0] + ( 1.0f - coef ) * std::max( 1.0f, static_cast<float>( node_match[i].map_refs[0] ) );
+    }
+
+    ++iteration;
+    return true;
+  }
+
+  void compute_required_time()
+  {
+    /* Resets the required time of every node and, once a cover exists, propagates
+     * the target delay from the POs back to the leaves of the selected cuts. */
+    for ( auto i = 0u; i < node_match.size(); ++i )
+    {
+      node_match[i].required[0] = node_match[i].required[1] = std::numeric_limits<float>::max();
+    }
+
+    /* return in case of `skip_delay_round`: no mapping round has been completed yet */
+    if ( iteration == 0 )
+      return;
+
+    auto required = delay;
+
+    if ( ps.required_time != 0.0f )
+    {
+      /* Global target time constraint */
+      if ( ps.required_time < delay - epsilon )
+      {
+        /* the target cannot be met: keep the current `delay` as requirement */
+        if ( !ps.skip_delay_round && iteration == 1 )
+          std::cerr << fmt::format( "[i] MAP WARNING: cannot meet the target required time of {:.2f}", ps.required_time ) << std::endl;
+      }
+      else
+      {
+        required = ps.required_time;
+      }
+    }
+
+    /* set the required time at POs */
+    ntk.foreach_po( [&]( auto const& s ) {
+      const auto index = ntk.node_to_index( ntk.get_node( s ) );
+      if ( ntk.is_complemented( s ) )
+        node_match[index].required[1] = required;
+      else
+        node_match[index].required[0] = required;
+    } );
+
+    /* propagate required time to the PIs, visiting `top_order` backwards
+     * (the same traversal that `set_mapping_refs` uses) */
+    for ( auto it = top_order.rbegin(); it != top_order.rend(); ++it )
+    {
+      /* terminals have no cut to propagate through; `continue` (not `break`) so that
+       * no gate is skipped, wherever the terminals sit in the order */
+      if ( ntk.is_pi( *it ) || ntk.is_constant( *it ) )
+        continue;
+
+      const auto i = ntk.node_to_index( *it );
+      if ( node_match[i].map_refs[2] == 0 )
+        continue;
+
+      auto& node_data = node_match[i];
+
+      unsigned use_phase = node_data.best_supergate[0] == nullptr ? 1u : 0u;
+      unsigned other_phase = use_phase ^ 1;
+
+      assert( node_data.best_supergate[0] != nullptr || node_data.best_supergate[1] != nullptr );
+      assert( node_data.map_refs[0] || node_data.map_refs[1] );
+
+      /* propagate required time over the output inverter if present */
+      if ( node_data.same_match && node_data.map_refs[other_phase] > 0 )
+      {
+        node_data.required[use_phase] = std::min( node_data.required[use_phase], node_data.required[other_phase] - lib_inv_delay );
+      }
+
+      /* pushes the required time of output phase `p` through its gate onto the leaves
+       * of its best cut; the cut is only read, hence bound by reference */
+      auto propagate_to_leaves = [&]( unsigned p ) {
+        auto const& best_cut = cuts.cuts( i )[node_data.best_cut[p]];
+        auto const& supergate = node_data.best_supergate[p];
+        auto ctr = 0u;
+        for ( auto leaf : best_cut )
+        {
+          auto phase = ( node_data.phase[p] >> ctr ) & 1;
+          node_match[leaf].required[phase] = std::min( node_match[leaf].required[phase], node_data.required[p] - supergate->tdelay[ctr] );
+          ++ctr;
+        }
+      };
+
+      if ( node_data.same_match || node_data.map_refs[use_phase] > 0 )
+        propagate_to_leaves( use_phase );
+
+      /* both phases are implemented and the other one is used as well */
+      if ( !node_data.same_match && node_data.map_refs[other_phase] > 0 )
+        propagate_to_leaves( other_phase );
+    }
+  }
+
+  template<bool DO_AREA> /* DO_AREA: compare by area flow first and discard gates that violate the required time */
+  void match_phase( node<Ntk> const& n, uint8_t phase ) /* selects the best cut and supergate for output phase `phase` of node `n` */
+  {
+    float best_arrival = std::numeric_limits<float>::max();
+    float best_area_flow = std::numeric_limits<float>::max();
+    float best_area = std::numeric_limits<float>::max();
+    uint32_t best_size = UINT32_MAX;
+    uint8_t best_cut = 0u;
+    uint8_t best_phase = 0u;
+    uint8_t cut_index = 0u; /* position of the visited cut inside the cut set of the node */
+    auto index = ntk.node_to_index( n );
+
+    auto& node_data = node_match[index];
+    auto& cut_matches = matches[index];
+    supergate<NInputs> const* best_supergate = node_data.best_supergate[phase];
+
+    /* recompute the cost of the match already stored in node_data using the current leaf arrivals and flows */
+    if ( best_supergate != nullptr )
+    {
+      auto const& cut = cuts.cuts( index )[node_data.best_cut[phase]];
+
+      best_phase = node_data.phase[phase];
+      best_arrival = 0.0f;
+      best_area_flow = best_supergate->area + cut_leaves_flow( cut, n, phase );
+      best_area = best_supergate->area;
+      best_cut = node_data.best_cut[phase];
+      best_size = cut.size();
+
+      auto ctr = 0u;
+      for ( auto l : cut )
+      {
+        float arrival_pin = node_match[l].arrival[( best_phase >> ctr ) & 1] + best_supergate->tdelay[ctr];
+        best_arrival = std::max( best_arrival, arrival_pin );
+        ++ctr;
+      }
+    }
+
+    /* foreach cut */
+    for ( auto& cut : cuts.cuts( index ) )
+    {
+      /* trivial cuts or not matched cuts */
+      if ( ( *cut )->data.ignore )
+      {
+        ++cut_index;
+        continue;
+      }
+
+      auto const& supergates = cut_matches[( *cut )->data.match_index];
+
+      if ( supergates[phase] == nullptr ) /* no gate list for this cut in the requested output phase */
+      {
+        ++cut_index;
+        continue;
+      }
+
+      /* match each gate and take the best one */
+      for ( auto const& gate : *supergates[phase] )
+      {
+        node_data.phase[phase] = gate.polarity; /* cut_leaves_flow reads the pin phases from node_data */
+        float area_local = gate.area + cut_leaves_flow( *cut, n, phase );
+        float worst_arrival = 0.0f;
+
+        auto ctr = 0u;
+        for ( auto l : *cut )
+        {
+          float arrival_pin = node_match[l].arrival[( gate.polarity >> ctr ) & 1] + gate.tdelay[ctr];
+          worst_arrival = std::max( worst_arrival, arrival_pin );
+          ++ctr;
+        }
+
+        if constexpr ( DO_AREA )
+        {
+          if ( worst_arrival > node_data.required[phase] + epsilon ) /* gate would violate the required time */
+            continue;
+        }
+
+        if ( compare_map<DO_AREA>( worst_arrival, best_arrival, area_local, best_area_flow, cut->size(), best_size ) )
+        {
+          best_arrival = worst_arrival;
+          best_area_flow = area_local;
+          best_size = cut->size();
+          best_cut = cut_index;
+          best_area = gate.area;
+          best_phase = gate.polarity;
+          best_supergate = &gate;
+        }
+      }
+
+      ++cut_index;
+    }
+
+    node_data.flows[phase] = best_area_flow; /* store the selected match; this also overwrites the temporary pin phases */
+    node_data.arrival[phase] = best_arrival;
+    node_data.area[phase] = best_area;
+    node_data.best_cut[phase] = best_cut;
+    node_data.phase[phase] = best_phase;
+    node_data.best_supergate[phase] = best_supergate;
+  }
+
+  void match_phase_exact( node<Ntk> const& n, uint8_t phase ) /* as match_phase, but candidates are ranked by the area returned by cut_ref */
+  {
+    float best_arrival = std::numeric_limits<float>::max();
+    float best_exact_area = std::numeric_limits<float>::max();
+    float best_area = std::numeric_limits<float>::max();
+    uint32_t best_size = UINT32_MAX;
+    uint8_t best_cut = 0u;
+    uint8_t best_phase = 0u;
+    uint8_t cut_index = 0u; /* position of the visited cut inside the cut set of the node */
+    auto index = ntk.node_to_index( n );
+
+    auto& node_data = node_match[index];
+    auto& cut_matches = matches[index];
+    supergate<NInputs> const* best_supergate = node_data.best_supergate[phase];
+
+    /* recompute the cost of the match already stored in node_data */
+    if ( best_supergate != nullptr )
+    {
+      auto const& cut = cuts.cuts( index )[node_data.best_cut[phase]];
+
+      best_phase = node_data.phase[phase];
+      best_arrival = 0.0f;
+      best_area = best_supergate->area;
+      best_cut = node_data.best_cut[phase];
+      best_size = cut.size();
+
+      auto ctr = 0u;
+      for ( auto l : cut )
+      {
+        float arrival_pin = node_match[l].arrival[( best_phase >> ctr ) & 1] + best_supergate->tdelay[ctr];
+        best_arrival = std::max( best_arrival, arrival_pin );
+        ++ctr;
+      }
+
+      /* if cut is implemented, remove it from the cover */
+      if ( !node_data.same_match && node_data.map_refs[phase] )
+      {
+        best_exact_area = cut_deref( cuts.cuts( index )[best_cut], n, phase );
+      }
+      else /* otherwise measure its area with a reference that is undone right away */
+      {
+        best_exact_area = cut_ref( cuts.cuts( index )[best_cut], n, phase );
+        cut_deref( cuts.cuts( index )[best_cut], n, phase );
+      }
+    }
+
+    /* foreach cut */
+    for ( auto& cut : cuts.cuts( index ) )
+    {
+      /* trivial cuts or not matched cuts */
+      if ( ( *cut )->data.ignore )
+      {
+        ++cut_index;
+        continue;
+      }
+
+      auto const& supergates = cut_matches[( *cut )->data.match_index];
+
+      if ( supergates[phase] == nullptr ) /* no gate list for this cut in the requested output phase */
+      {
+        ++cut_index;
+        continue;
+      }
+
+      /* match each gate and take the best one */
+      for ( auto const& gate : *supergates[phase] )
+      {
+        node_data.phase[phase] = gate.polarity; /* cut_ref/cut_deref read the pin phases ... */
+        node_data.area[phase] = gate.area; /* ... and the gate area from node_data */
+        auto area_exact = cut_ref( *cut, n, phase ); /* area reported for referencing the candidate ... */
+        cut_deref( *cut, n, phase ); /* ... followed by the matching dereference */
+        float worst_arrival = 0.0f;
+
+        auto ctr = 0u;
+        for ( auto l : *cut )
+        {
+          float arrival_pin = node_match[l].arrival[( gate.polarity >> ctr ) & 1] + gate.tdelay[ctr];
+          worst_arrival = std::max( worst_arrival, arrival_pin );
+          ++ctr;
+        }
+
+        if ( worst_arrival > node_data.required[phase] + epsilon ) /* gate would violate the required time */
+          continue;
+
+        if ( compare_map<true>( worst_arrival, best_arrival, area_exact, best_exact_area, cut->size(), best_size ) )
+        {
+          best_arrival = worst_arrival;
+          best_exact_area = area_exact;
+          best_area = gate.area;
+          best_size = cut->size();
+          best_cut = cut_index;
+          best_phase = gate.polarity;
+          best_supergate = &gate;
+        }
+      }
+
+      ++cut_index;
+    }
+
+    node_data.flows[phase] = best_exact_area; /* store the selected match; this also overwrites the temporary phase and area */
+    node_data.arrival[phase] = best_arrival;
+    node_data.area[phase] = best_area;
+    node_data.best_cut[phase] = best_cut;
+    node_data.phase[phase] = best_phase;
+    node_data.best_supergate[phase] = best_supergate;
+
+    if ( !node_data.same_match && node_data.map_refs[phase] ) /* same condition under which the old cut was dereferenced above */
+    {
+      best_exact_area = cut_ref( cuts.cuts( index )[best_cut], n, phase ); /* called for its referencing effect; the value is not read again */
+    }
+  }
+
+  /*! \brief Chooses between implementing both phases of a node and sharing one match through an inverter.
+   *
+   * \param n Node whose two phase matches have been computed
+   * \param required_margin_factor Number of inverter delays subtracted as margin in the `DO_AREA` required time check
+   * With `ELA` the cut references in the cover are updated to follow the decision.
+   */
+  template<bool DO_AREA, bool ELA>
+  void match_drop_phase( node<Ntk> const& n, float required_margin_factor )
+  {
+    auto index = ntk.node_to_index( n );
+    auto& node_data = node_match[index];
+
+    /* compute arrival adding an inverter to the other match phase */
+    float worst_arrival_npos = node_data.arrival[1] + lib_inv_delay;
+    float worst_arrival_nneg = node_data.arrival[0] + lib_inv_delay;
+    bool use_zero = false;
+    bool use_one = false;
+
+    /* only one phase is matched */
+    if ( node_data.best_supergate[0] == nullptr )
+    {
+      set_match_complemented_phase( index, 1, worst_arrival_npos );
+      if constexpr ( ELA )
+      {
+        if ( node_data.map_refs[2] )
+          cut_ref( cuts.cuts( index )[node_data.best_cut[1]], n, 1 );
+      }
+      return;
+    }
+    else if ( node_data.best_supergate[1] == nullptr )
+    {
+      set_match_complemented_phase( index, 0, worst_arrival_nneg );
+      if constexpr ( ELA )
+      {
+        if ( node_data.map_refs[2] )
+          cut_ref( cuts.cuts( index )[node_data.best_cut[0]], n, 0 );
+      }
+      return;
+    }
+
+    /* try to use only one match to cover both phases */
+    if constexpr ( !DO_AREA )
+    {
+      /* if arrival improves matching the other phase and inserting an inverter */
+      use_one = worst_arrival_npos < node_data.arrival[0] + epsilon;
+      use_zero = worst_arrival_nneg < node_data.arrival[1] + epsilon;
+    }
+    else
+    {
+      /* check if both phases + inverter meet the required time */
+      use_zero = worst_arrival_nneg < ( node_data.required[1] + epsilon - required_margin_factor * lib_inv_delay );
+      use_one = worst_arrival_npos < ( node_data.required[0] + epsilon - required_margin_factor * lib_inv_delay );
+    }
+
+    /* condition on not used phases, evaluate a substitution */
+    if constexpr ( DO_AREA )
+    {
+      if ( iteration != 0 )
+      {
+        if ( node_data.map_refs[0] == 0 || node_data.map_refs[1] == 0 )
+        {
+          /* select the used match */
+          auto phase = 0;
+          auto nphase = 0;
+          if ( node_data.map_refs[0] == 0 )
+          {
+            phase = 1;
+            use_one = true;
+            use_zero = false;
+          }
+          else
+          {
+            nphase = 1;
+            use_one = false;
+            use_zero = true;
+          }
+          /* select the not used match instead if it leads to area improvement and doesn't violate the required time */
+          if ( node_data.arrival[nphase] + lib_inv_delay < node_data.required[phase] + epsilon )
+          {
+            auto size_phase = cuts.cuts( index )[node_data.best_cut[phase]].size();
+            auto size_nphase = cuts.cuts( index )[node_data.best_cut[nphase]].size();
+            /* kept as float: an integer here would truncate the inverter area */
+            float inverter_cost = 0.0f;
+            if constexpr ( ELA )
+              inverter_cost = lib_inv_area;
+            if ( compare_map<DO_AREA>( node_data.arrival[nphase] + lib_inv_delay, node_data.arrival[phase], node_data.flows[nphase] + inverter_cost, node_data.flows[phase], size_nphase, size_phase ) )
+            {
+              /* invert the choice */
+              use_zero = !use_zero;
+              use_one = !use_one;
+            }
+          }
+        }
+      }
+    }
+
+    if ( !use_zero && !use_one )
+    {
+      /* use both phases */
+      node_data.flows[0] = node_data.flows[0] / node_data.est_refs[0];
+      node_data.flows[1] = node_data.flows[1] / node_data.est_refs[1];
+      node_data.flows[2] = node_data.flows[0] + node_data.flows[1];
+      node_data.same_match = false;
+      return;
+    }
+
+    /* use area flow as a tiebreaker */
+    if ( use_zero && use_one )
+    {
+      auto size_zero = cuts.cuts( index )[node_data.best_cut[0]].size();
+      auto size_one = cuts.cuts( index )[node_data.best_cut[1]].size();
+      if ( compare_map<DO_AREA>( worst_arrival_nneg, worst_arrival_npos, node_data.flows[0], node_data.flows[1], size_zero, size_one ) )
+        use_one = false;
+      else
+        use_zero = false;
+    }
+
+    if ( use_zero )
+    {
+      if constexpr ( ELA )
+      {
+        /* set cut references */
+        if ( !node_data.same_match )
+        {
+          /* dereference the negative phase cut if in use */
+          if ( node_data.map_refs[1] > 0 )
+            cut_deref( cuts.cuts( index )[node_data.best_cut[1]], n, 1 );
+          /* reference the positive cut if not in use before */
+          if ( node_data.map_refs[0] == 0 && node_data.map_refs[2] )
+            cut_ref( cuts.cuts( index )[node_data.best_cut[0]], n, 0 );
+        }
+        else if ( node_data.map_refs[2] )
+          cut_ref( cuts.cuts( index )[node_data.best_cut[0]], n, 0 );
+      }
+      set_match_complemented_phase( index, 0, worst_arrival_nneg );
+    }
+    else
+    {
+      if constexpr ( ELA )
+      {
+        /* set cut references */
+        if ( !node_data.same_match )
+        {
+          /* dereference the positive phase cut if in use */
+          if ( node_data.map_refs[0] > 0 )
+            cut_deref( cuts.cuts( index )[node_data.best_cut[0]], n, 0 );
+          /* reference the negative cut if not in use before */
+          if ( node_data.map_refs[1] == 0 && node_data.map_refs[2] )
+            cut_ref( cuts.cuts( index )[node_data.best_cut[1]], n, 1 );
+        }
+        else if ( node_data.map_refs[2] )
+          cut_ref( cuts.cuts( index )[node_data.best_cut[1]], n, 1 );
+      }
+      set_match_complemented_phase( index, 1, worst_arrival_npos );
+    }
+  }
+
+  /* Marks the node as implemented by the match of `phase` only; the other phase is obtained through an inverter. */
+  inline void set_match_complemented_phase( uint32_t index, uint8_t phase, float worst_arrival_n )
+  {
+    auto& data = node_match[index];
+    auto const inverted = phase ^ 1;
+    /* the inverted phase mirrors the kept match, but has no gate of its own and the given arrival */
+    data.best_supergate[inverted] = nullptr;
+    data.best_cut[inverted] = data.best_cut[phase];
+    data.phase[inverted] = data.phase[phase];
+    data.area[inverted] = data.area[phase];
+    data.arrival[inverted] = worst_arrival_n;
+    data.same_match = true;
+    data.flows[phase] = data.flows[inverted] = data.flows[2] = data.flows[phase] / data.est_refs[2];
+  }
+
+  /*! \brief Matches the constant node with the constant gates of the library.
+   * A constant without a gate is derived from the other one through an inverter;
+   * nothing is changed when the library provides neither of them.
+   */
+  void match_constants( uint32_t index )
+  {
+    auto& data = node_match[index];
+
+    /* look up the gates implementing the two constant functions */
+    kitty::static_truth_table<NInputs> zero_tt;
+    auto const gates_zero = library.get_supergates( zero_tt );
+    auto const gates_one = library.get_supergates( ~zero_tt );
+    decltype( gates_zero ) gates[2] = { gates_zero, gates_one };
+
+    /* Not available in the library */
+    if ( gates[0] == nullptr && gates[1] == nullptr )
+      return;
+
+    /* use the first listed gate for every constant the library provides */
+    for ( auto out = 0u; out < 2u; ++out )
+    {
+      if ( gates[out] == nullptr )
+        continue;
+
+      data.best_supergate[out] = &( ( *gates[out] )[0] );
+      data.arrival[out] = data.best_supergate[out]->worstDelay;
+      data.area[out] = data.best_supergate[out]->area;
+      data.phase[out] = 0;
+    }
+
+    /* if only one is available, the other is obtained using an inverter */
+    for ( auto out = 0u; out < 2u; ++out )
+    {
+      if ( gates[out] != nullptr )
+        continue;
+
+      auto const source = out ^ 1u;
+      data.same_match = true;
+      data.arrival[out] = data.arrival[source] + lib_inv_delay;
+      data.area[out] = data.area[source] + lib_inv_area;
+      data.phase[out] = 1;
+    }
+  }
+
+  /* Returns the sum of the area flows of the cut leaves, each taken in the polarity
+   * requested by the pin phases currently stored for output phase `phase` of node `n`. */
+  inline float cut_leaves_flow( cut_t const& cut, node<Ntk> const& n, uint8_t phase )
+  {
+    auto const pin_phases = node_match[ntk.node_to_index( n )].phase[phase];
+    float total = 0.0f;
+    auto pin = 0u;
+    for ( auto leaf : cut )
+    {
+      total += node_match[leaf].flows[( pin_phases >> pin ) & 1];
+      ++pin;
+    }
+    return total;
+  }
+
+  float cut_ref( cut_t const& cut, node<Ntk> const& n, uint8_t phase ) /* references the leaves of `cut` and returns the accumulated area */
+  {
+    auto const& node_data = node_match[ntk.node_to_index( n )];
+    float count = node_data.area[phase]; /* starts from the area stored for this phase of `n` */
+
+    uint8_t ctr = 0;
+    for ( auto leaf : cut )
+    {
+      /* compute leaf phase using the current gate */
+      uint8_t leaf_phase = ( node_data.phase[phase] >> ctr++ ) & 1;
+
+      if ( ntk.is_constant( ntk.index_to_node( leaf ) ) )
+      {
+        continue; /* constants are neither counted nor referenced */
+      }
+      else if ( ntk.is_pi( ntk.index_to_node( leaf ) ) )
+      {
+        /* reference PIs, add inverter cost for negative phase */
+        if ( leaf_phase == 1u )
+        {
+          if ( node_match[leaf].map_refs[1]++ == 0u ) /* first user of the complemented PI */
+            count += lib_inv_area;
+        }
+        else
+        {
+          ++node_match[leaf].map_refs[0];
+        }
+        continue;
+      }
+
+      if ( node_match[leaf].same_match )
+      {
+        /* Add inverter area if not present yet and leaf node is implemented in the opposite phase */
+        if ( node_match[leaf].map_refs[leaf_phase]++ == 0u && node_match[leaf].best_supergate[leaf_phase] == nullptr )
+          count += lib_inv_area;
+        /* Recursive referencing if leaf was not referenced */
+        if ( node_match[leaf].map_refs[2]++ == 0u )
+        {
+          count += cut_ref( cuts.cuts( leaf )[node_match[leaf].best_cut[leaf_phase]], ntk.index_to_node( leaf ), leaf_phase );
+        }
+      }
+      else /* the leaf keeps a separate match per phase */
+      {
+        ++node_match[leaf].map_refs[2];
+        if ( node_match[leaf].map_refs[leaf_phase]++ == 0u ) /* recurse only for the first reference of this phase */
+        {
+          count += cut_ref( cuts.cuts( leaf )[node_match[leaf].best_cut[leaf_phase]], ntk.index_to_node( leaf ), leaf_phase );
+        }
+      }
+    }
+    return count;
+  }
+
+  float cut_deref( cut_t const& cut, node<Ntk> const& n, uint8_t phase ) /* counterpart of cut_ref: dereferences the leaves and returns the accumulated area */
+  {
+    auto const& node_data = node_match[ntk.node_to_index( n )];
+    float count = node_data.area[phase]; /* starts from the area stored for this phase of `n` */
+    uint8_t ctr = 0;
+    for ( auto leaf : cut )
+    {
+      /* compute leaf phase using the current gate */
+      uint8_t leaf_phase = ( node_data.phase[phase] >> ctr++ ) & 1;
+
+      if ( ntk.is_constant( ntk.index_to_node( leaf ) ) )
+      {
+        continue; /* constants are neither counted nor dereferenced */
+      }
+      else if ( ntk.is_pi( ntk.index_to_node( leaf ) ) )
+      {
+        /* dereference PIs, add inverter cost for negative phase */
+        if ( leaf_phase == 1u )
+        {
+          if ( --node_match[leaf].map_refs[1] == 0u ) /* last user of the complemented PI */
+            count += lib_inv_area;
+        }
+        else
+        {
+          --node_match[leaf].map_refs[0];
+        }
+        continue;
+      }
+
+      if ( node_match[leaf].same_match )
+      {
+        /* Add inverter area if it is used only by the current gate and leaf node is implemented in the opposite phase */
+        if ( --node_match[leaf].map_refs[leaf_phase] == 0u && node_match[leaf].best_supergate[leaf_phase] == nullptr )
+          count += lib_inv_area;
+        /* Recursive dereferencing */
+        if ( --node_match[leaf].map_refs[2] == 0u )
+        {
+          count += cut_deref( cuts.cuts( leaf )[node_match[leaf].best_cut[leaf_phase]], ntk.index_to_node( leaf ), leaf_phase );
+        }
+      }
+      else /* the leaf keeps a separate match per phase */
+      {
+        --node_match[leaf].map_refs[2];
+        if ( --node_match[leaf].map_refs[leaf_phase] == 0u ) /* recurse only when the last reference of this phase is removed */
+        {
+          count += cut_deref( cuts.cuts( leaf )[node_match[leaf].best_cut[leaf_phase]], ntk.index_to_node( leaf ), leaf_phase );
+        }
+      }
+    }
+    return count;
+  }
+
+  /* Creates the destination k-LUT network and seeds the old-to-new map with the constants and the PIs. */
+  std::pair<klut_network, klut_map> initialize_map_network()
+  {
+    klut_network mapped;
+    klut_map signal_of;
+    auto& constant_entry = signal_of[ntk.node_to_index( ntk.get_node( ntk.get_constant( false ) ) )];
+    constant_entry[0] = mapped.get_constant( false );
+    constant_entry[1] = mapped.get_constant( true );
+    ntk.foreach_pi( [&]( auto const& pi ) {
+      signal_of[ntk.node_to_index( pi )][0] = mapped.create_pi();
+    } );
+    return {mapped, signal_of};
+  }
+
+  /*! \brief Generates the mapped k-LUT network from the selected cover.
+   *
+   * Every match in the cover becomes a LUT, complemented phases served by the
+   * opposite match get an inverter, and the POs are connected last. The final
+   * area, delay and gate usage are written to the statistics.
+   */
+  void finalize_cover( klut_network& res, klut_map& old2new )
+  {
+    ntk.foreach_node( [&]( auto const& n ) {
+      if ( ntk.is_constant( n ) )
+        return true;
+
+      auto index = ntk.node_to_index( n );
+
+      /* add inverter at PI if needed; the map is read with the node index, like every other entry */
+      if ( ntk.is_pi( n ) )
+      {
+        if ( node_match[index].map_refs[1] > 0 )
+          old2new[index][1] = res.create_not( old2new[index][0] );
+        return true;
+      }
+
+      /* continue if cut is not in the cover */
+      if ( node_match[index].map_refs[2] == 0u )
+        return true;
+
+      auto const& node_data = node_match[index];
+      unsigned phase = ( node_data.best_supergate[0] != nullptr ) ? 0 : 1;
+
+      /* add used cut */
+      if ( node_data.same_match || node_data.map_refs[phase] > 0 )
+      {
+        create_lut_for_gate( res, old2new, index, phase );
+
+        /* add inverted version if used */
+        if ( node_data.same_match && node_data.map_refs[phase ^ 1] > 0 )
+          old2new[index][phase ^ 1] = res.create_not( old2new[index][phase] );
+      }
+
+      phase = phase ^ 1;
+      /* add the optional other match if used */
+      if ( !node_data.same_match && node_data.map_refs[phase] > 0 )
+      {
+        create_lut_for_gate( res, old2new, index, phase );
+      }
+
+      return true;
+    } );
+
+    /* create POs, picking the signal of the phase requested by each output */
+    ntk.foreach_po( [&]( auto const& f ) {
+      const auto index = ntk.node_to_index( ntk.get_node( f ) );
+      res.create_po( old2new[index][ntk.is_complemented( f ) ? 1 : 0] );
+    } );
+
+    /* write final results */
+    st.area = area;
+    st.delay = delay;
+    compute_gates_usage();
+  }
+
+
+  void create_lut_for_gate( klut_network& res, klut_map& old2new, uint32_t index, unsigned phase ) /* adds to `res` the LUT of the match selected for `phase` of node `index` */
+  {
+    auto const& node_data = node_match[index];
+    auto& best_cut = cuts.cuts( index )[node_data.best_cut[phase]];
+    auto const gate = node_data.best_supergate[phase]->root; /* root gate of the matched supergate; its function becomes the LUT function */
+    // auto tt = cuts.truth_table( best_cut );
+
+    /* check correctness (disabled) */
+    /* invert the truth table if using the negative phase */
+    // if ( phase == 1 )
+    //   tt = ~tt;
+    // uint32_t neg = 0;
+    // for ( auto i = 0u; i < best_cut.size(); ++i )
+    // {
+    //   neg |= ( ( node_data.phase[phase] >> i ) & 1 ) << node_data.best_supergate[phase]->permutation[i];
+    // }
+    // auto check_tt = kitty::create_from_npn_config( std::make_tuple( tt, neg, node_data.best_supergate[phase]->permutation ) );
+    // assert( gate->function == check_tt );
+
+    /* permutate and negate to obtain the matched gate truth table */
+    std::vector<signal<klut_network>> children( best_cut.size() ); /* NOTE(review): assumes every permutation entry used below is < best_cut.size() - confirm */
+
+    auto ctr = 0u;
+    for ( auto l : best_cut )
+    {
+      children[node_data.best_supergate[phase]->permutation[ctr]] = old2new[l][( node_data.phase[phase] >> ctr ) & 1]; /* leaf signal in the polarity given by bit ctr of the pin phases */
+      ++ctr;
+    }
+    /* create the node */
+    auto f = res.create_node( children, gate->function );
+
+    /* add the node in the data structure */
+    old2new[index][phase] = f;
+  }
+
+  /*! \brief Tells whether a candidate match is better than the best one found so far.
+   *
+   * With `DO_AREA` the area cost is the primary key and the arrival time the
+   * secondary one; otherwise the roles are swapped. Costs that differ by no more
+   * than `epsilon` are treated as equal, and remaining ties go to the smaller cut.
+   *
+   * \param arrival Arrival time of the candidate
+   * \param best_arrival Arrival time of the current best match
+   * \param area_flow Area cost of the candidate
+   * \param best_area_flow Area cost of the current best match
+   * \param size Cut size of the candidate
+   * \param best_size Cut size of the current best match
+   *
+   * \return `true` if the candidate should replace the current best match
+   */
+  template<bool DO_AREA>
+  inline bool compare_map( float arrival, float best_arrival, float area_flow, float best_area_flow, uint32_t size, uint32_t best_size )
+  {
+    /* order the two costs according to the mapping objective */
+    float const primary = DO_AREA ? area_flow : arrival;
+    float const primary_best = DO_AREA ? best_area_flow : best_arrival;
+    float const secondary = DO_AREA ? arrival : area_flow;
+    float const secondary_best = DO_AREA ? best_arrival : best_area_flow;
+
+    /* the primary cost decides unless both values lie within epsilon */
+    if ( primary < primary_best - epsilon )
+    {
+      return true;
+    }
+    if ( primary > primary_best + epsilon )
+    {
+      return false;
+    }
+
+    /* then the secondary cost, with the same tolerance */
+    if ( secondary < secondary_best - epsilon )
+    {
+      return true;
+    }
+    if ( secondary > secondary_best + epsilon )
+    {
+      return false;
+    }
+
+    /* last tie-breaker: prefer the cut with fewer leaves */
+    return size < best_size;
+  }
+
+  /* Counts how often each library gate appears in the cover and stores a formatted report in `st.gates_usage`. */
+  void compute_gates_usage()
+  {
+    auto const& gates = library.get_gates();
+    std::vector<uint32_t> gates_profile( gates.size(), 0u );
+    bool ignore_inv = lib_inv_id == UINT32_MAX; /* inverters are not counted for this id value */
+
+    ntk.foreach_node( [&]( auto const& n, auto ) {
+      const auto index = ntk.node_to_index( n );
+      auto const& node_data = node_match[index];
+
+      if ( ntk.is_constant( n ) )
+      {
+        if ( node_data.best_supergate[0] == nullptr && node_data.best_supergate[1] == nullptr )
+          return true;
+      }
+      else if ( ntk.is_pi( n ) )
+      {
+        if ( !ignore_inv && node_data.map_refs[1] > 0 )
+          ++gates_profile[lib_inv_id];
+        return true;
+      }
+
+      /* continue if cut is not in the cover */
+      if ( node_match[index].map_refs[2] == 0u )
+        return true;
+
+      unsigned phase = ( node_data.best_supergate[0] != nullptr ) ? 0 : 1;
+
+      if ( node_data.same_match || node_data.map_refs[phase] > 0 )
+      {
+        ++gates_profile[node_data.best_supergate[phase]->root->id];
+
+        if ( !ignore_inv && node_data.same_match && node_data.map_refs[phase ^ 1] > 0 )
+          ++gates_profile[lib_inv_id];
+      }
+
+      phase = phase ^ 1;
+      if ( !node_data.same_match && node_data.map_refs[phase] > 0 )
+      {
+        ++gates_profile[node_data.best_supergate[phase]->root->id];
+      }
+
+      return true;
+    } );
+
+    std::stringstream gates_usage;
+    double tot_area = 0.0f;
+    uint32_t tot_instances = 0u;
+    for ( auto i = 0u; i < gates_profile.size(); ++i )
+    {
+      if ( gates_profile[i] > 0u )
+      {
+        auto tot_gate_area = gates_profile[i] * gates[i].area;
+
+        /* the share is reported as 0 for a zero-area mapping instead of dividing by zero */
+        gates_usage << fmt::format( "[i] {:<15}", gates[i].name )
+                    << fmt::format( "\t Instance = {:>10d}", gates_profile[i] )
+                    << fmt::format( "\t Area = {:>12.2f}", tot_gate_area )
+                    << fmt::format( " {:>8.2f} %", area > 0.0 ? tot_gate_area / area * 100 : 0.0 )
+                    << std::endl;
+        tot_instances += gates_profile[i];
+        tot_area += tot_gate_area;
+      }
+    }
+
+    gates_usage << fmt::format( "[i] {:<15}", "TOTAL" )
+                << fmt::format( "\t Instance = {:>10d}", tot_instances )
+                << fmt::format( "\t Area = {:>12.2f}   100.00 %", tot_area )
+                << std::endl;
+
+    st.gates_usage = gates_usage.str();
+  }
+
+private:
+  Ntk const& ntk; /* network being mapped */
+  tech_library<NInputs> const& library; /* technology library queried for gates and supergates */
+  map_params const& ps; /* mapping parameters */
+  map_stats& st; /* statistics written during and after mapping */
+
+  uint32_t iteration{0}; /* current mapping iteration */
+  double delay{0.0f};    /* current delay of the mapping */
+  double area{0.0f};     /* current area of the mapping */
+  const float epsilon{0.005f}; /* tolerance used when comparing arrival times, required times and areas */
+
+  /* lib inverter info */
+  float lib_inv_area; /* area added for every inverter placed in the cover */
+  float lib_inv_delay; /* delay added when a phase is obtained through an inverter */
+  uint32_t lib_inv_id; /* index of the inverter in the gate usage profile; UINT32_MAX disables inverter counting */
+
+  std::vector<node<Ntk>> top_order; /* presumably the nodes in topological order - confirm where it is filled */
+  std::vector<node_match_tech<NInputs>> node_match; /* per-node mapping data, indexed by node index */
+  std::unordered_map<uint32_t, std::vector<supergate_t>> matches; /* per node index: gate lists addressed by the `match_index` of a cut */
+  network_cuts_t cuts; /* cuts of the network, accessed per node index */
+};
+
+} /* namespace detail */
+
+/*! \brief Technology mapping.
+ *
+ * This function implements a technology mapping algorithm. It is controlled by a
+ * template argument `CutData` (defaulted to `cut_enumeration_tech_map_cut`).
+ * The argument is similar to the `CutData` argument in `cut_enumeration`, which can
+ * specialize the cost function to select priority cuts and store additional data.
+ * The default argument gives priority firstly to the cut size, then delay, and lastly
+ * to area flow. Thus, it is more suited for delay-oriented mapping.
+ * The type passed as `CutData` must implement the following four fields:
+ *
+ * - `uint32_t delay`
+ * - `float flow`
+ * - `uint8_t match_index`
+ * - `bool ignore`
+ *
+ * See `include/mockturtle/algorithms/cut_enumeration/tech_map_cut.hpp`
+ * for one example of a CutData type that implements the cost function that is used in
+ * the technology mapper.
+ *
+ * The function returns a k-LUT network. Each LUT abstracts a gate of the technology library.
+ *
+ * **Required network functions:**
+ * - `size`
+ * - `is_pi`
+ * - `is_constant`
+ * - `node_to_index`
+ * - `index_to_node`
+ * - `get_node`
+ * - `foreach_po`
+ * - `foreach_node`
+ * - `fanout_size`
+ *
+ * \param ntk Network
+ * \param library Technology library
+ * \param ps Mapping params
+ * \param pst Mapping statistics
+ */
+template<class Ntk, unsigned NInputs, typename CutData = cut_enumeration_tech_map_cut>
+klut_network tech_map( Ntk const& ntk, tech_library<NInputs> const& library, map_params const& ps = {}, map_stats* pst = nullptr )
+{
+  static_assert( is_network_type_v<Ntk>, "Ntk is not a network type" );
+  static_assert( has_size_v<Ntk>, "Ntk does not implement the size method" );
+  static_assert( has_is_pi_v<Ntk>, "Ntk does not implement the is_pi method" );
+  static_assert( has_is_constant_v<Ntk>, "Ntk does not implement the is_constant method" );
+  static_assert( has_node_to_index_v<Ntk>, "Ntk does not implement the node_to_index method" );
+  static_assert( has_index_to_node_v<Ntk>, "Ntk does not implement the index_to_node method" );
+  static_assert( has_get_node_v<Ntk>, "Ntk does not implement the get_node method" );
+  static_assert( has_foreach_po_v<Ntk>, "Ntk does not implement the foreach_po method" );
+  static_assert( has_foreach_node_v<Ntk>, "Ntk does not implement the foreach_node method" );
+  static_assert( has_fanout_size_v<Ntk>, "Ntk does not implement the fanout_size method" );
+
+  /* run the mapper; it receives `stats` by reference */
+  map_stats stats;
+  detail::tech_map_impl<Ntk, NInputs, CutData> mapper( ntk, library, ps, stats );
+  auto mapped = mapper.run();
+
+  /* the total time also accounts for the cut enumeration */
+  stats.time_total = stats.time_mapping + stats.cut_enumeration_st.time_total;
+
+  bool const print_report = ps.verbose && !stats.mapping_error;
+  if ( print_report )
+    stats.report();
+  /* hand the statistics back to the caller when requested */
+  if ( pst != nullptr )
+    *pst = stats;
+  return mapped;
+}
+
+
+namespace detail
+{
+
+template<typename Ntk, unsigned NInputs>
+struct cut_match_t
+{
+  /* supergates matching the cut: entry 0 for the positive output phase, entry 1 for the negative one */
+  std::vector<exact_supergate<Ntk, NInputs>> const* supergates[2]{};
+  /* input permutation: entry i holds the permutated position of input i */
+  std::array<uint8_t, NInputs> permutation = {};
+  /* input negations, expressed on the permutated inputs */
+  uint8_t negation = 0u;
+};
+
+template<typename Ntk, unsigned NInputs>
+struct node_match_t
+{
+  /* best supergate match for positive and negative output phases */
+  exact_supergate<Ntk, NInputs> const* best_supergate[2] = {nullptr, nullptr};
+  /* fanin pin phases for both output phases */
+  uint8_t phase[2]{};
+  /* best cut index for both phases */
+  uint32_t best_cut[2]{};
+  /* node is mapped using only one phase */
+  bool same_match{false};
+
+  /* arrival time at node output */
+  double arrival[2]{};
+  /* required time at node output */
+  double required[2]{};
+  /* area of the best matches */
+  float area[2]{};
+
+  /* number of references in the cover 0: pos, 1: neg, 2: pos+neg */
+  uint32_t map_refs[3]{};
+  /* references estimation */
+  float est_refs[3]{};
+  /* area flow; every member is zero-initialized so that a default-constructed object holds no indeterminate value */
+  float flows[3]{};
+};
+
+template<class NtkDest, class Ntk, class RewritingFn, typename CutData, unsigned NInputs>
+class exact_map_impl
+{
+public:
+  using network_cuts_t = network_cuts<Ntk, true, CutData>;
+  using cut_t = typename network_cuts_t::cut_t;
+
+public:
+  /* Stores the mapping context, enumerates the cuts of `ntk` and reads the inverter area and delay from the library. */
+  exact_map_impl( Ntk& ntk, exact_library<NtkDest, RewritingFn, NInputs> const& library, map_params const& ps, map_stats& st )
+      : ntk( ntk ), library( library ), ps( ps ), st( st ),
+        node_match( ntk.size() ),
+        matches(),
+        cuts( cut_enumeration<Ntk, true, CutData>( ntk, ps.cut_enumeration_ps ) )
+  {
+    auto const inverter_info = library.get_inverter_info();
+    lib_inv_area = std::get<0>( inverter_info );
+    lib_inv_delay = std::get<1>( inverter_info );
+  }
+
+  /* Runs the complete mapping flow and returns the mapped network. The rounds are driven by
+   * `iteration`, which the mapping passes advance through `set_mapping_refs`. */
+  NtkDest run()
+  {
+    stopwatch t( st.time_mapping );
+
+    auto [res, old2new] = initialize_copy_network<NtkDest>( ntk );
+
+    /* store the nodes in topological order for the mapping passes */
+    top_order.reserve( ntk.size() );
+    topo_view<Ntk>( ntk ).foreach_node( [this]( auto topo_node ) {
+      top_order.push_back( topo_node );
+    } );
+
+    /* match the cuts against the library, then set up the per-node data */
+    compute_matches();
+    init_nodes();
+
+    /* delay-oriented round, unless skipped */
+    if ( !ps.skip_delay_round )
+      compute_mapping<false>();
+
+    /* area-flow rounds */
+    const auto flow_rounds_end = ps.area_flow_rounds + 1;
+    while ( iteration < flow_rounds_end )
+    {
+      compute_required_time();
+      compute_mapping<true>();
+    }
+
+    /* exact-area rounds */
+    const auto exact_rounds_end = ps.ela_rounds + ps.area_flow_rounds + 1;
+    while ( iteration < exact_rounds_end )
+    {
+      compute_required_time();
+      compute_exact_area();
+    }
+
+    /* build the output network from the selected matches */
+    finalize_cover( res, old2new );
+
+    return res;
+  }
+
+private:
+  /* Initializes the per-node data before the first mapping round: terminals get unit
+   * reference estimates and their arrival times, gates get reference estimates derived
+   * from the structure of the subject network. */
+  void init_nodes()
+  {
+    ntk.foreach_node( [this]( auto const& n, auto ) {
+      const auto index = ntk.node_to_index( n );
+      auto& data = node_match[index];
+      const bool is_const = ntk.is_constant( n );
+
+      if ( is_const || ntk.is_pi( n ) )
+      {
+        /* all terminals have flow 1.0; only the negative phase of a PI pays the inverter delay */
+        data.est_refs[0] = data.est_refs[1] = data.est_refs[2] = 1.0f;
+        data.arrival[0] = 0.0f;
+        data.arrival[1] = is_const ? 0.0f : lib_inv_delay;
+        return;
+      }
+
+      /* gates: total estimate from the fanout size, per-phase estimates start at zero */
+      data.est_refs[0] = data.est_refs[1] = 0.0f;
+      data.est_refs[2] = static_cast<float>( ntk.fanout_size( n ) );
+
+      /* each fanin edge adds one reference to its (non-PI) driver, in the phase of the edge */
+      ntk.foreach_fanin( n, [&]( auto const& s ) {
+        const auto child = ntk.get_node( s );
+        if ( ntk.is_pi( child ) )
+        {
+          return;
+        }
+
+        const unsigned edge_phase = ntk.is_complemented( s ) ? 1u : 0u;
+        node_match[ntk.node_to_index( child )].est_refs[edge_phase] += 1.0f;
+      } );
+    } );
+  }
+
+
+  /* Matches each gate's non-trivial cuts against the library via NPN canonization; the results are stored per node in `matches`. */
+  void compute_matches()
+  {
+    ntk.foreach_gate( [&]( auto const& n ) {
+      const auto index = ntk.node_to_index( n );
+
+      /* build the match list directly in the map entry: no copy of the vector is needed afterwards */
+      auto& node_matches = matches[index];
+      node_matches.clear();
+
+      auto i = 0u;
+      for ( auto& cut : cuts.cuts( index ) )
+      {
+        if ( cut->size() == 1 )
+        {
+          ( *cut )->data.ignore = true; /* single-leaf cuts are never matched */
+          continue;
+        }
+
+        /* match the cut using canonization and get the gates for the function and its complement */
+        const auto tt = cuts.truth_table( *cut );
+        const auto fe = kitty::extend_to<NInputs>( tt );
+        const auto config = kitty::exact_npn_canonization( fe );
+        auto const supergates_npn = library.get_supergates( std::get<0>( config ) );
+        auto const supergates_npn_neg = library.get_supergates( ~std::get<0>( config ) );
+
+        if ( supergates_npn != nullptr || supergates_npn_neg != nullptr )
+        {
+          auto neg = std::get<1>( config );
+          auto perm = std::get<2>( config );
+          uint8_t phase = ( neg >> NInputs ) & 1; /* bit NInputs of `neg` selects which output phase the representative implements */
+          cut_match_t<NtkDest, NInputs> match;
+
+          match.supergates[phase] = supergates_npn;
+          match.supergates[phase ^ 1] = supergates_npn_neg;
+
+          /* store permutations and negations */
+          match.negation = 0;
+          for ( auto j = 0u; j < perm.size() && j < NInputs; ++j )
+          {
+            match.permutation[perm[j]] = j;
+            match.negation |= ( ( neg >> perm[j] ) & 1 ) << j;
+          }
+          node_matches.push_back( match );
+          ( *cut )->data.match_index = i++; /* position of this cut's match in `node_matches` */
+        }
+        else
+        {
+          /* ignore cuts without a match in the library */
+          ( *cut )->data.ignore = true;
+        }
+      }
+    } );
+  }
+
+  /* One delay (DO_AREA = false) or area-flow (DO_AREA = true) mapping pass in topological order. */
+  template<bool DO_AREA>
+  void compute_mapping()
+  {
+    for ( auto const& n : top_order )
+    {
+      const bool is_gate = !ntk.is_constant( n ) && !ntk.is_pi( n );
+      if ( is_gate )
+      {
+        /* select the best match of each output phase: 0 positive, 1 negative */
+        for ( uint8_t phase = 0u; phase < 2u; ++phase )
+          match_phase<DO_AREA>( n, phase );
+
+        /* then try to cover both phases with a single match */
+        match_drop_phase<DO_AREA, false>( n, 0u );
+      }
+    }
+    set_mapping_refs<false>();
+  }
+
+
+  /* One exact-area round: re-selects the matches of every gate, in topological order, measuring area by referencing cuts. */
+  void compute_exact_area()
+  {
+    for ( auto const& n : top_order )
+    {
+      if ( ntk.is_constant( n ) || ntk.is_pi( n ) )
+        continue;
+
+      auto index = ntk.node_to_index( n );
+      auto& node_data = node_match[index];
+
+      /* recursively deselect the best cut shared between the two phases if in use in the cover */
+      if ( node_data.same_match && node_data.map_refs[2] != 0 )
+      {
+        if ( node_data.best_supergate[0] != nullptr ) /* the shared match is stored in the phase that has a supergate */
+          cut_deref( cuts.cuts( index )[node_data.best_cut[0]], n, 0u );
+        else
+          cut_deref( cuts.cuts( index )[node_data.best_cut[1]], n, 1u );
+      }
+
+      /* match positive phase */
+      match_phase_exact( n, 0u );
+
+      /* match negative phase */
+      match_phase_exact( n, 1u );
+
+      /* try to drop one phase; with ELA = true this also re-references the selected cut */
+      match_drop_phase<true, true>( n, 0u );
+    }
+    set_mapping_refs<true>(); /* ELA = true: the reference counts are kept, only delay, area and estimates are updated */
+  }
+
+
+  /* Generates the mapped network from the selected matches, creates the POs, and reports area and delay in `st`. */
+  void finalize_cover( NtkDest& res, node_map<signal<NtkDest>, Ntk>& old2new )
+  {
+    auto const& db = library.get_database();
+
+    ntk.foreach_node( [&]( auto const& n ) {
+      if ( ntk.is_constant( n ) || ntk.is_pi( n ) )
+        return true;
+
+      /* nodes outside the cover are not instantiated */
+      const auto index = ntk.node_to_index( n );
+      auto const& node_data = node_match[index];
+      if ( node_data.map_refs[2] == 0u )
+        return true;
+
+      /* the implemented phase is the one that owns a supergate */
+      const unsigned phase = ( node_data.best_supergate[0] == nullptr ) ? 1 : 0;
+      auto& best_cut = cuts.cuts( index )[node_data.best_cut[phase]];
+      auto const& match = matches[index][best_cut->data.match_index];
+      auto const& supergate = node_data.best_supergate[phase];
+
+      /* place the already mapped leaves on the permuted pins; unused pins stay constant false */
+      std::vector<signal<NtkDest>> children( NInputs, res.get_constant( false ) );
+      auto pos = 0u;
+      for ( auto l : best_cut )
+        children[match.permutation[pos++]] = old2new[ntk.index_to_node( l )];
+
+      /* complement the pins flagged in the negation mask */
+      for ( auto pin = 0u; pin < NInputs; ++pin )
+      {
+        if ( ( match.negation >> pin ) & 1 )
+          children[pin] = !children[pin];
+      }
+
+      /* copy the supergate structure from the database on top of the children */
+      topo_view topo{db, supergate->root};
+      auto f = cleanup_dangling( topo, res, children.begin(), children.end() ).front();
+      old2new[n] = ( phase == 1 ) ? !f : f;
+      return true;
+    } );
+
+    /* create POs */
+    ntk.foreach_po( [&]( auto const& f ) {
+      res.create_po( ntk.is_complemented( f ) ? res.create_not( old2new[f] ) : old2new[f] );
+    } );
+
+    /* write final results */
+    st.area = area;
+    st.delay = delay;
+  }
+
+  /* Recomputes delay and area of the current cover, rebuilds the cover references (only if ELA is false), blends the reference estimates, and advances `iteration`. */
+  template<bool ELA>
+  void set_mapping_refs()
+  {
+    const auto coef = 1.0f / ( 2.0f + ( iteration + 1 ) * ( iteration + 1 ) ); /* weight of the previous estimate in the final blend */
+    if constexpr ( !ELA )
+    {
+      for ( auto i = 0u; i < node_match.size(); ++i )
+      {
+        node_match[i].map_refs[0] = node_match[i].map_refs[1] = node_match[i].map_refs[2] = 0u;
+      }
+    }
+
+    /* compute current delay (worst PO arrival in the PO's phase) and, unless ELA, reference the PO drivers */
+    delay = 0.0f;
+    ntk.foreach_po( [this]( auto s ) {
+      const auto index = ntk.node_to_index( ntk.get_node( s ) );
+      if ( ntk.is_complemented( s ) )
+        delay = std::max( delay, node_match[index].arrival[1] );
+      else
+        delay = std::max( delay, node_match[index].arrival[0] );
+
+      if constexpr ( !ELA )
+      {
+        node_match[index].map_refs[2]++;
+        if ( ntk.is_complemented( s ) )
+          node_match[index].map_refs[1]++;
+        else
+          node_match[index].map_refs[0]++;
+      }
+    } );
+
+    /* compute current area and update mapping refs in top-down order */
+    area = 0.0f;
+    for ( auto it = top_order.rbegin(); it != top_order.rend(); ++it )
+    {
+      const auto index = ntk.node_to_index( *it );
+      /* PIs and constants have no match; a PI only contributes an inverter */
+      if ( ntk.is_pi( *it ) )
+      {
+        if ( node_match[index].map_refs[1] > 0u )
+        {
+          /* add one inverter if the PI is referenced in its negative phase */
+          area += lib_inv_area;
+        }
+        continue;
+      }
+      else if ( ntk.is_constant( *it ) )
+      {
+        continue;
+      }
+
+      if ( node_match[index].map_refs[2] == 0u ) /* node is not part of the cover */
+        continue;
+
+      auto& node_data = node_match[index];
+      unsigned use_phase = node_data.best_supergate[0] == nullptr ? 1u : 0u; /* first phase that has a supergate */
+
+      if ( node_data.same_match || node_data.map_refs[use_phase] > 0 )
+      {
+        if constexpr ( !ELA )
+        {
+          /* reference the leaves of the selected cut in the phase required by each pin */
+          auto const& best_cut = cuts.cuts( index )[node_data.best_cut[use_phase]];
+          auto const& match = matches[index][best_cut->data.match_index];
+          auto ctr = 0u;
+          for ( auto const leaf : best_cut )
+          {
+            node_match[leaf].map_refs[2]++;
+            if ( ( node_data.phase[use_phase] >> match.permutation[ctr++] ) & 1 )
+              node_match[leaf].map_refs[1]++;
+            else
+              node_match[leaf].map_refs[0]++;
+          }
+        }
+        area += node_data.area[use_phase];
+        if ( node_data.same_match && node_data.map_refs[use_phase ^ 1] > 0 ) /* the other phase is obtained through an inverter */
+        {
+          area += lib_inv_area;
+        }
+      }
+
+      /* invert the phase */
+      use_phase = use_phase ^ 1;
+
+      /* if both phases are implemented and used */
+      if ( !node_data.same_match && node_data.map_refs[use_phase] > 0 )
+      {
+        if constexpr ( !ELA )
+        {
+          auto const& best_cut = cuts.cuts( index )[node_data.best_cut[use_phase]];
+          auto const& match = matches[index][best_cut->data.match_index];
+          auto ctr = 0u;
+          for ( auto const leaf : best_cut )
+          {
+            node_match[leaf].map_refs[2]++;
+            if ( ( node_data.phase[use_phase] >> match.permutation[ctr++] ) & 1 )
+              node_match[leaf].map_refs[1]++;
+            else
+              node_match[leaf].map_refs[0]++;
+          }
+        }
+        area += node_data.area[use_phase];
+      }
+    }
+
+    /* blend flow references: mix the previous estimate with the current reference count (at least 1) */
+    for ( auto i = 0u; i < ntk.size(); ++i )
+    {
+      node_match[i].est_refs[2] = coef * node_match[i].est_refs[2] + ( 1.0f - coef ) * std::max( 1.0f, static_cast<float>( node_match[i].map_refs[2] ) );
+      node_match[i].est_refs[1] = coef * node_match[i].est_refs[1] + ( 1.0f - coef ) * std::max( 1.0f, static_cast<float>( node_match[i].map_refs[1] ) );
+      node_match[i].est_refs[0] = coef * node_match[i].est_refs[0] + ( 1.0f - coef ) * std::max( 1.0f, static_cast<float>( node_match[i].map_refs[0] ) );
+    }
+
+    ++iteration;
+  }
+
+  /* Resets all required times, sets them at the POs, and propagates them backwards through the selected cuts of the cover. */
+  void compute_required_time()
+  {
+    for ( auto i = 0u; i < node_match.size(); ++i )
+    {
+      node_match[i].required[0] = node_match[i].required[1] = std::numeric_limits<float>::max();
+    }
+    /* return in case of `skip_delay_round` */
+    if ( iteration == 0 )
+      return;
+
+    auto required = delay;
+
+    if ( ps.required_time != 0.0f )
+    {
+      /* Global target time constraint */
+      if ( ps.required_time < delay - epsilon )
+      {
+        /* the target cannot be met: keep the current delay as required time and warn once */
+        if ( !ps.skip_delay_round && iteration == 1 )
+          std::cerr << fmt::format( "[i] MAP WARNING: cannot meet the target required time of {:.2f}", ps.required_time ) << std::endl;
+      }
+      else
+      {
+        required = ps.required_time;
+      }
+    }
+
+    /* set the required time at POs */
+    ntk.foreach_po( [&]( auto const& s ) {
+      const auto index = ntk.node_to_index( ntk.get_node( s ) );
+      if ( ntk.is_complemented( s ) )
+        node_match[index].required[1] = required;
+      else
+        node_match[index].required[0] = required;
+    } );
+
+    /* propagate required time to the PIs, visiting the nodes by decreasing index */
+    auto i = ntk.size();
+    while ( i-- > 0u )
+    {
+      const auto n = ntk.index_to_node( i );
+      /* stops at the first PI or constant (presumably all terminals precede the gates in index order; to be confirmed) */
+      if ( ntk.is_pi( n ) || ntk.is_constant( n ) )
+        break;
+
+      if ( node_match[i].map_refs[2] == 0 )
+        continue;
+
+      auto& node_data = node_match[i];
+
+      unsigned use_phase = node_data.best_supergate[0] == nullptr ? 1u : 0u;
+      unsigned other_phase = use_phase ^ 1;
+
+      assert( node_data.best_supergate[0] != nullptr || node_data.best_supergate[1] != nullptr );
+      assert( node_data.map_refs[0] || node_data.map_refs[1] );
+
+      /* propagate required time over output inverter if present */
+      if ( node_data.same_match && node_data.map_refs[other_phase] > 0 )
+      {
+        node_data.required[use_phase] = std::min( node_data.required[use_phase], node_data.required[other_phase] - lib_inv_delay );
+      }
+
+      if ( node_data.same_match || node_data.map_refs[use_phase] > 0 )
+      {
+        auto ctr = 0u;
+        auto const& best_cut = cuts.cuts( i )[node_data.best_cut[use_phase]]; /* bound by reference: the cut is only read */
+        auto const& match = matches[i][best_cut->data.match_index];
+        auto const& supergate = node_data.best_supergate[use_phase];
+        for ( auto leaf : best_cut )
+        {
+          /* each leaf is constrained in the phase required by its pin, minus the pin delay */
+          auto phase = ( node_data.phase[use_phase] >> match.permutation[ctr] ) & 1;
+          node_match[leaf].required[phase] = std::min( node_match[leaf].required[phase], node_data.required[use_phase] - supergate->tdelay[match.permutation[ctr]] );
+          ctr++;
+        }
+      }
+
+      if ( !node_data.same_match && node_data.map_refs[other_phase] > 0 )
+      {
+        auto ctr = 0u;
+        auto const& best_cut = cuts.cuts( i )[node_data.best_cut[other_phase]]; /* bound by reference: the cut is only read */
+        auto const& match = matches[i][best_cut->data.match_index];
+        auto const& supergate = node_data.best_supergate[other_phase];
+        for ( auto leaf : best_cut )
+        {
+          auto phase = ( node_data.phase[other_phase] >> match.permutation[ctr] ) & 1;
+          node_match[leaf].required[phase] = std::min( node_match[leaf].required[phase], node_data.required[other_phase] - supergate->tdelay[match.permutation[ctr]] );
+          ctr++;
+        }
+      }
+    }
+  }
+
+  /* Selects, for output phase `phase` of node `n`, the best (cut, supergate) pair by arrival time or, with DO_AREA, by area flow under the required time. */
+  template<bool DO_AREA>
+  void match_phase( node<Ntk> const& n, uint8_t phase )
+  {
+    float best_arrival = std::numeric_limits<float>::max();
+    float best_area_flow = std::numeric_limits<float>::max();
+    float best_area = std::numeric_limits<float>::max();
+    uint32_t best_size = UINT32_MAX;
+    uint8_t best_cut = 0u;
+    uint8_t best_phase = 0u;
+    uint8_t cut_index = 0u;
+    auto index = ntk.node_to_index( n );
+    auto& node_data = node_match[index];
+    auto& cut_matches = matches[index];
+    exact_supergate<NtkDest, NInputs> const* best_supergate = node_data.best_supergate[phase];
+
+    /* recompute the cost of the currently selected match, so that it competes with the candidates using up-to-date leaf data */
+    if ( best_supergate != nullptr )
+    {
+      auto const& cut = cuts.cuts( index )[node_data.best_cut[phase]];
+      auto& supergates = cut_matches[( cut )->data.match_index];
+
+      /* permute the children to the NPN-representative configuration */
+      std::vector<uint32_t> children( NInputs, 0u );
+      auto ctr = 0u;
+      for ( auto l : cut )
+      {
+        children[supergates.permutation[ctr++]] = l;
+      }
+
+      best_phase = node_data.phase[phase];
+      best_arrival = 0.0f;
+      best_area_flow = best_supergate->area + cut_leaves_flow( cut, n, phase );
+      best_area = best_supergate->area;
+      best_cut = node_data.best_cut[phase];
+      best_size = cut.size();
+      for ( auto pin = 0u; pin < NInputs; pin++ )
+      {
+        float arrival_pin = node_match[children[pin]].arrival[( best_phase >> pin ) & 1] + best_supergate->tdelay[pin];
+        best_arrival = std::max( best_arrival, arrival_pin );
+      }
+    }
+
+    /* foreach cut */
+    for ( auto& cut : cuts.cuts( index ) )
+    {
+      /* trivial cuts or not matched cuts */
+      if ( ( *cut )->data.ignore )
+      {
+        ++cut_index;
+        continue;
+      }
+
+      auto const& supergates = cut_matches[( *cut )->data.match_index];
+
+      if ( supergates.supergates[phase] == nullptr ) /* no supergate implements this output phase for the cut */
+      {
+        ++cut_index;
+        continue;
+      }
+
+      /* permute the children to the NPN-representative configuration */
+      std::vector<uint32_t> children( NInputs, 0u );
+      auto ctr = 0u;
+      for ( auto l : *cut )
+      {
+        children[supergates.permutation[ctr++]] = l;
+      }
+
+      /* match each gate and take the best one */
+      for ( auto const& gate : *supergates.supergates[phase] )
+      {
+        uint8_t complement = gate.polarity ^ supergates.negation;
+        node_data.phase[phase] = complement; /* set temporarily: `cut_leaves_flow` reads `phase[phase]`; overwritten with the best value at the end */
+        float area_local = gate.area + cut_leaves_flow( *cut, n, phase );
+        float worst_arrival = 0.0f;
+        for ( auto pin = 0u; pin < NInputs; pin++ )
+        {
+          float arrival_pin = node_match[children[pin]].arrival[( complement >> pin ) & 1] + gate.tdelay[pin];
+          worst_arrival = std::max( worst_arrival, arrival_pin );
+        }
+
+        if constexpr ( DO_AREA )
+        {
+          /* in area mode, discard candidates that violate the required time */
+          if ( worst_arrival > node_data.required[phase] + epsilon )
+            continue;
+        }
+
+        if ( compare_map<DO_AREA>( worst_arrival, best_arrival, area_local, best_area_flow, cut->size(), best_size ) )
+        {
+          best_arrival = worst_arrival;
+          best_area_flow = area_local;
+          best_size = cut->size();
+          best_cut = cut_index;
+          best_area = gate.area;
+          best_phase = complement;
+          best_supergate = &gate;
+        }
+      }
+
+      ++cut_index;
+    }
+
+    /* commit the best match found for this phase */
+    node_data.flows[phase] = best_area_flow;
+    node_data.arrival[phase] = best_arrival;
+    node_data.area[phase] = best_area;
+    node_data.best_cut[phase] = best_cut;
+    node_data.phase[phase] = best_phase;
+    node_data.best_supergate[phase] = best_supergate;
+  }
+
+  /* Selects, for output phase `phase` of node `n`, the best (cut, supergate) pair by exact area, measured by referencing and dereferencing the candidate cuts. */
+  void match_phase_exact( node<Ntk> const& n, uint8_t phase )
+  {
+    float best_arrival = std::numeric_limits<float>::max();
+    float best_exact_area = std::numeric_limits<float>::max();
+    float best_area = std::numeric_limits<float>::max();
+    uint32_t best_size = UINT32_MAX;
+    uint8_t best_cut = 0u;
+    uint8_t best_phase = 0u;
+    uint8_t cut_index = 0u;
+    auto index = ntk.node_to_index( n );
+
+    auto& node_data = node_match[index];
+    auto& cut_matches = matches[index];
+    exact_supergate<NtkDest, NInputs> const* best_supergate = node_data.best_supergate[phase];
+
+    /* recompute the cost of the currently selected match, so that it competes with the candidates */
+    if ( best_supergate != nullptr )
+    {
+      auto const& cut = cuts.cuts( index )[node_data.best_cut[phase]];
+      auto const& supergates = cut_matches[( cut )->data.match_index];
+
+      /* permute the children to the NPN-representative configuration */
+      std::vector<uint32_t> children( NInputs, 0u );
+      auto ctr = 0u;
+      for ( auto l : cut )
+      {
+        children[supergates.permutation[ctr++]] = l;
+      }
+
+      best_phase = best_supergate->polarity ^ supergates.negation;
+      best_arrival = 0.0f;
+      best_area = best_supergate->area;
+      best_cut = node_data.best_cut[phase];
+      best_size = cut.size();
+      for ( auto pin = 0u; pin < NInputs; pin++ )
+      {
+        float arrival_pin = node_match[children[pin]].arrival[( best_phase >> pin ) & 1] + best_supergate->tdelay[pin];
+        best_arrival = std::max( best_arrival, arrival_pin );
+      }
+
+      /* if cut is implemented, remove it from the cover (the freed area is its exact area) */
+      if ( !node_data.same_match && node_data.map_refs[phase] )
+      {
+        best_exact_area = cut_deref( cuts.cuts( index )[best_cut], n, phase );
+      }
+      else
+      {
+        /* otherwise measure its exact area by referencing it and undoing the referencing right away */
+        best_exact_area = cut_ref( cuts.cuts( index )[best_cut], n, phase );
+        cut_deref( cuts.cuts( index )[best_cut], n, phase );
+      }
+    }
+
+    /* foreach cut */
+    for ( auto& cut : cuts.cuts( index ) )
+    {
+      /* trivial cuts or not matched cuts */
+      if ( ( *cut )->data.ignore )
+      {
+        ++cut_index;
+        continue;
+      }
+
+      auto const& supergates = cut_matches[( *cut )->data.match_index];
+
+      if ( supergates.supergates[phase] == nullptr ) /* no supergate implements this output phase for the cut */
+      {
+        ++cut_index;
+        continue;
+      }
+
+      /* permute the children to the NPN-representative configuration */
+      std::vector<uint32_t> children( NInputs, 0u );
+      auto ctr = 0u;
+      for ( auto l : *cut )
+      {
+        children[supergates.permutation[ctr++]] = l;
+      }
+
+      for ( auto const& gate : *supergates.supergates[phase] )
+      {
+        uint8_t complement = gate.polarity ^ supergates.negation;
+        node_data.phase[phase] = complement; /* set temporarily: `cut_ref`/`cut_deref` read `phase[phase]` ... */
+        node_data.area[phase] = gate.area; /* ... and `area[phase]`; both are overwritten with the best values at the end */
+        auto area_exact = cut_ref( *cut, n, phase );
+        cut_deref( *cut, n, phase );
+        float worst_arrival = 0.0f;
+        for ( auto pin = 0u; pin < NInputs; pin++ )
+        {
+          float arrival_pin = node_match[children[pin]].arrival[( complement >> pin ) & 1] + gate.tdelay[pin];
+          worst_arrival = std::max( worst_arrival, arrival_pin );
+        }
+
+        if ( worst_arrival > node_data.required[phase] + epsilon ) /* discard candidates that violate the required time */
+          continue;
+
+        if ( compare_map<true>( worst_arrival, best_arrival, area_exact, best_exact_area, cut->size(), best_size ) )
+        {
+          best_arrival = worst_arrival;
+          best_exact_area = area_exact;
+          best_area = gate.area;
+          best_size = cut->size();
+          best_cut = cut_index;
+          best_phase = complement;
+          best_supergate = &gate;
+        }
+      }
+
+      ++cut_index;
+    }
+
+    /* commit the best match found for this phase */
+    node_data.flows[phase] = best_exact_area;
+    node_data.arrival[phase] = best_arrival;
+    node_data.area[phase] = best_area;
+    node_data.best_cut[phase] = best_cut;
+    node_data.phase[phase] = best_phase;
+    node_data.best_supergate[phase] = best_supergate;
+
+    /* put the selected cut back in the cover if this phase is in use; the returned area is not used afterwards */
+    if ( !node_data.same_match && node_data.map_refs[phase] )
+    {
+      best_exact_area = cut_ref( cuts.cuts( index )[best_cut], n, phase );
+    }
+  }
+
+  /* Tries to implement both output phases of `n` with one match plus an inverter; `area_margin_factor` tightens the required-time check by that many inverter delays. */
+  template<bool DO_AREA, bool ELA>
+  void match_drop_phase( node<Ntk> const& n, unsigned area_margin_factor )
+  {
+    auto index = ntk.node_to_index( n );
+    auto& node_data = node_match[index];
+
+    /* compute arrival adding an inverter to the other match phase */
+    float worst_arrival_npos = node_data.arrival[1] + lib_inv_delay;
+    float worst_arrival_nneg = node_data.arrival[0] + lib_inv_delay;
+    bool use_zero = false;
+    bool use_one = false;
+
+    /* only one phase is matched: it must implement both; with ELA its cut is referenced if the node is in the cover */
+    if ( node_data.best_supergate[0] == nullptr )
+    {
+      set_match_complemented_phase( index, 1, worst_arrival_npos );
+      if constexpr ( ELA )
+      {
+        if ( node_data.map_refs[2] )
+          cut_ref( cuts.cuts( index )[node_data.best_cut[1]], n, 1 );
+      }
+      return;
+    }
+    else if ( node_data.best_supergate[1] == nullptr )
+    {
+      set_match_complemented_phase( index, 0, worst_arrival_nneg );
+      if constexpr ( ELA )
+      {
+        if ( node_data.map_refs[2] )
+          cut_ref( cuts.cuts( index )[node_data.best_cut[0]], n, 0 );
+      }
+      return;
+    }
+
+    /* try to use only one match to cover both phases */
+    if constexpr ( !DO_AREA )
+    {
+      /* if arrival is less matching the other phase and inserting an inverter */
+      if ( worst_arrival_npos < node_data.arrival[0] + epsilon )
+      {
+        use_one = true;
+      }
+      if ( worst_arrival_nneg < node_data.arrival[1] + epsilon )
+      {
+        use_zero = true;
+      }
+      if ( !use_zero && !use_one )
+      {
+        /* use both phases to improve delay */
+        node_data.flows[2] = ( node_data.flows[0] + node_data.flows[1] ) / node_data.est_refs[2];
+        node_data.flows[0] = node_data.flows[0] / node_data.est_refs[0];
+        node_data.flows[1] = node_data.flows[1] / node_data.est_refs[1];
+        return;
+      }
+    }
+    else
+    {
+      /* check if both phases + inverter meet the required time */
+      use_zero = worst_arrival_nneg < node_data.required[1] + epsilon - area_margin_factor * lib_inv_delay;
+      use_one = worst_arrival_npos < node_data.required[0] + epsilon - area_margin_factor * lib_inv_delay;
+    }
+
+    /* use area flow as a tiebreaker; both phases cannot be kept since `node_map` does not support that */
+    if ( use_zero && use_one )
+    {
+      auto size_zero = cuts.cuts( index )[node_data.best_cut[0]].size();
+      auto size_one = cuts.cuts( index )[node_data.best_cut[1]].size();
+      if ( compare_map<DO_AREA>( worst_arrival_nneg, worst_arrival_npos,  node_data.flows[0], node_data.flows[1], size_zero, size_one ) )
+        use_one = false;
+      else
+        use_zero = false;
+    }
+
+    if ( use_zero )
+    {
+      if constexpr ( ELA )
+      {
+        if ( !node_data.same_match )
+        {
+          if ( node_data.map_refs[1] > 0 ) /* the negative-phase cut leaves the cover */
+            cut_deref( cuts.cuts( index )[node_data.best_cut[1]], n, 1 );
+          if ( node_data.map_refs[0] == 0 && node_data.map_refs[2] ) /* reference only if the node is in the cover, as in the branch below */
+            cut_ref( cuts.cuts( index )[node_data.best_cut[0]], n, 0 );
+        }
+        else if ( node_data.map_refs[2] )
+          cut_ref( cuts.cuts( index )[node_data.best_cut[0]], n, 0 );
+      }
+      set_match_complemented_phase( index, 0, worst_arrival_nneg );
+    }
+    else
+    {
+      if constexpr ( ELA )
+      {
+        if ( !node_data.same_match )
+        {
+          if ( node_data.map_refs[0] > 0 ) /* the positive-phase cut leaves the cover */
+            cut_deref( cuts.cuts( index )[node_data.best_cut[0]], n, 0 );
+          if ( node_data.map_refs[1] == 0 && node_data.map_refs[2] )
+            cut_ref( cuts.cuts( index )[node_data.best_cut[1]], n, 1 );
+        }
+        else if ( node_data.map_refs[2] )
+          cut_ref( cuts.cuts( index )[node_data.best_cut[1]], n, 1 );
+      }
+      set_match_complemented_phase( index, 1, worst_arrival_npos );
+    }
+  }
+
+  /* Makes the match of `phase` serve both output phases of node `index`; `worst_arrival_n` becomes the arrival of the other phase. */
+  inline void set_match_complemented_phase( uint32_t index, uint8_t phase, float worst_arrival_n )
+  {
+    auto& data = node_match[index];
+    const auto other = phase ^ 1;
+    data.same_match = true;
+    data.best_supergate[other] = nullptr;
+    data.best_cut[other] = data.best_cut[phase];
+    data.phase[other] = data.phase[phase] ^ ( 1 << NInputs );
+    data.arrival[other] = worst_arrival_n;
+    data.area[other] = data.area[phase];
+    data.flows[phase] /= data.est_refs[2];
+    data.flows[2] = data.flows[other] = data.flows[phase];
+  }
+
+  /* Sums the flows of the cut leaves, each taken in the phase that `phase[phase]` of node `n` requires at its pin. */
+  inline float cut_leaves_flow( cut_t const& cut, node<Ntk> const& n, uint8_t phase )
+  {
+    const auto index = ntk.node_to_index( n );
+    auto const& match = matches[index][cut->data.match_index];
+    const uint8_t pin_phases = node_match[index].phase[phase];
+
+    float total = 0.0f;
+    uint8_t pos = 0u;
+    for ( auto leaf : cut )
+    {
+      total += node_match[leaf].flows[( pin_phases >> match.permutation[pos++] ) & 1];
+    }
+    return total;
+  }
+
+  /* References `cut` of node `n` (phase `phase`) in the cover and returns the added area: the match area plus, recursively, newly referenced leaves and newly needed inverters. */
+  float cut_ref( cut_t const& cut, node<Ntk> const& n, uint8_t phase )
+  {
+    auto const& node_data = node_match[ntk.node_to_index( n )];
+    auto const& match = matches[ntk.node_to_index( n )][cut->data.match_index];
+    float count = node_data.area[phase];
+    uint8_t ctr = 0;
+    for ( auto leaf : cut )
+    {
+      /* compute leaf phase using the current gate */
+      uint8_t leaf_phase = ( node_data.phase[phase] >> match.permutation[ctr] ) & 1;
+      if ( ntk.is_constant( ntk.index_to_node( leaf ) ) )
+      {
+        ++ctr;
+        continue;
+      }
+      else if ( ntk.is_pi( ntk.index_to_node( leaf ) ) )
+      {
+        /* reference PIs, add inverter cost on the first negative-phase reference */
+        if ( leaf_phase == 1u )
+        {
+          if ( node_match[leaf].map_refs[1]++ == 0u )
+            count += lib_inv_area;
+        }
+        else
+        {
+          ++node_match[leaf].map_refs[0];
+        }
+        ++ctr;
+        continue;
+      }
+
+      if ( node_match[leaf].same_match )
+      {
+        /* Add inverter area if not present yet and leaf node is implemented in the opposite phase */
+        if ( node_match[leaf].map_refs[leaf_phase]++ == 0u && node_match[leaf].best_supergate[leaf_phase] == nullptr )
+          count += lib_inv_area;
+        /* Recursive referencing if leaf was not referenced */
+        if ( node_match[leaf].map_refs[2]++ == 0u )
+        {
+          count += cut_ref( cuts.cuts( leaf )[node_match[leaf].best_cut[leaf_phase]], ntk.index_to_node( leaf ), leaf_phase );
+        }
+      }
+      else
+      {
+        /* the leaf has one match per phase: recurse when the required phase was not referenced yet */
+        ++node_match[leaf].map_refs[2];
+        if ( node_match[leaf].map_refs[leaf_phase]++ == 0u )
+        {
+          count += cut_ref( cuts.cuts( leaf )[node_match[leaf].best_cut[leaf_phase]], ntk.index_to_node( leaf ), leaf_phase );
+        }
+      }
+      ++ctr;
+    }
+    return count;
+  }
+
+  /* Dereferences `cut` of node `n` (phase `phase`) from the cover and returns the freed area: the match area plus, recursively, leaves and inverters that become unused. */
+  float cut_deref( cut_t const& cut, node<Ntk> const& n, uint8_t phase )
+  {
+    auto const& node_data = node_match[ntk.node_to_index( n )];
+    auto const& match = matches[ntk.node_to_index( n )][cut->data.match_index];
+    float count = node_data.area[phase];
+    uint8_t ctr = 0;
+    for ( auto leaf : cut )
+    {
+      /* compute leaf phase using the current gate */
+      uint8_t leaf_phase = ( node_data.phase[phase] >> match.permutation[ctr] ) & 1;
+      if ( ntk.is_constant( ntk.index_to_node( leaf ) ) )
+      {
+        ++ctr;
+        continue;
+      }
+      else if ( ntk.is_pi( ntk.index_to_node( leaf ) ) )
+      {
+        /* dereference PIs, add inverter cost when the last negative-phase reference is removed */
+        if ( leaf_phase == 1u )
+        {
+          if ( --node_match[leaf].map_refs[1] == 0u )
+            count += lib_inv_area;
+        }
+        else
+        {
+          --node_match[leaf].map_refs[0];
+        }
+        ++ctr;
+        continue;
+      }
+
+      if ( node_match[leaf].same_match )
+      {
+        /* Add inverter area if it is used only by the current gate and leaf node is implemented in the opposite phase */
+        if ( --node_match[leaf].map_refs[leaf_phase] == 0u && node_match[leaf].best_supergate[leaf_phase] == nullptr )
+          count += lib_inv_area;
+        /* Recursive dereferencing if the leaf is not referenced anymore */
+        if ( --node_match[leaf].map_refs[2] == 0u )
+        {
+          count += cut_deref( cuts.cuts( leaf )[node_match[leaf].best_cut[leaf_phase]], ntk.index_to_node( leaf ), leaf_phase );
+        }
+      }
+      else
+      {
+        /* the leaf has one match per phase: recurse when the required phase is not referenced anymore */
+        --node_match[leaf].map_refs[2];
+        if ( --node_match[leaf].map_refs[leaf_phase] == 0u )
+        {
+          count += cut_deref( cuts.cuts( leaf )[node_match[leaf].best_cut[leaf_phase]], ntk.index_to_node( leaf ), leaf_phase );
+        }
+      }
+      ++ctr;
+    }
+    return count;
+  }
+
+  /* Returns true if the candidate match is better than the current best one.
+   *
+   * With DO_AREA the area metric is compared first and the arrival time second; otherwise
+   * the order is reversed. Values closer than `epsilon` count as ties, and a complete tie
+   * is decided in favor of the smaller cut.
+   *
+   * - `arrival`, `best_arrival`: arrival time of the candidate and of the best match
+   * - `area_flow`, `best_area_flow`: area metric of the candidate and of the best match
+   * - `size`, `best_size`: size of the candidate cut and of the best cut
+   *
+   * The function only compares its arguments; no mapper state is modified.
+   */
+  template<bool DO_AREA>
+  inline bool compare_map( float arrival, float best_arrival, float area_flow, float best_area_flow, uint32_t size, uint32_t best_size )
+  {
+    /* three-way comparison with tolerance: negative if better, positive if worse, 0 on a tie */
+    auto const rank = [this]( float value, float best ) -> int {
+      if ( value < best - epsilon )
+        return -1;
+      if ( value > best + epsilon )
+        return 1;
+      return 0;
+    };
+
+    const int by_area = rank( area_flow, best_area_flow );
+    const int by_arrival = rank( arrival, best_arrival );
+
+    /* pick the comparison order from the optimization target */
+    int first = by_arrival;
+    int second = by_area;
+    if constexpr ( DO_AREA )
+    {
+      first = by_area;
+      second = by_arrival;
+    }
+
+    if ( first != 0 )
+    {
+      return first < 0;
+    }
+    if ( second != 0 )
+    {
+      return second < 0;
+    }
+
+    /* complete tie: prefer the smaller cut */
+    return size < best_size;
+  }
+
+
+private:
+  Ntk& ntk;
+  exact_library<NtkDest, RewritingFn, NInputs> const& library;
+  map_params const& ps;
+  map_stats& st;
+
+  uint32_t iteration{0}; /* current mapping iteration */
+  double delay{0.0f};     /* current delay of the mapping */
+  double area{0.0f};      /* current area of the mapping */
+  const float epsilon{0.005f}; /* epsilon */
+
+  /* lib inverter info */
+  float lib_inv_area;
+  float lib_inv_delay;
+
+  std::vector<node<Ntk>> top_order;
+  std::vector<node_match_t<NtkDest, NInputs>> node_match;
+  std::unordered_map<uint32_t, std::vector<cut_match_t<NtkDest, NInputs>>> matches;
+  network_cuts_t cuts;
+};
+
+} /* namespace detail */
+
+/*! \brief Exact mapping.
+ *
+ * This function implements a mapping algorithm using an exact synthesis database.
+ * It is controlled by a template argument `CutData` (defaulted to
+ * `cut_enumeration_exact_map_cut`). The argument is similar to the `CutData` argument
+ * in `cut_enumeration`, which can specialize the cost function to select priority
+ * cuts and store additional data. The default argument gives priority firstly to
+ * area flow, then delay, and lastly to the cut size.
+ * The type passed as `CutData` must implement the following four fields:
+ *
+ * - `uint32_t delay`
+ * - `float flow`
+ * - `uint8_t match_index`
+ * - `bool ignore`
+ *
+ * See `include/mockturtle/algorithms/cut_enumeration/exact_map_cut.hpp`
+ * for one example of a CutData type that implements the cost function that is used in
+ * the exact mapper.
+ *
+ * The function returns a mapped network representation generated using the exact
+ * synthesis entries in the `exact_library`.
+ *
+ * **Required network functions:**
+ * - `size`
+ * - `is_pi`
+ * - `is_constant`
+ * - `node_to_index`
+ * - `index_to_node`
+ * - `get_node`
+ * - `foreach_po`
+ * - `foreach_node`
+ * - `fanout_size`
+ *
+ * \param ntk Network
+ * \param library Exact library
+ * \param ps Mapping params
+ * \param pst Mapping statistics (optional; written only when the pointer
+ *            is not null)
+ * \return Mapped network of type `NtkDest`
+ */
+template<class Ntk, class NtkDest = Ntk, class RewritingFn, unsigned NInputs, typename CutData = cut_enumeration_exact_map_cut>
+NtkDest exact_map( Ntk& ntk, exact_library<NtkDest, RewritingFn, NInputs> const& library, map_params const& ps = {}, map_stats* pst = nullptr )
+{
+  static_assert( is_network_type_v<Ntk>, "Ntk is not a network type" );
+  static_assert( has_size_v<Ntk>, "Ntk does not implement the size method" );
+  static_assert( has_is_pi_v<Ntk>, "Ntk does not implement the is_pi method" );
+  static_assert( has_is_constant_v<Ntk>, "Ntk does not implement the is_constant method" );
+  static_assert( has_node_to_index_v<Ntk>, "Ntk does not implement the node_to_index method" );
+  static_assert( has_index_to_node_v<Ntk>, "Ntk does not implement the index_to_node method" );
+  static_assert( has_get_node_v<Ntk>, "Ntk does not implement the get_node method" );
+  static_assert( has_foreach_po_v<Ntk>, "Ntk does not implement the foreach_po method" );
+  static_assert( has_foreach_node_v<Ntk>, "Ntk does not implement the foreach_node method" );
+  static_assert( has_fanout_size_v<Ntk>, "Ntk does not implement the fanout_size method" );
+
+  map_stats st;
+  detail::exact_map_impl<NtkDest, Ntk, RewritingFn, CutData, NInputs> p( ntk, library, ps, st );
+  auto res = p.run();
+
+  /* total runtime is the mapping time plus the cut enumeration time */
+  st.time_total = st.time_mapping + st.cut_enumeration_st.time_total;
+  if ( ps.verbose )
+    st.report();
+
+  /* statistics are optional: copy them out only if requested */
+  if ( pst )
+    *pst = st;
+
+  return res;
+}
+
+} /* namespace mockturtle */
diff --git a/include/mockturtle/io/genlib_reader.hpp b/include/mockturtle/io/genlib_reader.hpp
index 4cbad5567..9c30153cd 100644
--- a/include/mockturtle/io/genlib_reader.hpp
+++ b/include/mockturtle/io/genlib_reader.hpp
@@ -62,6 +62,7 @@ struct pin
 
 struct gate
 {
+  unsigned int id;
   std::string name;
   std::string expression;
   uint32_t num_vars;
@@ -115,7 +116,8 @@ class genlib_reader : public lorina::genlib_reader
                            p.input_load, p.max_load,
                            p.rise_block_delay, p.rise_fanout_delay, p.fall_block_delay, p.fall_fanout_delay} );
     }
-    gates.emplace_back( gate{name, expression, num_vars, tt, area, pp} );
+    gates.emplace_back( gate{static_cast<unsigned int>( gates.size() ), name,
+                             expression, num_vars, tt, area, pp} );
   }
 
 protected:
diff --git a/include/mockturtle/utils/tech_library.hpp b/include/mockturtle/utils/tech_library.hpp
new file mode 100644
index 000000000..273284a5a
--- /dev/null
+++ b/include/mockturtle/utils/tech_library.hpp
@@ -0,0 +1,558 @@
+/* mockturtle: C++ logic network library
+ * Copyright (C) 2018-2021  EPFL
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use,
+ * copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following
+ * conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+ * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+ * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+/*!
+  \file tech_library.hpp
+  \brief Implements utilities to enumerate gates for technology mapping
+
+  \author Alessandro Tempia Calvino
+*/
+
+#pragma once
+
+#include <cassert>
+#include <vector>
+#include <unordered_map>
+
+#include <kitty/constructors.hpp>
+#include <kitty/dynamic_truth_table.hpp>
+#include <kitty/static_truth_table.hpp>
+#include <kitty/npn.hpp>
+#include <kitty/print.hpp>
+
+#include "../io/genlib_reader.hpp"
+
+namespace mockturtle
+{
+
+/*
+std::string const mcnc_library =  "GATE   inv1    1	O=!a;		        PIN * INV 1 999 0.9 0.3 0.9 0.3\n"
+                                  "GATE   inv2	  2	O=!a;		        PIN * INV 2 999 1.0 0.1 1.0 0.1\n"
+                                  "GATE   inv3	  3	O=!a;		        PIN * INV 3 999 1.1 0.09 1.1 0.09\n"
+                                  "GATE   inv4	  4	O=!a;		        PIN * INV 4 999 1.2 0.07 1.2 0.07\n"
+                                  "GATE   nand2	  2	O=!(ab);		    PIN * INV 1 999 1.0 0.2 1.0 0.2\n"
+                                  "GATE   nand3	  3	O=!(abc);	      PIN * INV 1 999 1.1 0.3 1.1 0.3\n"
+                                  "GATE   nand4   4	O=!(abcd);	    PIN * INV 1 999 1.4 0.4 1.4 0.4\n"
+                                  "GATE   nor2	  2	O=!{ab};		    PIN * INV 1 999 1.4 0.5 1.4 0.5\n"
+                                  "GATE   nor3	  3	O=!{abc};	      PIN * INV 1 999 2.4 0.7 2.4 0.7\n"
+                                  "GATE   nor4	  4	O=!{abcd};	    PIN * INV 1 999 3.8 1.0 3.8 1.0\n"
+                                  "GATE   and2	  3	O=(ab);		      PIN * NONINV 1 999 1.9 0.3 1.9 0.3\n"
+                                  "GATE   or2		  3	O={ab};		      PIN * NONINV 1 999 2.4 0.3 2.4 0.3\n"
+                                  "GATE   xor2a	  5	O=[ab];     	  PIN * UNKNOWN 2 999 1.9 0.5 1.9 0.5\n"
+                                  "#GATE  xor2b	  5	O=[ab];     	  PIN * UNKNOWN 2 999 1.9 0.5 1.9 0.5\n"
+                                  "GATE   xnor2a	5	O=![ab];		    PIN * UNKNOWN 2 999 2.1 0.5 2.1 0.5\n"
+                                  "#GATE  xnor2b	5	O=![ab];		    PIN * UNKNOWN 2 999 2.1 0.5 2.1 0.5\n"
+                                  "GATE   aoi21	  3	O=!{(ab)c};	    PIN * INV 1 999 1.6 0.4 1.6 0.4\n"
+                                  "GATE   aoi22	  4	O=!{(ab)(cd)};	PIN * INV 1 999 2.0 0.4 2.0 0.4\n"
+                                  "GATE   oai21	  3	O=!({ab}c);	    PIN * INV 1 999 1.6 0.4 1.6 0.4\n"
+                                  "GATE   oai22	  4	O=!({ab}{cd});	PIN * INV 1 999 2.0 0.4 2.0 0.4\n"
+                                  "GATE   buf    	2	O=a;        	  PIN * NONINV 1 999 1.0 0.0 1.0 0.0\n"
+                                  "GATE   zero	  0	O=0;\n"
+                                  "GATE   one		  0	O=1;";
+*/
+
+struct tech_library_params
+{
+  /*! \brief Print, for each gate, how many NP entries were added to the library */
+  bool verbose = false;
+
+  /*! \brief Print every truth table of the library with the gates implementing it */
+  bool very_verbose = false;
+};
+
+
+template<unsigned NInputs>
+struct supergate
+{
+  /* library gate implementing this entry (non-owning) */
+  struct gate const* root = nullptr;
+
+  /* area of the gate */
+  float area = 0.0f;
+  /* largest pin delay of the gate */
+  float worstDelay = 0.0f;
+  /* delay assigned to each input pin */
+  std::array<float, NInputs> tdelay = {};
+
+  /* input permutation of the NP configuration */
+  std::vector<uint8_t> permutation = {};
+  /* bit i set: input pin i is negated */
+  uint8_t polarity = 0;
+};
+
+/*! \brief Library of NP-enumerated gates
+ *
+ * This class creates a technology library from a set of input gates. Every
+ * NP configuration of each gate is enumerated and stored under its truth
+ * table; gates with more than `NInputs` variables are skipped with a warning.
+ *
+   \verbatim embed:rst
+
+   Example
+
+   .. code-block:: c++
+
+      std::vector<gate> gates;
+      lorina::read_genlib( "file.lib", genlib_reader( gates ) );
+      mockturtle::tech_library lib( gates );
+   \endverbatim
+ */
+template<unsigned NInputs = 5u>
+class tech_library
+{
+  using supergates_list_t = std::vector<supergate<NInputs>>;
+  using tt_hash = kitty::hash<kitty::static_truth_table<NInputs>>;
+  using lib_t = std::unordered_map<kitty::static_truth_table<NInputs>, supergates_list_t, tt_hash>;
+
+public:
+  /*! \brief Copies `gates` and `ps`, then enumerates the library from the copy. */
+  tech_library( std::vector<gate> const& gates, tech_library_params const& ps = {} )
+    : _gates( gates ),
+      _ps( ps ),
+      _super_lib()
+  {
+    generate_library();
+  }
+
+  /*! \brief Returns the supergates stored for `tt`, or `nullptr` if there are none. */
+  const supergates_list_t* get_supergates( kitty::static_truth_table<NInputs> const& tt ) const
+  {
+    auto const match = _super_lib.find( tt );
+    return match != _super_lib.end() ? &match->second : nullptr;
+  }
+
+  /*! \brief Returns area, delay, and gate id of the smallest-area inverter (0, 0, UINT32_MAX if none). */
+  const std::tuple<float, float, uint32_t> get_inverter_info() const
+  {
+    return std::make_tuple( _inv_area, _inv_delay, _inv_id );
+  }
+
+  /*! \brief Returns the largest number of inputs among the accepted gates. */
+  unsigned max_gate_size() const
+  {
+    return _max_size;
+  }
+
+  /*! \brief Returns a copy of the gates the library was built from. */
+  const std::vector<gate> get_gates() const
+  {
+    return _gates;
+  }
+
+private:
+  /*! \brief Builds the library from `_gates`.
+   *
+   * Records the smallest-area inverter and inserts every NP configuration
+   * of each accepted gate into `_super_lib`, skipping duplicates.
+   */
+  void generate_library()
+  {
+    bool inv = false;
+
+    for ( auto& gate : _gates )
+    {
+      if ( gate.function.num_vars() > NInputs )
+      {
+        std::cerr << "[i] WARNING: gate " << gate.name << " IGNORED, too many variables for the library settings" << std::endl;
+        continue;
+      }
+
+      float worst_delay = compute_worst_delay( gate );
+
+      /* an inverter has one input and its cofactor for that input set to 1 is constant 0 */
+      bool const is_inverter = gate.function.num_vars() == 1 && kitty::is_const0( kitty::cofactor1( gate.function, 0 ) );
+
+      /* keep area, delay, and id of the smallest-area inverter */
+      if ( is_inverter && ( !inv || gate.area < _inv_area ) )
+      {
+        _inv_area = gate.area;
+        _inv_delay = worst_delay;
+        _inv_id = gate.id;
+        inv = true;
+      }
+
+      _max_size = std::max( _max_size, gate.num_vars );
+
+      uint32_t np_count = 0;
+
+      /* called once for every NP configuration of the current gate */
+      const auto on_np = [&]( auto const& tt, auto neg, auto const& perm ) {
+        supergate<NInputs> sg;
+        sg.root = &gate;
+        sg.area = gate.area;
+        sg.worstDelay = worst_delay;
+        sg.polarity = 0;
+        sg.permutation = perm;
+
+        for ( auto i = 0u; i < perm.size() && i < NInputs; ++i )
+        {
+          sg.tdelay[i] = worst_delay;  /* if pin-to-pin delay change to: gate.delay[perm[i]] */
+          sg.polarity |= ( ( neg >> perm[i] ) & 1 ) << i; /* permutate input negation to match the right pin */
+        }
+        for ( auto i = perm.size(); i < NInputs; ++i )
+        {
+          sg.tdelay[i] = 0; /* added for completeness but not necessary */
+        }
+
+        const auto static_tt = kitty::extend_to<NInputs>( tt );
+
+        auto& v = _super_lib[static_tt];
+
+        /* ordered insert by ascending area, number of input pins, and gate id (strict weak ordering) */
+        auto it = std::lower_bound( v.begin(), v.end(), sg, [&]( auto const& s1, auto const& s2 ) {
+          if ( s1.area < s2.area )
+            return true;
+          if ( s1.area > s2.area )
+            return false;
+          if ( s1.root->num_vars < s2.root->num_vars )
+            return true;
+          if ( s1.root->num_vars > s2.root->num_vars )
+            return false;
+          return s1.root->id < s2.root->id;
+        } );
+
+        bool to_add = true;
+        /* search for duplicated element due to symmetries among the entries of the same gate */
+        for ( ; it != v.end() && it->root->id == sg.root->id; ++it )
+        {
+          /* equal polarity and delays: the entry is already in the library, whatever its permutation */
+          if ( sg.polarity == it->polarity && sg.tdelay == it->tdelay )
+          {
+            to_add = false;
+            break;
+          }
+        }
+
+        if ( to_add )
+        {
+          v.insert( it, sg );
+          ++np_count;
+        }
+
+        /* check correct results */
+        // assert( gate.function == create_from_npn_config( std::make_tuple( tt, neg, sg.permutation ) ) );
+      };
+
+      /* NP enumeration of the function */
+      const auto tt = gate.function;
+      kitty::exact_np_enumeration( tt, on_np );
+
+      if ( _ps.verbose )
+      {
+        std::cout << "Gate " << gate.name << ", num_vars = " << gate.num_vars << ", np entries = " << np_count << std::endl;
+      }
+    }
+
+    if ( !inv )
+    {
+      std::cerr << "[i] WARNING: inverter gate has not been detected in the library" << std::endl;
+    }
+
+    /* dump every truth table with the supergates that implement it */
+    if ( _ps.very_verbose )
+    {
+      for ( auto const& entry : _super_lib )
+      {
+        kitty::print_hex( entry.first );
+        std::cout << ": ";
+        for ( auto const& gate : entry.second )
+        {
+          printf( "%s(d:%.2f, a:%.2f, p:%d) ", gate.root->name.c_str(), gate.worstDelay, gate.area, gate.polarity );
+        }
+        std::cout << std::endl;
+      }
+    }
+  }
+
+  /*! \brief Returns the largest rise/fall block delay over all pins of `g` (0 if it has no pins). */
+  float compute_worst_delay( gate const& g )
+  {
+    float worst_delay = 0.0f;
+
+    /* consider only block_delay */
+    for ( auto const& pin : g.pins )
+    {
+      float worst_pin_delay = static_cast<float>( std::max( pin.rise_block_delay, pin.fall_block_delay ) );
+      worst_delay = std::max( worst_delay, worst_pin_delay );
+    }
+    return worst_delay;
+  }
+
+private:
+  /* inverter info */
+  float _inv_area{0.0};
+  float _inv_delay{0.0};
+  uint32_t _inv_id{UINT32_MAX};
+
+  unsigned _max_size{0}; /* max #fanins of the gates in the library */
+
+  /* the supergates store pointers into _gates: the vector must stay unchanged */
+  std::vector<gate> const _gates; /* collection of gates */
+  tech_library_params const _ps;
+  lib_t _super_lib; /* library of enumerated gates */
+};
+
+
+
+template<typename Ntk, unsigned NInputs>
+struct exact_supergate
+{
+  /* root signal of the supergate (fixed at construction) */
+  signal<Ntk> const root;
+
+  /* number of inputs of the supergate */
+  uint8_t n_inputs = 0;
+  /* saved polarities for inputs and/or outputs */
+  uint8_t polarity = 0;
+
+  /* area */
+  float area = 0.0f;
+  /* worst delay */
+  float worstDelay = 0.0f;
+  /* pin-to-pin delay */
+  std::array<float, NInputs> tdelay = {};
+
+  exact_supergate( signal<Ntk> const r ) : root( r ) {}
+};
+
+struct exact_library_params
+{
+  /*! \brief Area of a gate */
+  float area_gate = 1.0f;
+  /*! \brief Area of an inverter */
+  float area_inverter = 0.0f;
+  /*! \brief Delay of a gate */
+  float delay_gate = 1.0f;
+  /*! \brief Delay of an inverter */
+  float delay_inverter = 0.0f;
+
+  /*! \brief Classify in NP instead of NPN */
+  bool np_classification = true;
+  /*! \brief Print the generated library entries */
+  bool verbose = false;
+};
+
+/*! \brief Library of exact synthesis supergates
+ *
+ * This class creates a technology library from an exact synthesis database.
+ * Each NPN-entry in the database is stored in its NP class by removing the
+ * output inverter if present. The class creates supergates from the database
+ * computing area and delay information. The rewriting function is held by
+ * reference and must outlive the library; the parameters are copied.
+ *
+   \verbatim embed:rst
+
+   Example
+
+   .. code-block:: c++
+
+      mockturtle::mig_npn_resynthesis mig_resyn{true};
+      mockturtle::exact_library<mockturtle::mig_network, mockturtle::mig_npn_resynthesis, 4> lib( mig_resyn );
+   \endverbatim
+ */
+template<typename Ntk, class RewritingFn, unsigned NInputs>
+class exact_library
+{
+  using supergates_list_t = std::vector<exact_supergate<Ntk, NInputs>>;
+  using tt_hash = kitty::hash<kitty::static_truth_table<NInputs>>;
+  using lib_t = std::unordered_map<kitty::static_truth_table<NInputs>, supergates_list_t, tt_hash>;
+
+public:
+  exact_library( RewritingFn const& rewriting_fn, exact_library_params const& ps = {} )
+  : _database(),
+    _rewriting_fn( rewriting_fn ),
+    _ps( ps ),
+    _super_lib()
+  {
+    generate_library();
+  }
+
+  /*! \brief Returns the supergates stored for `tt`, or `nullptr` if there are none. */
+  const supergates_list_t* get_supergates( kitty::static_truth_table<NInputs> const& tt ) const
+  {
+    auto const match = _super_lib.find( tt );
+    return match != _super_lib.end() ? &match->second : nullptr;
+  }
+
+  /*! \brief Returns the network that holds the structures of the supergates. */
+  const Ntk &get_database() const
+  {
+    return _database;
+  }
+
+  /*! \brief Returns the inverter area and delay given in the library parameters. */
+  const std::tuple<float, float> get_inverter_info() const
+  {
+    return std::make_tuple( _ps.area_inverter, _ps.delay_inverter );
+  }
+
+private:
+  /*! \brief Runs the rewriting function on every NPN class and fills `_super_lib`. */
+  void generate_library()
+  {
+    std::vector<signal<Ntk>> pis;
+    for ( auto i = 0u; i < NInputs; ++i )
+    {
+      pis.push_back( _database.create_pi() );
+    }
+
+    /* Compute NPN classes: visits every function until the counter is constant 0 again */
+    std::unordered_set<kitty::static_truth_table<NInputs>, tt_hash> classes;
+    kitty::static_truth_table<NInputs> tt;
+    do
+    {
+      const auto res = kitty::exact_npn_canonization( tt );
+      classes.insert( std::get<0>( res ) );
+      kitty::next_inplace( tt );
+    } while ( !kitty::is_const0( tt ) );
+
+    /* Construct supergates */
+    for ( auto const &entry : classes )
+    {
+      supergates_list_t supergates_pos;
+      supergates_list_t supergates_neg;
+      auto const not_entry = ~entry;
+
+      /* callback handed to the rewriting function; receives the root signal of a structure */
+      const auto add_supergate = [&]( auto const& f_new ) {
+        /* in NP mode a complemented root is stored without its inverter under the negated function */
+        bool const to_negate = _ps.np_classification && _database.is_complemented( f_new );
+        auto const f = to_negate ? !f_new : f_new;
+        exact_supergate<Ntk, NInputs> sg( f );
+        compute_info( sg );
+        ( to_negate ? supergates_neg : supergates_pos ).push_back( sg );
+        _database.create_po( f );
+        return true;
+      };
+
+      kitty::dynamic_truth_table function = kitty::extend_to( entry, NInputs );
+      _rewriting_fn( _database, function, pis.begin(), pis.end(), add_supergate );
+      if ( supergates_pos.size() > 0 )
+        _super_lib.insert( {entry, supergates_pos} );
+      if ( _ps.np_classification && supergates_neg.size() > 0 )
+        _super_lib.insert( {not_entry, supergates_neg} );
+    }
+
+    if ( _ps.verbose )
+    {
+      std::cout << "Classified in " << _super_lib.size() << " entries" << std::endl;
+      for ( auto const &pair : _super_lib )
+      {
+        kitty::print_hex( pair.first );
+        std::cout << ": ";
+
+        for ( auto const& gate : pair.second )
+        {
+          printf( "%.2f,%.2f,%d,%d,:", gate.worstDelay, gate.area, gate.polarity, gate.n_inputs );
+          for ( auto j = 0u; j < NInputs; ++j )
+            printf( "%.2f/", gate.tdelay[j] );
+          std::cout << " ";
+        }
+        std::cout << std::endl;
+      }
+    }
+  }
+
+  /* Computes delay and area info; delays are accumulated negated and made positive at the end */
+  void compute_info( exact_supergate<Ntk, NInputs> &sg )
+  {
+    _database.incr_trav_id();
+    /* info does not consider input and output inverters */
+    bool compl_root = _database.is_complemented( sg.root );
+    auto const root = compl_root ? !sg.root : sg.root;
+    sg.area = compute_info_rec( sg, root, 0.0f );
+
+    /* output polarity */
+    sg.polarity |= ( unsigned( compl_root ) ) << NInputs;
+    /* number of inputs */
+    for( auto i = 0u; i < NInputs; ++i )
+    {
+      sg.tdelay[i] *= -1;   /* invert to positive value */
+      if ( sg.tdelay[i] != 0.0f )
+        sg.n_inputs++;
+    }
+    /* the worst delay was accumulated with the same negative sign */
+    sg.worstDelay *= -1;
+  }
+
+  /* Accumulates the area and the (negated) pin delays of the cone rooted in `root`;
+   * `delay` is the negated delay accumulated from the supergate output down to `root` */
+  float compute_info_rec( exact_supergate<Ntk, NInputs> &sg, signal<Ntk> const& root, float delay )
+  {
+    auto n = _database.get_node( root );
+
+    if ( _database.is_constant( n ) )
+      return 0.0f;
+
+    float area = 0.0f;
+    float tdelay = delay;
+
+    if ( _database.is_pi( n ) )
+    {
+      /* assumes the database PIs occupy the indices 1..NInputs, as the offset of 1 implies */
+      auto const pin = _database.node_to_index( n ) - 1u;
+      sg.tdelay[pin] = std::min( sg.tdelay[pin], tdelay );
+      sg.worstDelay = std::min( sg.worstDelay, tdelay );
+      sg.polarity |= ( unsigned( _database.is_complemented( root ) ) ) << pin;
+      return area;
+    }
+
+    tdelay -= _ps.delay_gate;
+
+    /* add gate area once */
+    if ( _database.visited( n ) != _database.trav_id() )
+    {
+      area += _ps.area_gate;
+      _database.set_value( n, 0u );
+      _database.set_visited( n, _database.trav_id() );
+    }
+
+    if ( _database.is_complemented( root ) )
+    {
+      tdelay -= _ps.delay_inverter;
+      /* add inverter area only once (shared by fanout) */
+      if ( _database.value( n ) == 0u )
+      {
+        area += _ps.area_inverter;
+        _database.set_value( n, 1u );
+      }
+    }
+
+    _database.foreach_fanin( n, [&]( auto const& child ) {
+      area += compute_info_rec( sg, child, tdelay );
+    } );
+
+    return area;
+  }
+
+private:
+  Ntk _database;
+  RewritingFn const& _rewriting_fn;
+  exact_library_params const _ps; /* copy: the constructor argument may be a temporary */
+  lib_t _super_lib;
+};
+
+}
diff --git a/lib/kitty/kitty/npn.hpp b/lib/kitty/kitty/npn.hpp
index e3a654e8b..29460ff15 100755
--- a/lib/kitty/kitty/npn.hpp
+++ b/lib/kitty/kitty/npn.hpp
@@ -542,6 +542,89 @@ std::tuple<TT, uint32_t, std::vector<uint8_t>> sifting_p_canonization( const TT&
   return std::make_tuple( npn, phase, perm );
 }
 
+/*! \brief Exact NP enumeration
+
+  Given a truth table, this function enumerates all the functions in its
+  NP class. Two functions are in the same NP class, if one can be obtained
+  from the other by input negation and input permutation.
+
+  The function takes a callback as second parameter which is called for
+  every enumerated function. The callback should take as parameters:
+  - NP-enumerated truth table
+  - input negations
+  - input permutation to apply
+
+  The first call always reports the unmodified truth table with no
+  negations and the identity permutation. For truth tables with fewer
+  than two variables this is the only call. Otherwise the number of
+  variables must be between 2 and 6, which is checked by an assertion.
+
+  The callback is invoked once per visited configuration, so it may
+  receive the same truth table more than once.
+
+  \param tt Truth table
+  \param fn Callback for each enumerated truth table in the NP class
+*/
+
+template<typename TT, typename Callback>
+void exact_np_enumeration( const TT& tt, Callback&& fn )
+{
+  static_assert( is_complete_truth_table<TT>::value, "Can only be applied on complete truth tables." );
+
+  const auto num_vars = tt.num_vars();
+
+  /* Special case for n = 0: there is nothing to permute or negate */
+  if ( num_vars == 0 )
+  {
+    fn( tt, 0u, std::vector<uint8_t>{} );
+    return;
+  }
+
+  /* Special case for n = 1: only the identity configuration is reported */
+  if ( num_vars == 1 )
+  {
+    fn( tt, 0u, std::vector<uint8_t>{0} );
+    return;
+  }
+
+  assert( num_vars >= 2 && num_vars <= 6 );
+
+  /* running state: current function, its input negations, and its permutation */
+  auto func = tt;
+  uint32_t phase = 0;
+  std::vector<uint8_t> perm( num_vars );
+  std::iota( perm.begin(), perm.end(), 0u );
+
+  const auto& swaps = detail::swaps[num_vars - 2u];
+  const auto& flips = detail::flips[num_vars - 2u];
+
+  /* applies the swap sequence step by step and reports the result of every step */
+  const auto visit_swaps = [&]() {
+    for ( const auto pos : swaps )
+    {
+      swap_adjacent_inplace( func, pos );
+      std::swap( perm[pos], perm[pos + 1] );
+      fn( func, phase, perm );
+    }
+  };
+
+  /* configurations with no input negated */
+  fn( func, phase, perm );
+  visit_swaps();
+
+  /* every flip step toggles one negation bit, then the swap sequence runs again */
+  for ( const auto pos : flips )
+  {
+    swap_adjacent_inplace( func, 0 );
+    flip_inplace( func, pos );
+    std::swap( perm[0], perm[1] );
+    phase ^= 1 << perm[pos];
+
+    fn( func, phase, perm );
+    visit_swaps();
+  }
+}
+
 /*! \brief Obtain truth table from NPN configuration
 
   Given an NPN configuration, which contains a representative
diff --git a/test/algorithms/mapper.cpp b/test/algorithms/mapper.cpp
new file mode 100644
index 000000000..298d9e5a4
--- /dev/null
+++ b/test/algorithms/mapper.cpp
@@ -0,0 +1,197 @@
+#include <catch.hpp>
+
+#include <cstdint>
+#include <vector>
+
+#include <lorina/genlib.hpp>
+#include <mockturtle/io/genlib_reader.hpp>
+#include <mockturtle/utils/tech_library.hpp>
+#include <mockturtle/networks/aig.hpp>
+#include <mockturtle/networks/mig.hpp>
+#include <mockturtle/networks/klut.hpp>
+#include <mockturtle/algorithms/mapper.hpp>
+#include <mockturtle/generators/arithmetic.hpp>
+
+
+
+using namespace mockturtle;
+
+std::string const test_library =  "GATE   inv1    1	O=!a;     PIN * INV 1 999 0.9 0.3 0.9 0.3\n"
+                                  "GATE   inv2	  2	O=!a;		  PIN * INV 2 999 1.0 0.1 1.0 0.1\n"
+                                  "GATE   nand2	  2	O=!(ab);  PIN * INV 1 999 1.0 0.2 1.0 0.2\n"
+                                  "GATE   xor2	  5	O=[ab];   PIN * UNKNOWN 2 999 1.9 0.5 1.9 0.5\n"
+                                  "GATE   mig3    3	O=<abc>;  PIN * INV 1 999 2.0 0.2 2.0 0.2\n"
+                                  "GATE   zero	  0	O=0;\n"
+                                  "GATE   one		  0	O=1;";
+
+
+TEST_CASE( "Map of MAJ3", "[mapper]" )
+{
+  std::vector<mockturtle::gate> gates;
+
+  std::istringstream in( test_library );
+  auto result = lorina::read_genlib( in, mockturtle::genlib_reader( gates ) );
+  /* the genlib description must be parsed without errors */
+  CHECK( result == lorina::return_code::success );
+
+  mockturtle::tech_library<3> lib( gates );
+
+  aig_network aig;
+  const auto a = aig.create_pi();
+  const auto b = aig.create_pi();
+  const auto c = aig.create_pi();
+
+  const auto f = aig.create_maj( a, b, c ); /* network under test: one majority-of-three */
+  aig.create_po( f );
+
+  map_params ps;
+  map_stats st;
+  klut_network luts = tech_map( aig, lib, ps, &st );
+
+  CHECK( luts.size() == 6u ); /* 3 PIs + 1 gate; the 2 remaining nodes are presumably the klut constants */
+  CHECK( luts.num_pis() == 3u );
+  CHECK( luts.num_pos() == 1u );
+  CHECK( luts.num_gates() == 1u ); /* consistent with a single mig3 gate */
+  CHECK( st.area == 3.0f );  /* mig3 has area 3 in test_library */
+  CHECK( st.delay == 2.0f ); /* mig3 has block delay 2.0 in test_library */
+}
+
+TEST_CASE( "Map of bad MAJ3 and constant output", "[mapper]" )
+{
+  std::vector<mockturtle::gate> gates;
+
+  std::istringstream in( test_library );
+  auto result = lorina::read_genlib( in, mockturtle::genlib_reader( gates ) );
+  /* the genlib description must be parsed without errors */
+  CHECK( result == lorina::return_code::success );
+
+  mockturtle::tech_library<3> lib( gates );
+
+  aig_network aig;
+  const auto a = aig.create_pi();
+  const auto b = aig.create_pi();
+  const auto c = aig.create_pi();
+
+  const auto f = aig.create_maj( a, aig.create_maj( a, b, c ), c ); /* redundant structure, equal to maj( a, b, c ) */
+  aig.create_po( f );
+  aig.create_po( aig.get_constant( true ) ); /* second output driven by a constant */
+
+  map_params ps;
+  map_stats st;
+  klut_network luts = tech_map( aig, lib, ps, &st );
+
+  CHECK( luts.size() == 6u ); /* 3 PIs + 1 gate; the 2 remaining nodes are presumably the klut constants */
+  CHECK( luts.num_pis() == 3u );
+  CHECK( luts.num_pos() == 2u );
+  CHECK( luts.num_gates() == 1u ); /* the constant output is expected to need no gate */
+  CHECK( st.area == 3.0f );  /* mig3 has area 3 in test_library */
+  CHECK( st.delay == 2.0f ); /* mig3 has block delay 2.0 in test_library */
+}
+
+TEST_CASE( "Map of full adder", "[mapper]" )
+{
+  std::vector<mockturtle::gate> gates;
+
+  std::istringstream in( test_library );
+  auto result = lorina::read_genlib( in, mockturtle::genlib_reader( gates ) );
+  /* the genlib description must be parsed without errors */
+  CHECK( result == lorina::return_code::success );
+
+  mockturtle::tech_library<3> lib( gates );
+
+  aig_network aig;
+  const auto a = aig.create_pi();
+  const auto b = aig.create_pi();
+  const auto c = aig.create_pi();
+
+  const auto [sum, carry] = full_adder( aig, a, b, c ); /* network under test: a one-bit full adder */
+  aig.create_po( sum );
+  aig.create_po( carry );
+
+  map_params ps;
+  map_stats st;
+  klut_network luts = tech_map( aig, lib, ps, &st );
+
+  const float eps{0.005f}; /* tolerance for the floating-point comparisons below */
+
+  CHECK( luts.size() == 8u ); /* 3 PIs + 3 gates; the 2 remaining nodes are presumably the klut constants */
+  CHECK( luts.num_pis() == 3u );
+  CHECK( luts.num_pos() == 2u );
+  CHECK( luts.num_gates() == 3u ); /* consistent with two xor2 gates and one mig3 gate */
+  CHECK( st.area > 13.0f - eps );  /* 13 = 5 + 5 + 3 with the areas of test_library */
+  CHECK( st.area < 13.0f + eps );
+  CHECK( st.delay > 3.8f - eps );  /* 3.8 = 1.9 + 1.9, i.e. two xor2 gates in series */
+  CHECK( st.delay < 3.8f + eps );
+}
+
+TEST_CASE( "Map with inverters", "[mapper]" )
+{
+  std::vector<mockturtle::gate> gates;
+
+  std::istringstream in( test_library );
+  auto result = lorina::read_genlib( in, mockturtle::genlib_reader( gates ) );
+  /* the genlib description must be parsed without errors */
+  CHECK( result == lorina::return_code::success );
+
+  mockturtle::tech_library<3> lib( gates );
+
+  aig_network aig;
+  const auto a = aig.create_pi();
+  const auto b = aig.create_pi();
+  const auto c = aig.create_pi();
+
+  const auto f1 = aig.create_and( !a, b );  /* two chained ANDs with complemented inputs; */
+  const auto f2 = aig.create_and( f1, !c ); /* test_library has no AND gate, only nand2 and inverters */
+
+  aig.create_po( f2 );
+
+  map_params ps;
+  map_stats st;
+  klut_network luts = tech_map( aig, lib, ps, &st );
+
+  const float eps{0.005f}; /* tolerance for the floating-point comparisons below */
+
+  CHECK( luts.size() == 11u ); /* 3 PIs + 6 gates; the 2 remaining nodes are presumably the klut constants */
+  CHECK( luts.num_pis() == 3u );
+  CHECK( luts.num_pos() == 1u );
+  CHECK( luts.num_gates() == 6u ); /* consistent with two nand2 gates and four inv1 gates */
+  CHECK( st.area > 8.0f - eps );   /* 8 = 2 * 2 + 4 * 1 with the areas of test_library */
+  CHECK( st.area < 8.0f + eps );
+  CHECK( st.delay > 4.7f - eps );  /* 4.7 = 3 * 0.9 + 2 * 1.0 along the longest path */
+  CHECK( st.delay < 4.7f + eps );
+}
+
+TEST_CASE( "Map for inverters minimization", "[mapper]" )
+{
+  std::vector<mockturtle::gate> gates;
+
+  std::istringstream in( test_library );
+  auto result = lorina::read_genlib( in, mockturtle::genlib_reader( gates ) );
+  /* the genlib description must be parsed without errors */
+  CHECK( result == lorina::return_code::success );
+
+  mockturtle::tech_library<3> lib( gates );
+
+  aig_network aig;
+  const auto a = aig.create_pi();
+  const auto b = aig.create_pi();
+  const auto c = aig.create_pi();
+
+  const auto f = aig.create_maj( !a, !b, !c ); /* equal to the complement of maj( a, b, c ) */
+  aig.create_po( f );
+
+  map_params ps;
+  map_stats st;
+  klut_network luts = tech_map( aig, lib, ps, &st );
+
+  const float eps{0.005f}; /* tolerance for the floating-point comparisons below */
+
+  CHECK( luts.size() == 7u ); /* 3 PIs + 2 gates; the 2 remaining nodes are presumably the klut constants */
+  CHECK( luts.num_pis() == 3u );
+  CHECK( luts.num_pos() == 1u );
+  CHECK( luts.num_gates() == 2u ); /* consistent with one mig3 gate and a single inv1 instead of three */
+  CHECK( st.area > 4.0f - eps );   /* 4 = 3 + 1 with the areas of test_library */
+  CHECK( st.area < 4.0f + eps );
+  CHECK( st.delay > 2.9f - eps );  /* 2.9 = 2.0 + 0.9 */
+  CHECK( st.delay < 2.9f + eps );
+}
\ No newline at end of file
diff --git a/test/io/genlib_reader.cpp b/test/io/genlib_reader.cpp
index 135f9a48d..9fe02e23c 100644
--- a/test/io/genlib_reader.cpp
+++ b/test/io/genlib_reader.cpp
@@ -25,6 +25,7 @@ TEST_CASE( "read genlib file", "[genlib_reader]" )
   CHECK( result == lorina::return_code::success );
 
   CHECK( gates.size() == 5u );
+  CHECK( gates[0u].id == 0u );
   CHECK( gates[0u].name == "zero" );
   CHECK( gates[0u].expression == "0" );
   CHECK( gates[0u].function._bits[0] == 0 );
@@ -32,6 +33,7 @@ TEST_CASE( "read genlib file", "[genlib_reader]" )
   CHECK( gates[0u].area == 0.0 );
   CHECK( gates[0u].pins.empty() );
 
+  CHECK( gates[1u].id == 1u );
   CHECK( gates[1u].name == "one" );
   CHECK( gates[1u].expression == "1" );
   CHECK( gates[1u].function._bits[0] == 1 );
@@ -39,6 +41,7 @@ TEST_CASE( "read genlib file", "[genlib_reader]" )
   CHECK( gates[1u].area == 0.0 );
   CHECK( gates[1u].pins.empty() );
 
+  CHECK( gates[2u].id == 2u );
   CHECK( gates[2u].name == "inverter" );
   CHECK( gates[2u].expression == "!a" );
   CHECK( gates[2u].function._bits[0] == 1 );
@@ -54,6 +57,7 @@ TEST_CASE( "read genlib file", "[genlib_reader]" )
   CHECK( gates[2u].pins[0u].rise_block_delay == 1.0 );
   CHECK( gates[2u].pins[0u].rise_fanout_delay == 1.0 );
 
+  CHECK( gates[3u].id == 3u );
   CHECK( gates[3u].name == "buffer" );
   CHECK( gates[3u].expression == "a" );
   CHECK( gates[3u].function._bits[0] == 2 );
@@ -70,6 +74,7 @@ TEST_CASE( "read genlib file", "[genlib_reader]" )
   CHECK( gates[2u].pins[0u].rise_block_delay == 1.0 );
   CHECK( gates[2u].pins[0u].rise_fanout_delay == 1.0 );
 
+  CHECK( gates[4u].id == 4u );
   CHECK( gates[4u].name == "and" );
   CHECK( gates[4u].expression == "(ab)" );
   CHECK( gates[4u].function._bits[0] == 8 );
diff --git a/test/utils/tech_library.cpp b/test/utils/tech_library.cpp
new file mode 100644
index 000000000..99cdda230
--- /dev/null
+++ b/test/utils/tech_library.cpp
@@ -0,0 +1,164 @@
+#include <catch.hpp>
+
+#include <cstdint>
+#include <vector>
+
+#include <lorina/genlib.hpp>
+#include <mockturtle/io/genlib_reader.hpp>
+#include <mockturtle/utils/tech_library.hpp>
+
+#include <kitty/constructors.hpp>
+#include <kitty/dynamic_truth_table.hpp>
+#include <kitty/static_truth_table.hpp>
+#include <kitty/npn.hpp>
+
+using namespace mockturtle;
+
+std::string const simple_test_library = "GATE   inv1    1	O=!a;     PIN * INV 1 999 0.9 0.3 0.9 0.3\n"
+                                        "GATE   inv2	  2	O=!a;		  PIN * INV 2 999 1.0 0.1 1.0 0.1\n"
+                                        "GATE   nand2	  2	O=!(ab);  PIN * INV 1 999 1.0 0.2 1.0 0.2\n";
+
+std::string const test_library =  "GATE   inv1    3	O=!a;		        PIN * INV 1 999 1.1 0.09 1.1 0.09\n"
+                                  "GATE   inv2	  2	O=!a;		        PIN * INV 2 999 1.0 0.1 1.0 0.1\n"
+                                  "GATE   inv3	  1	O=!a;		        PIN * INV 3 999 0.9 0.3 0.9 0.3\n"
+                                  "GATE   inv4	  4	O=!a;		        PIN * INV 4 999 1.2 0.07 1.2 0.07\n"
+                                  "GATE   nand2	  2	O=!(ab);		    PIN * INV 1 999 1.0 0.2 1.0 0.2\n"
+                                  "GATE   nand3	  3	O=!(abc);	      PIN * INV 1 999 1.1 0.3 1.1 0.3\n"
+                                  "GATE   nand4   4	O=!(abcd);	    PIN * INV 1 999 1.4 0.4 1.4 0.4\n"
+                                  "GATE   nor2	  2	O=!{ab};		    PIN * INV 1 999 1.4 0.5 1.4 0.5\n"
+                                  "GATE   nor3	  3	O=!{abc};	      PIN * INV 1 999 2.4 0.7 2.4 0.7\n"
+                                  "GATE   nor4	  4	O=!{abcd};	    PIN * INV 1 999 3.8 1.0 3.8 1.0\n"
+                                  "GATE   and2	  3	O=(ab);		      PIN * NONINV 1 999 1.9 0.3 1.9 0.3\n"
+                                  "GATE   or2		  3	O={ab};		      PIN * NONINV 1 999 2.4 0.3 2.4 0.3\n"
+                                  "GATE   xor2a	  5	O=[ab];     	  PIN * UNKNOWN 2 999 1.9 0.5 1.9 0.5\n"
+                                  "GATE   xnor2a	5	O=![ab];		    PIN * UNKNOWN 2 999 2.1 0.5 2.1 0.5\n"
+                                  "GATE   aoi21	  3	O=!{(ab)c};	    PIN * INV 1 999 1.6 0.4 1.6 0.4\n"
+                                  "GATE   aoi22	  4	O=!{(ab)(cd)};	PIN * INV 1 999 2.0 0.4 2.0 0.4\n"
+                                  "GATE   oai21	  3	O=!({ab}c);	    PIN * INV 1 999 1.6 0.4 1.6 0.4\n"
+                                  "GATE   oai22	  4	O=!({ab}{cd});	PIN * INV 1 999 2.0 0.4 2.0 0.4\n"
+                                  "GATE   buf    	2	O=a;        	  PIN * NONINV 1 999 1.0 0.0 1.0 0.0\n"
+                                  "GATE   zero	  0	O=0;\n"
+                                  "GATE   one		  0	O=1;";
+
+
+TEST_CASE( "Simple library generation", "[tech_library]" )
+{
+  std::vector<gate> gates;
+  /* parse the three-gate library (inv1, inv2, nand2), build a 2-input tech_library and look up supergates by truth table */
+  std::istringstream in( simple_test_library );
+  auto result = lorina::read_genlib( in, genlib_reader( gates ) );
+  
+  CHECK( result == lorina::return_code::success );
+
+  tech_library<2> lib( gates );
+
+  CHECK( lib.max_gate_size() == 2 );
+  CHECK( lib.get_inverter_info() == std::make_tuple( 1.0f, 0.9f, 0u ) ); /* presumably (area, delay, gate id) of inv1 -- confirm against tech_library */
+
+  kitty::static_truth_table<2> tt;
+  /* 0x5 = complement of variable 0: both inverters are expected, inv1 before inv2 */
+  kitty::create_from_hex_string( tt, "5" );
+  auto const inv = lib.get_supergates( tt );
+  REQUIRE( inv != nullptr ); /* REQUIRE, not CHECK: the pointer is dereferenced right below */
+  REQUIRE( inv->size() == 2 ); /* REQUIRE, not CHECK: entries [0] and [1] are indexed right below */
+  CHECK( ( *inv )[0].root->name == "inv1" );
+  CHECK( ( *inv )[0].area == 1.0f );
+  CHECK( ( *inv )[0].worstDelay == 0.9f );
+  CHECK( ( *inv )[0].tdelay[0] == 0.9f );
+  CHECK( ( *inv )[0].polarity == 0u );
+  CHECK( ( *inv )[1].root->name == "inv2" );
+  CHECK( ( *inv )[1].area == 2.0f );
+  CHECK( ( *inv )[1].worstDelay == 1.0f );
+  CHECK( ( *inv )[1].tdelay[0] == 1.0f );
+  CHECK( ( *inv )[1].polarity == 0u );
+  /* 0x7 = NAND of both variables: nand2 as is, expected polarity 0 */
+  kitty::create_from_hex_string( tt, "7" );
+  auto const nand_7 = lib.get_supergates( tt );
+  REQUIRE( nand_7 != nullptr );
+  REQUIRE( nand_7->size() == 1 );
+  CHECK( ( *nand_7 )[0].root->name == "nand2" );
+  CHECK( ( *nand_7 )[0].area == 2.0f );
+  CHECK( ( *nand_7 )[0].worstDelay == 1.0f );
+  CHECK( ( *nand_7 )[0].tdelay[0] == 1.0f );
+  CHECK( ( *nand_7 )[0].tdelay[1] == 1.0f );
+  CHECK( ( *nand_7 )[0].polarity == 0u );
+  /* 0xb = NAND with variable 0 complemented: expected polarity 1 */
+  kitty::create_from_hex_string( tt, "b" );
+  auto const nand_b = lib.get_supergates( tt );
+  REQUIRE( nand_b != nullptr );
+  REQUIRE( nand_b->size() == 1 );
+  CHECK( ( *nand_b )[0].root->name == "nand2" );
+  CHECK( ( *nand_b )[0].area == 2.0f );
+  CHECK( ( *nand_b )[0].worstDelay == 1.0f );
+  CHECK( ( *nand_b )[0].tdelay[0] == 1.0f );
+  CHECK( ( *nand_b )[0].tdelay[1] == 1.0f );
+  CHECK( ( *nand_b )[0].polarity == 1u );
+  /* 0xd = NAND with variable 1 complemented: expected polarity 2 */
+  kitty::create_from_hex_string( tt, "d" );
+  auto const nand_d = lib.get_supergates( tt );
+  REQUIRE( nand_d != nullptr );
+  REQUIRE( nand_d->size() == 1 );
+  CHECK( ( *nand_d )[0].root->name == "nand2" );
+  CHECK( ( *nand_d )[0].area == 2.0f );
+  CHECK( ( *nand_d )[0].worstDelay == 1.0f );
+  CHECK( ( *nand_d )[0].tdelay[0] == 1.0f );
+  CHECK( ( *nand_d )[0].tdelay[1] == 1.0f );
+  CHECK( ( *nand_d )[0].polarity == 2u );
+  /* 0xe = NAND with both variables complemented: expected polarity 3 */
+  kitty::create_from_hex_string( tt, "e" );
+  auto const nand_e = lib.get_supergates( tt );
+  REQUIRE( nand_e != nullptr );
+  REQUIRE( nand_e->size() == 1 );
+  CHECK( ( *nand_e )[0].root->name == "nand2" );
+  CHECK( ( *nand_e )[0].area == 2.0f );
+  CHECK( ( *nand_e )[0].worstDelay == 1.0f );
+  CHECK( ( *nand_e )[0].tdelay[0] == 1.0f );
+  CHECK( ( *nand_e )[0].tdelay[1] == 1.0f );
+  CHECK( ( *nand_e )[0].polarity == 3u );
+}
+
+TEST_CASE( "Complete library generation", "[tech_library]" )
+{
+  std::vector<gate> gates;
+
+  std::istringstream in( test_library );
+  /* Report a parse failure through Catch2 instead of std::abort(): aborting
+   * takes down the whole test binary, and the return after it was unreachable.
+   * REQUIRE ends only this test case, which is right because nothing below is
+   * meaningful without a populated gate list. */
+  auto const result = lorina::read_genlib( in, genlib_reader( gates ) );
+  REQUIRE( result == lorina::return_code::success );
+
+  tech_library<4> lib( gates );
+
+  CHECK( lib.max_gate_size() == 4 );
+  CHECK( lib.get_inverter_info() == std::make_tuple( 1.0f, 0.9f, 2u ) ); /* presumably (area, delay, gate id) of inv3, the third entry -- confirm */
+  /* each function that kitty::exact_np_enumeration hands to the callback must be served by a supergate rooted at the originating gate */
+  for ( auto const& gate : gates )
+  {
+    auto const tt = gate.function;
+
+    const auto test_enumeration = [&]( auto const& tt, auto, auto ) {
+      const auto static_tt = kitty::extend_to<4>( tt );
+
+      auto const supergates = lib.get_supergates( static_tt );
+
+      REQUIRE( supergates != nullptr ); /* REQUIRE, not CHECK: the list is dereferenced by the loop below */
+
+      bool found = false;
+      for ( auto const& supergate : *supergates )
+      {
+        if ( supergate.root->id == gate.id )
+        {
+          found = true;
+          break;
+        }
+      }
+
+      CHECK( found == true );
+    };
+
+    kitty::exact_np_enumeration( tt, test_enumeration );
+  }
+  
+}
\ No newline at end of file

From f031922d636177b8a34f6d7a66af2a462b3aed1b Mon Sep 17 00:00:00 2001
From: aletempiac <alessandro.tempia@gmail.com>
Date: Fri, 2 Apr 2021 15:01:10 +0200
Subject: [PATCH 02/40] Tech mapper fixes

---
 include/mockturtle/algorithms/mapper.hpp | 41 +++++++++---------------
 1 file changed, 16 insertions(+), 25 deletions(-)

diff --git a/include/mockturtle/algorithms/mapper.hpp b/include/mockturtle/algorithms/mapper.hpp
index 1f96ea046..0ac3f4dc2 100644
--- a/include/mockturtle/algorithms/mapper.hpp
+++ b/include/mockturtle/algorithms/mapper.hpp
@@ -241,36 +241,23 @@ class tech_map_impl
       const auto index = ntk.node_to_index( n );
       auto& node_data = node_match[index];
 
+      node_data.est_refs[0] = node_data.est_refs[1] = node_data.est_refs[2] = static_cast<float>( ntk.fanout_size( n ) );
+
       if ( ntk.is_constant( n ) )
       {
         /* all terminals have flow 1.0 */
-        node_data.est_refs[0] = node_data.est_refs[1] = node_data.est_refs[2] = 1.0f;
+        node_data.flows[0] = node_data.flows[1] = node_data.flows[2] = 0.0f;
         node_data.arrival[0] = node_data.arrival[1] = 0.0f;
         match_constants( index );
       }
       else if ( ntk.is_pi( n ) )
       {
         /* all terminals have flow 1.0 */
-        node_data.est_refs[0] = node_data.est_refs[1] = node_data.est_refs[2] = 1.0f;
+        node_data.flows[0] = node_data.flows[1] = node_data.flows[2] = 0.0f;
         node_data.arrival[0] = 0.0f;
         /* PIs have the negative phase implemented with an inverter */
         node_data.arrival[1] = lib_inv_delay;
       }
-      else
-      {
-        node_data.est_refs[0] = node_data.est_refs[1] = 0.0f;
-        node_data.est_refs[2] = static_cast<float>( ntk.fanout_size( n ) );
-        ntk.foreach_fanin( n, [&]( auto const& s ) {
-          if ( !ntk.is_pi( ntk.get_node( s ) ) )
-          {
-            const auto c_index = ntk.node_to_index( ntk.get_node( s ) );
-            if ( ntk.is_complemented( s ) )
-              node_match[c_index].est_refs[1] += 1.0f;
-            else
-              node_match[c_index].est_refs[0] += 1.0f;
-          }
-        } );
-      }
     } );
   }
 
@@ -286,7 +273,8 @@ class tech_map_impl
       auto i = 0u;
       for ( auto& cut : cuts.cuts( index ) )
       {
-        if ( cut->size() == 1 )
+        /* ignore unit cut */
+        if ( cut->size() == 1 && *cut->begin() == index )
         {
           ( *cut )->data.ignore = true;
           continue;
@@ -325,7 +313,9 @@ class tech_map_impl
     for ( auto const& n : top_order )
     {
       if ( ntk.is_constant( n ) || ntk.is_pi( n ) )
+      {
         continue;
+      }
 
       /* match positive phase */
       match_phase<DO_AREA>( n, 0u );
@@ -900,8 +890,8 @@ class tech_map_impl
       use_one = worst_arrival_npos < ( node_data.required[0] + epsilon - required_margin_factor * lib_inv_delay );
     }
 
-    /* condition on not used phases, evaluate a substitution */
-    if constexpr ( DO_AREA )
+    /* condition on not used phases, evaluate a substitution during exact area recovery */
+    if constexpr ( ELA )
     {
       if ( iteration != 0 )
       {
@@ -927,10 +917,8 @@ class tech_map_impl
           {
             auto size_phase = cuts.cuts( index )[node_data.best_cut[phase]].size();
             auto size_nphase = cuts.cuts( index )[node_data.best_cut[nphase]].size();
-            auto inverter_cost = 0;
-            if ( ELA )
-              inverter_cost = lib_inv_area;
-            if ( compare_map<DO_AREA>( node_data.arrival[nphase] + lib_inv_delay, node_data.arrival[phase],  node_data.flows[nphase] + inverter_cost, node_data.flows[phase], size_nphase, size_phase ) )
+
+            if ( compare_map<DO_AREA>( node_data.arrival[nphase] + lib_inv_delay, node_data.arrival[phase],  node_data.flows[nphase] + lib_inv_area, node_data.flows[phase], size_nphase, size_phase ) )
             {
               /* invert the choice */
               use_zero = !use_zero;
@@ -1623,12 +1611,14 @@ class exact_map_impl
       {
         /* all terminals have flow 1.0 */
         node_data.est_refs[0] = node_data.est_refs[1] = node_data.est_refs[2] = 1.0f;
+        node_data.flows[0] = node_data.flows[1] = node_data.flows[2] = 1.0f;
         node_data.arrival[0] = node_data.arrival[1] = 0.0f;
       }
       else if ( ntk.is_pi( n ) )
       {
         /* all terminals have flow 1.0 */
         node_data.est_refs[0] = node_data.est_refs[1] = node_data.est_refs[2] = 1.0f;
+        node_data.flows[0] = node_data.flows[1] = node_data.flows[2] = 1.0f;
         node_data.arrival[0] = 0.0f;
         node_data.arrival[1] = lib_inv_delay;
       }
@@ -1662,7 +1652,8 @@ class exact_map_impl
       auto i = 0u;
       for ( auto& cut : cuts.cuts( index ) )
       {
-        if ( cut->size() == 1 )
+        /* ignore unit cut */
+        if ( cut->size() == 1 && *cut->begin() == index )
         {
           ( *cut )->data.ignore = true;
           continue;

From 53402046df518f7037d0c20d0e160444a3cb701a Mon Sep 17 00:00:00 2001
From: aletempiac <alessandro.tempia@gmail.com>
Date: Mon, 5 Apr 2021 15:31:51 +0200
Subject: [PATCH 03/40] Tech mapper test updates

---
 include/mockturtle/algorithms/mapper.hpp  |  38 ++++-
 include/mockturtle/utils/tech_library.hpp |   3 +-
 test/algorithms/mapper.cpp                | 183 +++++++++++++++++++---
 3 files changed, 203 insertions(+), 21 deletions(-)

diff --git a/include/mockturtle/algorithms/mapper.hpp b/include/mockturtle/algorithms/mapper.hpp
index 0ac3f4dc2..79147598f 100644
--- a/include/mockturtle/algorithms/mapper.hpp
+++ b/include/mockturtle/algorithms/mapper.hpp
@@ -124,7 +124,8 @@ struct map_stats
     std::cout << fmt::format( "[i] Area = {:>5.2f}; Delay = {:>5.2f}\n", area, delay );
     std::cout << fmt::format( "[i] Mapping runtime = {:>5.2f} secs\n", to_seconds( time_mapping ) );
     std::cout << fmt::format( "[i] Total runtime   = {:>5.2f} secs\n", to_seconds( time_total ) );
-    std::cout << "[i] Gates usage report:\n" << gates_usage;
+    if ( !gates_usage.empty() )
+      std::cout << "[i] Gates usage report:\n" << gates_usage;
   }
 };
 
@@ -1714,7 +1715,29 @@ class exact_map_impl
       /* try to drop one phase */
       match_drop_phase<DO_AREA, false>( n, 0u );
     }
+
+    double area_old = area;
     set_mapping_refs<false>();
+
+    /* round stats */
+    if ( ps.verbose )
+    {
+      std::stringstream stats{};
+      float area_gain = 0.0f;
+
+      if ( iteration != 1 )
+        area_gain = float( ( area_old - area ) / area_old * 100 );
+
+      if constexpr ( DO_AREA )
+      {
+        stats << fmt::format( "[i] AreaFlow : Delay = {:>12.2f}  Area = {:>12.2f}  {:>5.2f} %\n", delay, area, area_gain );
+      }
+      else
+      {
+        stats << fmt::format( "[i] Delay    : Delay = {:>12.2f}  Area = {:>12.2f}  {:>5.2f} %\n", delay, area, area_gain );
+      }
+      st.round_stats.push_back( stats.str() );
+    }
   }
 
 
@@ -1747,7 +1770,18 @@ class exact_map_impl
       /* try to drop one phase */
       match_drop_phase<true, true>( n, 0u );
     }
-    set_mapping_refs<true>();
+
+    double area_old = area;
+    set_mapping_refs<false>();
+
+    /* round stats */
+    if ( ps.verbose )
+    {
+      float area_gain = float( ( area_old - area ) / area_old * 100 );
+      std::stringstream stats{};
+      stats << fmt::format( "[i] Area     : Delay = {:>12.2f}  Area = {:>12.2f}  {:>5.2f} %\n", delay, area, area_gain );
+      st.round_stats.push_back( stats.str() );
+    }
   }
 
 
diff --git a/include/mockturtle/utils/tech_library.hpp b/include/mockturtle/utils/tech_library.hpp
index 273284a5a..648b6b8d4 100644
--- a/include/mockturtle/utils/tech_library.hpp
+++ b/include/mockturtle/utils/tech_library.hpp
@@ -372,7 +372,7 @@ struct exact_library_params
       mockturtle::exact_library<mockturtle::mig_network, mockturtle::mig_npn_resynthesis, 4> lib( mig_resyn );
    \endverbatim
  */
-template<typename Ntk, class RewritingFn, unsigned NInputs>
+template<typename Ntk, class RewritingFn, unsigned NInputs = 4u>
 class exact_library
 {
   using supergates_list_t = std::vector<exact_supergate<Ntk, NInputs>>;
@@ -499,6 +499,7 @@ class exact_library
       if ( sg.tdelay[i] != 0.0f )
         sg.n_inputs++;
     }
+    sg.worstDelay *= -1;
   }
 
 
diff --git a/test/algorithms/mapper.cpp b/test/algorithms/mapper.cpp
index 298d9e5a4..478f9ca08 100644
--- a/test/algorithms/mapper.cpp
+++ b/test/algorithms/mapper.cpp
@@ -3,14 +3,19 @@
 #include <cstdint>
 #include <vector>
 
-#include <lorina/genlib.hpp>
+#include <mockturtle/algorithms/mapper.hpp>
+#include <mockturtle/algorithms/node_resynthesis/mig_npn.hpp>
+#include <mockturtle/algorithms/node_resynthesis/xmg_npn.hpp>
+#include <mockturtle/algorithms/node_resynthesis/xag_npn.hpp>
+#include <mockturtle/generators/arithmetic.hpp>
 #include <mockturtle/io/genlib_reader.hpp>
 #include <mockturtle/utils/tech_library.hpp>
 #include <mockturtle/networks/aig.hpp>
 #include <mockturtle/networks/mig.hpp>
+#include <mockturtle/networks/xmg.hpp>
+#include <mockturtle/networks/xag.hpp>
 #include <mockturtle/networks/klut.hpp>
-#include <mockturtle/algorithms/mapper.hpp>
-#include <mockturtle/generators/arithmetic.hpp>
+#include <lorina/genlib.hpp>
 
 
 
@@ -21,20 +26,21 @@ std::string const test_library =  "GATE   inv1    1	O=!a;     PIN * INV 1 999 0.
                                   "GATE   nand2	  2	O=!(ab);  PIN * INV 1 999 1.0 0.2 1.0 0.2\n"
                                   "GATE   xor2	  5	O=[ab];   PIN * UNKNOWN 2 999 1.9 0.5 1.9 0.5\n"
                                   "GATE   mig3    3	O=<abc>;  PIN * INV 1 999 2.0 0.2 2.0 0.2\n"
+                                  "GATE   buf    	2	O=a;      PIN * NONINV 1 999 1.0 0.0 1.0 0.0\n"
                                   "GATE   zero	  0	O=0;\n"
                                   "GATE   one		  0	O=1;";
 
 
 TEST_CASE( "Map of MAJ3", "[mapper]" )
 {
-  std::vector<mockturtle::gate> gates;
+  std::vector<gate> gates;
 
   std::istringstream in( test_library );
-  auto result = lorina::read_genlib( in, mockturtle::genlib_reader( gates ) );
+  auto result = lorina::read_genlib( in, genlib_reader( gates ) );
   
   CHECK( result == lorina::return_code::success );
 
-  mockturtle::tech_library<3> lib( gates );
+  tech_library<3> lib( gates );
 
   aig_network aig;
   const auto a = aig.create_pi();
@@ -58,14 +64,14 @@ TEST_CASE( "Map of MAJ3", "[mapper]" )
 
 TEST_CASE( "Map of bad MAJ3 and constant output", "[mapper]" )
 {
-  std::vector<mockturtle::gate> gates;
+  std::vector<gate> gates;
 
   std::istringstream in( test_library );
-  auto result = lorina::read_genlib( in, mockturtle::genlib_reader( gates ) );
+  auto result = lorina::read_genlib( in, genlib_reader( gates ) );
   
   CHECK( result == lorina::return_code::success );
 
-  mockturtle::tech_library<3> lib( gates );
+  tech_library<3> lib( gates );
 
   aig_network aig;
   const auto a = aig.create_pi();
@@ -90,14 +96,14 @@ TEST_CASE( "Map of bad MAJ3 and constant output", "[mapper]" )
 
 TEST_CASE( "Map of full adder", "[mapper]" )
 {
-  std::vector<mockturtle::gate> gates;
+  std::vector<gate> gates;
 
   std::istringstream in( test_library );
-  auto result = lorina::read_genlib( in, mockturtle::genlib_reader( gates ) );
+  auto result = lorina::read_genlib( in, genlib_reader( gates ) );
   
   CHECK( result == lorina::return_code::success );
 
-  mockturtle::tech_library<3> lib( gates );
+  tech_library<3> lib( gates );
 
   aig_network aig;
   const auto a = aig.create_pi();
@@ -126,14 +132,14 @@ TEST_CASE( "Map of full adder", "[mapper]" )
 
 TEST_CASE( "Map with inverters", "[mapper]" )
 {
-  std::vector<mockturtle::gate> gates;
+  std::vector<gate> gates;
 
   std::istringstream in( test_library );
-  auto result = lorina::read_genlib( in, mockturtle::genlib_reader( gates ) );
+  auto result = lorina::read_genlib( in, genlib_reader( gates ) );
   
   CHECK( result == lorina::return_code::success );
 
-  mockturtle::tech_library<3> lib( gates );
+  tech_library<3> lib( gates );
 
   aig_network aig;
   const auto a = aig.create_pi();
@@ -163,14 +169,14 @@ TEST_CASE( "Map with inverters", "[mapper]" )
 
 TEST_CASE( "Map for inverters minimization", "[mapper]" )
 {
-  std::vector<mockturtle::gate> gates;
+  std::vector<gate> gates;
 
   std::istringstream in( test_library );
-  auto result = lorina::read_genlib( in, mockturtle::genlib_reader( gates ) );
+  auto result = lorina::read_genlib( in, genlib_reader( gates ) );
   
   CHECK( result == lorina::return_code::success );
 
-  mockturtle::tech_library<3> lib( gates );
+  tech_library<3> lib( gates );
 
   aig_network aig;
   const auto a = aig.create_pi();
@@ -194,4 +200,145 @@ TEST_CASE( "Map for inverters minimization", "[mapper]" )
   CHECK( st.area < 4.0f + eps );
   CHECK( st.delay > 2.9f - eps );
   CHECK( st.delay < 2.9f + eps );
+}
+
+TEST_CASE( "Map of buffer and constant outputs", "[mapper]" )
+{
+  std::vector<gate> gates;
+
+  std::istringstream in( test_library );
+  auto result = lorina::read_genlib( in, genlib_reader( gates ) );
+  
+  CHECK( result == lorina::return_code::success );
+
+  tech_library<3> lib( gates );
+
+  aig_network aig;
+  const auto a = aig.create_pi();
+  const auto b = aig.create_pi();
+  const auto c = aig.create_pi();
+  const auto d = aig.create_pi();
+  /* deliberately redundant AND structure: several signals below reduce to a constant or to a plain input */
+  const auto n5 = aig.create_and( a, d );
+  const auto n6 = aig.create_and( a, !c );
+  const auto n7 = aig.create_and( !c, n5 );
+  const auto n8 = aig.create_and( c, n6 ); /* c & a & !c: functionally constant false */
+  const auto n9 = aig.create_and( !n6, n7 ); /* n7 implies n6, so this is functionally constant false */
+  const auto n10 = aig.create_and( n7, n8 );
+  const auto n11 = aig.create_and( a, n10 );
+  const auto n12 = aig.create_and( !d, n11 ); /* inherits constant false from n8 through n10 and n11 */
+  const auto n13 = aig.create_and( !d, !n7 ); /* n7 is false whenever d is false, so n13 equals !d */
+  const auto n14 = aig.create_and( !n6, !n7 ); /* n7 implies n6, so n14 equals !n6 */
+  /* outputs: constant true, PI b unchanged, two constant-false functions (n9, n12), !n13 (equals d) and n14 */
+  aig.create_po( aig.get_constant( true ) );
+  aig.create_po( b );
+  aig.create_po( n9 );
+  aig.create_po( n12 );
+  aig.create_po( !n13 );
+  aig.create_po( n14 );
+
+  map_params ps;
+  map_stats st;
+  klut_network luts = tech_map( aig, lib, ps, &st );
+
+  const float eps{0.005f};
+
+  CHECK( luts.size() == 9u );
+  CHECK( luts.num_pis() == 4u );
+  CHECK( luts.num_pos() == 6u );
+  CHECK( luts.num_gates() == 3u );
+  CHECK( st.area > 5.0f - eps );
+  CHECK( st.area < 5.0f + eps );
+  CHECK( st.delay > 1.9f - eps );
+  CHECK( st.delay < 1.9f + eps );
+}
+
+TEST_CASE( "Exact map of bad MAJ3 and constant output", "[mapper]" )
+{
+  mig_npn_resynthesis resyn{true};
+  /* exact library of MIG structures obtained from the resynthesis function; NInputs is left at its default */
+  exact_library<mig_network, mig_npn_resynthesis>  lib( resyn );
+
+  aig_network aig;
+  const auto a = aig.create_pi();
+  const auto b = aig.create_pi();
+  const auto c = aig.create_pi();
+  /* "bad" MAJ3: maj(a, maj(a, b, c), c) is functionally just maj(a, b, c); the second output is constant true */
+  const auto f = aig.create_maj( a, aig.create_maj( a, b, c ), c );
+  aig.create_po( f );
+  aig.create_po( aig.get_constant( true ) );
+  /* map the AIG into a MIG; `st` receives the area and delay that are checked below */
+  map_params ps;
+  map_stats st;
+  mig_network mig = exact_map( aig, lib, ps, &st );
+  /* the redundant nesting is expected to collapse into a single majority gate */
+  CHECK( mig.size() == 5u );
+  CHECK( mig.num_pis() == 3u );
+  CHECK( mig.num_pos() == 2u );
+  CHECK( mig.num_gates() == 1u );
+  CHECK( st.area == 1.0f );
+  CHECK( st.delay == 1.0f );
+}
+
+TEST_CASE( "Exact map of full adder", "[mapper]" )
+{
+  xmg_npn_resynthesis resyn;
+  /* exact library of XMG structures obtained from the resynthesis function; NInputs is left at its default */
+  exact_library<xmg_network, xmg_npn_resynthesis>  lib( resyn );
+
+  aig_network aig;
+  const auto a = aig.create_pi();
+  const auto b = aig.create_pi();
+  const auto c = aig.create_pi();
+  /* subject graph: a full adder over the three inputs, with sum and carry as the two outputs */
+  const auto [sum, carry] = full_adder( aig, a, b, c );
+  aig.create_po( sum );
+  aig.create_po( carry );
+  /* map the AIG into an XMG; `st` receives the area and delay that are checked below */
+  map_params ps;
+  map_stats st;
+  xmg_network xmg = exact_map( aig, lib, ps, &st );
+  /* the mapped adder is expected to need three gates on two logic levels */
+  CHECK( xmg.size() == 7u );
+  CHECK( xmg.num_pis() == 3u );
+  CHECK( xmg.num_pos() == 2u );
+  CHECK( xmg.num_gates() == 3u );
+  CHECK( st.area == 3.0f );
+  CHECK( st.delay == 2.0f );
+}
+
+TEST_CASE( "Exact map should avoid cycles", "[mapper]" )
+{
+  using resyn_fn = xag_npn_resynthesis<aig_network>;
+  /* AIG-to-AIG exact mapping; tagged [mapper] like the other tests of this file so that tag filters select it */
+  resyn_fn resyn;
+
+  exact_library<aig_network, resyn_fn>  lib( resyn );
+
+  aig_network aig;
+  const auto x0 = aig.create_pi();
+  const auto x1 = aig.create_pi();
+  const auto x2 = aig.create_pi();
+  /* two XNORs sharing x0: n3 = x0 XNOR (x1 & !x2) and n7 = x0 XNOR (x1 & x2), four ANDs each */
+  const auto n0 = aig.create_and( x1, !x2 );
+  const auto n1 = aig.create_and( !x0, n0 );
+  const auto n2 = aig.create_and( x0, !n0 );
+  const auto n3 = aig.create_and( !n1, !n2 );
+  const auto n4 = aig.create_and( x1, x2 );
+  const auto n5 = aig.create_and( x0, !n4 );
+  const auto n6 = aig.create_and( !x0, n4 );
+  const auto n7 = aig.create_and( !n5, !n6 );
+  aig.create_po( n3 );
+  aig.create_po( n7 );
+  
+  map_params ps;
+  map_stats st;
+  aig_network res = exact_map( aig, lib, ps, &st );
+  
+  CHECK( res.size() == 12u ); /* unsigned literals, as in the sibling tests, avoid signed/unsigned comparisons */
+  CHECK( res.num_pis() == 3u );
+  CHECK( res.num_pos() == 2u );
+  CHECK( res.num_gates() == 8u );
+  CHECK( st.area == 8.0f );
+  CHECK( st.delay == 3.0f );
 }
\ No newline at end of file

From b00ee0eb68bc94bae3bf27c2f0e375596a5b33fa Mon Sep 17 00:00:00 2001
From: aletempiac <alessandro.tempia@gmail.com>
Date: Tue, 27 Apr 2021 14:23:51 +0200
Subject: [PATCH 04/40] Updates and fixes

---
 experiments/mapper.cpp                        |  56 ++---
 .../cut_enumeration/exact_map_cut.hpp         |  14 +-
 .../cut_enumeration/tech_map_cut.hpp          |  14 +-
 include/mockturtle/algorithms/mapper.hpp      | 200 ++++++++----------
 include/mockturtle/utils/tech_library.hpp     | 147 +++++++------
 test/algorithms/mapper.cpp                    |  16 +-
 test/utils/tech_library.cpp                   |  62 +++---
 7 files changed, 243 insertions(+), 266 deletions(-)

diff --git a/experiments/mapper.cpp b/experiments/mapper.cpp
index bd4f89687..3a59d4d07 100644
--- a/experiments/mapper.cpp
+++ b/experiments/mapper.cpp
@@ -31,6 +31,8 @@
 #include <lorina/genlib.hpp>
 #include <mockturtle/utils/tech_library.hpp>
 #include <mockturtle/algorithms/mapper.hpp>
+#include <mockturtle/algorithms/node_resynthesis.hpp>
+#include <mockturtle/algorithms/node_resynthesis/mig_npn.hpp>
 #include <mockturtle/io/aiger_reader.hpp>
 #include <mockturtle/io/genlib_reader.hpp>
 #include <mockturtle/networks/aig.hpp>
@@ -40,27 +42,29 @@
 
 #include <experiments.hpp>
 
-std::string const mcnc_library =  "GATE   inv1    1	O=!a;		        PIN * INV 1 999 0.9 0.3 0.9 0.3\n"
-                                  "GATE   inv2	  2	O=!a;		        PIN * INV 2 999 1.0 0.1 1.0 0.1\n"
-                                  "GATE   inv3	  3	O=!a;		        PIN * INV 3 999 1.1 0.09 1.1 0.09\n"
-                                  "GATE   inv4	  4	O=!a;		        PIN * INV 4 999 1.2 0.07 1.2 0.07\n"
-                                  "GATE   nand2	  2	O=!(ab);		    PIN * INV 1 999 1.0 0.2 1.0 0.2\n"
-                                  "GATE   nand3	  3	O=!(abc);	      PIN * INV 1 999 1.1 0.3 1.1 0.3\n"
-                                  "GATE   nand4   4	O=!(abcd);	    PIN * INV 1 999 1.4 0.4 1.4 0.4\n"
-                                  "GATE   nor2	  2	O=!{ab};		    PIN * INV 1 999 1.4 0.5 1.4 0.5\n"
-                                  "GATE   nor3	  3	O=!{abc};	      PIN * INV 1 999 2.4 0.7 2.4 0.7\n"
-                                  "GATE   nor4	  4	O=!{abcd};	    PIN * INV 1 999 3.8 1.0 3.8 1.0\n"
-                                  "GATE   and2	  3	O=(ab);		      PIN * NONINV 1 999 1.9 0.3 1.9 0.3\n"
-                                  "GATE   or2		  3	O={ab};		      PIN * NONINV 1 999 2.4 0.3 2.4 0.3\n"
-                                  "GATE   xor2a	  5	O=[ab];     	  PIN * UNKNOWN 2 999 1.9 0.5 1.9 0.5\n"
-                                  "GATE   xnor2a	5	O=![ab];		    PIN * UNKNOWN 2 999 2.1 0.5 2.1 0.5\n"
-                                  "GATE   aoi21	  3	O=!{(ab)c};	    PIN * INV 1 999 1.6 0.4 1.6 0.4\n"
-                                  "GATE   aoi22	  4	O=!{(ab)(cd)};	PIN * INV 1 999 2.0 0.4 2.0 0.4\n"
-                                  "GATE   oai21	  3	O=!({ab}c);	    PIN * INV 1 999 1.6 0.4 1.6 0.4\n"
-                                  "GATE   oai22	  4	O=!({ab}{cd});	PIN * INV 1 999 2.0 0.4 2.0 0.4\n"
-                                  "GATE   buf    	2	O=a;        	  PIN * NONINV 1 999 1.0 0.0 1.0 0.0\n"
-                                  "GATE   zero	  0	O=0;\n"
-                                  "GATE   one		  0	O=1;";
+std::string const mcnc_library =  "GATE   inv1    1 O=!a;           PIN * INV 1 999 0.9 0.3 0.9 0.3\n"
+                                  "GATE   inv2    2 O=!a;           PIN * INV 2 999 1.0 0.1 1.0 0.1\n"
+                                  "GATE   inv3    3 O=!a;           PIN * INV 3 999 1.1 0.09 1.1 0.09\n"
+                                  "GATE   inv4    4 O=!a;           PIN * INV 4 999 1.2 0.07 1.2 0.07\n"
+                                  "GATE   nand2   2 O=!(ab);        PIN * INV 1 999 1.0 0.2 1.0 0.2\n"
+                                  "GATE   nand3   3 O=!(abc);	      PIN * INV 1 999 1.1 0.3 1.1 0.3\n"
+                                  "GATE   nand4   4 O=!(abcd);      PIN * INV 1 999 1.4 0.4 1.4 0.4\n"
+                                  "GATE   nor2    2 O=!{ab};        PIN * INV 1 999 1.4 0.5 1.4 0.5\n"
+                                  "GATE   nor3    3 O=!{abc};       PIN * INV 1 999 2.4 0.7 2.4 0.7\n"
+                                  "GATE   nor4    4 O=!{abcd};      PIN * INV 1 999 3.8 1.0 3.8 1.0\n"
+                                  "GATE   and2    3 O=(ab);         PIN * NONINV 1 999 1.9 0.3 1.9 0.3\n"
+                                  "GATE   or2     3 O={ab};         PIN * NONINV 1 999 2.4 0.3 2.4 0.3\n"
+                                  "GATE   xor2a   5 O=[ab];         PIN * UNKNOWN 2 999 1.9 0.5 1.9 0.5\n"
+                                  "#GATE  xor2b   5 O=[ab];         PIN * UNKNOWN 2 999 1.9 0.5 1.9 0.5\n"
+                                  "GATE   xnor2a  5 O=![ab];        PIN * UNKNOWN 2 999 2.1 0.5 2.1 0.5\n"
+                                  "#GATE  xnor2b  5 O=![ab];        PIN * UNKNOWN 2 999 2.1 0.5 2.1 0.5\n"
+                                  "GATE   aoi21   3 O=!{(ab)c};     PIN * INV 1 999 1.6 0.4 1.6 0.4\n"
+                                  "GATE   aoi22   4 O=!{(ab)(cd)};  PIN * INV 1 999 2.0 0.4 2.0 0.4\n"
+                                  "GATE   oai21   3 O=!({ab}c);     PIN * INV 1 999 1.6 0.4 1.6 0.4\n"
+                                  "GATE   oai22   4 O=!({ab}{cd});  PIN * INV 1 999 2.0 0.4 2.0 0.4\n"
+                                  "GATE   buf     2 O=a;            PIN * NONINV 1 999 1.0 0.0 1.0 0.0\n"
+                                  "GATE   zero    0 O=0;\n"
+                                  "GATE   one     0 O=1;";
 
 int main()
 {
@@ -74,14 +78,10 @@ int main()
   std::vector<gate> gates;
 
   std::istringstream in( mcnc_library );
-  if ( lorina::read_genlib( in, genlib_reader( gates ) ) != lorina::return_code::success )
-  {
-    std::cout << "ERROR IN" << std::endl;
-    std::abort();
-    return -1;
-  }
+  lorina::read_genlib( in, genlib_reader( gates ) );
 
-  tech_library<5> lib( gates );
+  tech_library_params tps;
+  tech_library<5> lib( gates, tps );
 
   for ( auto const& benchmark : epfl_benchmarks() )
   {
diff --git a/include/mockturtle/algorithms/cut_enumeration/exact_map_cut.hpp b/include/mockturtle/algorithms/cut_enumeration/exact_map_cut.hpp
index 9a3455b98..18bdc3c82 100644
--- a/include/mockturtle/algorithms/cut_enumeration/exact_map_cut.hpp
+++ b/include/mockturtle/algorithms/cut_enumeration/exact_map_cut.hpp
@@ -33,10 +33,10 @@
 #pragma once
 
 #include <algorithm>
-#include <vector>
 #include <cstdint>
 #include <iomanip>
 #include <iostream>
+#include <vector>
 
 #include "../cut_enumeration.hpp"
 
@@ -49,16 +49,16 @@ namespace mockturtle
 */
 struct cut_enumeration_exact_map_cut
 {
-  uint32_t delay{0};
-  float flow{0};
-  uint8_t match_index{0};
-  bool ignore{false};
+  uint32_t delay{ 0 };
+  float flow{ 0 };
+  uint8_t match_index{ 0 };
+  bool ignore{ false };
 };
 
 template<bool ComputeTruth>
 bool operator<( cut_type<ComputeTruth, cut_enumeration_exact_map_cut> const& c1, cut_type<ComputeTruth, cut_enumeration_exact_map_cut> const& c2 )
 {
-  constexpr auto eps{0.005f};
+  constexpr auto eps{ 0.005f };
   if ( c1->data.flow < c2->data.flow - eps )
     return true;
   if ( c1->data.flow > c2->data.flow + eps )
@@ -76,7 +76,7 @@ struct cut_enumeration_update_cut<cut_enumeration_exact_map_cut>
   template<typename Cut, typename NetworkCuts, typename Ntk>
   static void apply( Cut& cut, NetworkCuts const& cuts, Ntk const& ntk, node<Ntk> const& n )
   {
-    uint32_t delay{0};
+    uint32_t delay{ 0 };
     float flow = 1.0f;
 
     for ( auto leaf : cut )
diff --git a/include/mockturtle/algorithms/cut_enumeration/tech_map_cut.hpp b/include/mockturtle/algorithms/cut_enumeration/tech_map_cut.hpp
index 61905c356..6c84013e8 100644
--- a/include/mockturtle/algorithms/cut_enumeration/tech_map_cut.hpp
+++ b/include/mockturtle/algorithms/cut_enumeration/tech_map_cut.hpp
@@ -33,10 +33,10 @@
 #pragma once
 
 #include <algorithm>
-#include <vector>
 #include <cstdint>
 #include <iomanip>
 #include <iostream>
+#include <vector>
 
 #include "../cut_enumeration.hpp"
 
@@ -45,16 +45,16 @@ namespace mockturtle
 
 struct cut_enumeration_tech_map_cut
 {
-  uint32_t delay{0};
-  float flow{0};
-  uint8_t match_index{0};
-  bool ignore{false};
+  uint32_t delay{ 0 };
+  float flow{ 0 };
+  uint8_t match_index{ 0 };
+  bool ignore{ false };
 };
 
 template<bool ComputeTruth>
 bool operator<( cut_type<ComputeTruth, cut_enumeration_tech_map_cut> const& c1, cut_type<ComputeTruth, cut_enumeration_tech_map_cut> const& c2 )
 {
-  constexpr auto eps{0.005f};
+  constexpr auto eps{ 0.005f };
   if ( c1.size() < c2.size() )
     return true;
   if ( c1.size() > c2.size() )
@@ -72,7 +72,7 @@ struct cut_enumeration_update_cut<cut_enumeration_tech_map_cut>
   template<typename Cut, typename NetworkCuts, typename Ntk>
   static void apply( Cut& cut, NetworkCuts const& cuts, Ntk const& ntk, node<Ntk> const& n )
   {
-    uint32_t delay{0};
+    uint32_t delay{ 0 };
     float flow = 1.0f;
 
     for ( auto leaf : cut )
diff --git a/include/mockturtle/algorithms/mapper.hpp b/include/mockturtle/algorithms/mapper.hpp
index 79147598f..5a464d31e 100644
--- a/include/mockturtle/algorithms/mapper.hpp
+++ b/include/mockturtle/algorithms/mapper.hpp
@@ -37,14 +37,14 @@
 
 #include <fmt/format.h>
 
-#include "../utils/stopwatch.hpp"
+#include "../networks/klut.hpp"
 #include "../utils/node_map.hpp"
+#include "../utils/stopwatch.hpp"
 #include "../utils/tech_library.hpp"
 #include "../views/topo_view.hpp"
-#include "../networks/klut.hpp"
 #include "cut_enumeration.hpp"
-#include "cut_enumeration/tech_map_cut.hpp"
 #include "cut_enumeration/exact_map_cut.hpp"
+#include "cut_enumeration/tech_map_cut.hpp"
 
 namespace mockturtle
 {
@@ -70,22 +70,22 @@ struct map_params
   cut_enumeration_params cut_enumeration_ps{};
 
   /*! \brief Required time for delay optimization. */
-  double required_time{0.0f};
+  double required_time{ 0.0f };
 
   /*! \brief Do area optimization. */
-  bool skip_delay_round{false};
+  bool skip_delay_round{ false };
 
   /*! \brief Number of rounds for area flow optimization. */
-  uint32_t area_flow_rounds{1u};
+  uint32_t area_flow_rounds{ 1u };
 
   /*! \brief Number of rounds for exact area optimization. */
-  uint32_t ela_rounds{2u};
+  uint32_t ela_rounds{ 2u };
 
   /*! \brief Use structural choices. */
-  bool choices{false};
+  bool choices{ false };
 
   /*! \brief Be verbose. */
-  bool verbose{false};
+  bool verbose{ false };
 };
 
 /*! \brief Statistics for mapper.
@@ -96,12 +96,12 @@ struct map_params
 struct map_stats
 {
   /*! \brief Area and delay results. */
-  double area{0};
-  double delay{0};
+  double area{ 0 };
+  double delay{ 0 };
 
   /*! \brief Runtime. */
-  stopwatch<>::duration time_mapping{0};
-  stopwatch<>::duration time_total{0};
+  stopwatch<>::duration time_mapping{ 0 };
+  stopwatch<>::duration time_total{ 0 };
 
   /*! \brief Cut enumeration stats. */
   cut_enumeration_stats cut_enumeration_st{};
@@ -113,7 +113,7 @@ struct map_stats
   std::string gates_usage{};
 
   /*! \brief Mapping error. */
-  bool mapping_error{false};
+  bool mapping_error{ false };
 
   void report() const
   {
@@ -125,7 +125,8 @@ struct map_stats
     std::cout << fmt::format( "[i] Mapping runtime = {:>5.2f} secs\n", to_seconds( time_mapping ) );
     std::cout << fmt::format( "[i] Total runtime   = {:>5.2f} secs\n", to_seconds( time_total ) );
     if ( !gates_usage.empty() )
-      std::cout << "[i] Gates usage report:\n" << gates_usage;
+      std::cout << "[i] Gates usage report:\n"
+                << gates_usage;
   }
 };
 
@@ -136,13 +137,13 @@ template<unsigned NInputs>
 struct node_match_tech
 {
   /* best gate match for positive and negative output phases */
-  supergate<NInputs> const* best_supergate[2] = {nullptr, nullptr};
+  supergate<NInputs> const* best_supergate[2] = { nullptr, nullptr };
   /* fanin pin phases for both output phases */
   uint8_t phase[2];
   /* best cut index for both phases */
   uint32_t best_cut[2];
   /* node is mapped using only one phase */
-  bool same_match{false};
+  bool same_match{ false };
 
   /* arrival time at node output */
   double arrival[2];
@@ -159,7 +160,6 @@ struct node_match_tech
   float flows[3];
 };
 
-
 template<class Ntk, unsigned NInputs, typename CutData>
 class tech_map_impl
 {
@@ -262,7 +262,6 @@ class tech_map_impl
     } );
   }
 
-
   void compute_matches()
   {
     /* match gates */
@@ -280,19 +279,19 @@ class tech_map_impl
           ( *cut )->data.ignore = true;
           continue;
         }
-        const auto tt = cuts.truth_table( *cut );
-        if ( tt.num_vars() > NInputs )
+        if ( cut->size() > NInputs )
         {
           /* Ignore cuts too big to be mapped using the library */
           ( *cut )->data.ignore = true;
           continue;
         }
+        const auto tt = cuts.truth_table( *cut );
         const auto fe = kitty::extend_to<NInputs>( tt );
         auto const supergates_pos = library.get_supergates( fe );
         auto const supergates_neg = library.get_supergates( ~fe );
         if ( supergates_pos != nullptr || supergates_neg != nullptr )
         {
-          supergate_t match{supergates_pos, supergates_neg};
+          supergate_t match{ supergates_pos, supergates_neg };
 
           node_matches.push_back( match );
           ( *cut )->data.match_index = i++;
@@ -303,7 +302,7 @@ class tech_map_impl
           ( *cut )->data.ignore = true;
         }
       }
-      
+
       matches[index] = node_matches;
     } );
   }
@@ -354,7 +353,6 @@ class tech_map_impl
     return success;
   }
 
-
   bool compute_mapping_exact_area()
   {
     for ( auto const& n : top_order )
@@ -541,9 +539,9 @@ class tech_map_impl
   {
     for ( auto i = 0u; i < node_match.size(); ++i )
     {
-      node_match[i].required[0] = node_match[i].required[1] = std::numeric_limits<float>::max();
+      node_match[i].required[0] = node_match[i].required[1] = std::numeric_limits<double>::max();
     }
-    
+
     /* return in case of `skip_delay_round` */
     if ( iteration == 0 )
       return;
@@ -595,7 +593,7 @@ class tech_map_impl
       /* propagate required time over the output inverter if present */
       if ( node_data.same_match && node_data.map_refs[other_phase] > 0 )
       {
-        node_data.required[use_phase] = std::min( node_data.required[use_phase], node_data.required[other_phase] - lib_inv_delay );   
+        node_data.required[use_phase] = std::min( node_data.required[use_phase], node_data.required[other_phase] - lib_inv_delay );
       }
 
       if ( node_data.same_match || node_data.map_refs[use_phase] > 0 )
@@ -629,8 +627,8 @@ class tech_map_impl
   template<bool DO_AREA>
   void match_phase( node<Ntk> const& n, uint8_t phase )
   {
-    float best_arrival = std::numeric_limits<float>::max();
-    float best_area_flow = std::numeric_limits<float>::max();
+    double best_arrival = std::numeric_limits<double>::max();
+    double best_area_flow = std::numeric_limits<double>::max();
     float best_area = std::numeric_limits<float>::max();
     uint32_t best_size = UINT32_MAX;
     uint8_t best_cut = 0u;
@@ -657,7 +655,7 @@ class tech_map_impl
       auto ctr = 0u;
       for ( auto l : cut )
       {
-        float arrival_pin = node_match[l].arrival[( best_phase >> ctr ) & 1] + best_supergate->tdelay[ctr];
+        double arrival_pin = node_match[l].arrival[( best_phase >> ctr ) & 1] + best_supergate->tdelay[ctr];
         best_arrival = std::max( best_arrival, arrival_pin );
         ++ctr;
       }
@@ -685,13 +683,13 @@ class tech_map_impl
       for ( auto const& gate : *supergates[phase] )
       {
         node_data.phase[phase] = gate.polarity;
-        float area_local = gate.area + cut_leaves_flow( *cut, n, phase );
-        float worst_arrival = 0.0f;
+        double area_local = gate.area + cut_leaves_flow( *cut, n, phase );
+        double worst_arrival = 0.0f;
 
         auto ctr = 0u;
         for ( auto l : *cut )
         {
-          float arrival_pin = node_match[l].arrival[( gate.polarity >> ctr ) & 1] + gate.tdelay[ctr];
+          double arrival_pin = node_match[l].arrival[( gate.polarity >> ctr ) & 1] + gate.tdelay[ctr];
           worst_arrival = std::max( worst_arrival, arrival_pin );
           ++ctr;
         }
@@ -727,7 +725,7 @@ class tech_map_impl
 
   void match_phase_exact( node<Ntk> const& n, uint8_t phase )
   {
-    float best_arrival = std::numeric_limits<float>::max();
+    double best_arrival = std::numeric_limits<double>::max();
     float best_exact_area = std::numeric_limits<float>::max();
     float best_area = std::numeric_limits<float>::max();
     uint32_t best_size = UINT32_MAX;
@@ -754,7 +752,7 @@ class tech_map_impl
       auto ctr = 0u;
       for ( auto l : cut )
       {
-        float arrival_pin = node_match[l].arrival[( best_phase >> ctr ) & 1] + best_supergate->tdelay[ctr];
+        double arrival_pin = node_match[l].arrival[( best_phase >> ctr ) & 1] + best_supergate->tdelay[ctr];
         best_arrival = std::max( best_arrival, arrival_pin );
         ++ctr;
       }
@@ -794,14 +792,14 @@ class tech_map_impl
       {
         node_data.phase[phase] = gate.polarity;
         node_data.area[phase] = gate.area;
-        auto area_exact = cut_ref( *cut, n, phase );
+        float area_exact = cut_ref( *cut, n, phase );
         cut_deref( *cut, n, phase );
-        float worst_arrival = 0.0f;
+        double worst_arrival = 0.0f;
 
         auto ctr = 0u;
         for ( auto l : *cut )
         {
-          float arrival_pin = node_match[l].arrival[( gate.polarity >> ctr ) & 1] + gate.tdelay[ctr];
+          double arrival_pin = node_match[l].arrival[( gate.polarity >> ctr ) & 1] + gate.tdelay[ctr];
           worst_arrival = std::max( worst_arrival, arrival_pin );
           ++ctr;
         }
@@ -844,8 +842,8 @@ class tech_map_impl
     auto& node_data = node_match[index];
 
     /* compute arrival adding an inverter to the other match phase */
-    float worst_arrival_npos = node_data.arrival[1] + lib_inv_delay;
-    float worst_arrival_nneg = node_data.arrival[0] + lib_inv_delay;
+    double worst_arrival_npos = node_data.arrival[1] + lib_inv_delay;
+    double worst_arrival_nneg = node_data.arrival[0] + lib_inv_delay;
     bool use_zero = false;
     bool use_one = false;
 
@@ -919,7 +917,7 @@ class tech_map_impl
             auto size_phase = cuts.cuts( index )[node_data.best_cut[phase]].size();
             auto size_nphase = cuts.cuts( index )[node_data.best_cut[nphase]].size();
 
-            if ( compare_map<DO_AREA>( node_data.arrival[nphase] + lib_inv_delay, node_data.arrival[phase],  node_data.flows[nphase] + lib_inv_area, node_data.flows[phase], size_nphase, size_phase ) )
+            if ( compare_map<DO_AREA>( node_data.arrival[nphase] + lib_inv_delay, node_data.arrival[phase], node_data.flows[nphase] + lib_inv_area, node_data.flows[phase], size_nphase, size_phase ) )
             {
               /* invert the choice */
               use_zero = !use_zero;
@@ -945,7 +943,7 @@ class tech_map_impl
     {
       auto size_zero = cuts.cuts( index )[node_data.best_cut[0]].size();
       auto size_one = cuts.cuts( index )[node_data.best_cut[1]].size();
-      if ( compare_map<DO_AREA>( worst_arrival_nneg, worst_arrival_npos,  node_data.flows[0], node_data.flows[1], size_zero, size_one ) )
+      if ( compare_map<DO_AREA>( worst_arrival_nneg, worst_arrival_npos, node_data.flows[0], node_data.flows[1], size_zero, size_one ) )
         use_one = false;
       else
         use_zero = false;
@@ -956,7 +954,7 @@ class tech_map_impl
       if constexpr ( ELA )
       {
         /* set cut references */
-        if ( !node_data.same_match ) 
+        if ( !node_data.same_match )
         {
           /* dereference the negative phase cut if in use */
           if ( node_data.map_refs[1] > 0 )
@@ -975,7 +973,7 @@ class tech_map_impl
       if constexpr ( ELA )
       {
         /* set cut references */
-        if ( !node_data.same_match ) 
+        if ( !node_data.same_match )
         {
           /* dereference the positive phase cut if in use */
           if ( node_data.map_refs[0] > 0 )
@@ -991,7 +989,7 @@ class tech_map_impl
     }
   }
 
-  inline void set_match_complemented_phase( uint32_t index, uint8_t phase, float worst_arrival_n )
+  inline void set_match_complemented_phase( uint32_t index, uint8_t phase, double worst_arrival_n )
   {
     auto& node_data = node_match[index];
     auto phase_n = phase ^ 1;
@@ -1050,9 +1048,9 @@ class tech_map_impl
     }
   }
 
-  inline float cut_leaves_flow( cut_t const& cut, node<Ntk> const& n, uint8_t phase )
+  inline double cut_leaves_flow( cut_t const& cut, node<Ntk> const& n, uint8_t phase )
   {
-    float flow{0.0f};
+    double flow{ 0.0f };
     auto const& node_data = node_match[ntk.node_to_index( n )];
 
     uint8_t ctr = 0u;
@@ -1177,11 +1175,11 @@ class tech_map_impl
 
     old2new[ntk.node_to_index( ntk.get_node( ntk.get_constant( false ) ) )][0] = dest.get_constant( false );
     old2new[ntk.node_to_index( ntk.get_node( ntk.get_constant( false ) ) )][1] = dest.get_constant( true );
-    
+
     ntk.foreach_pi( [&]( auto const& n ) {
       old2new[ntk.node_to_index( n )][0] = dest.create_pi();
     } );
-    return {dest, old2new};
+    return { dest, old2new };
   }
 
   void finalize_cover( klut_network& res, klut_map& old2new )
@@ -1210,7 +1208,7 @@ class tech_map_impl
       /* add used cut */
       if ( node_data.same_match || node_data.map_refs[phase] > 0 )
       {
-        create_lut_for_gate( res, old2new, index, phase);
+        create_lut_for_gate( res, old2new, index, phase );
 
         /* add inverted version if used */
         if ( node_data.same_match && node_data.map_refs[phase ^ 1] > 0 )
@@ -1221,7 +1219,7 @@ class tech_map_impl
       /* add the optional other match if used */
       if ( !node_data.same_match && node_data.map_refs[phase] > 0 )
       {
-        create_lut_for_gate( res, old2new, index, phase);
+        create_lut_for_gate( res, old2new, index, phase );
       }
 
       return true;
@@ -1245,7 +1243,6 @@ class tech_map_impl
     compute_gates_usage();
   }
 
-
   void create_lut_for_gate( klut_network& res, klut_map& old2new, uint32_t index, unsigned phase )
   {
     auto const& node_data = node_match[index];
@@ -1282,7 +1279,7 @@ class tech_map_impl
   }
 
   template<bool DO_AREA>
-  inline bool compare_map( float arrival, float best_arrival, float area_flow, float best_area_flow, uint32_t size, uint32_t best_size )
+  inline bool compare_map( double arrival, double best_arrival, double area_flow, double best_area_flow, uint32_t size, uint32_t best_size )
   {
     if constexpr ( DO_AREA )
     {
@@ -1378,7 +1375,7 @@ class tech_map_impl
     std::stringstream gates_usage;
     double tot_area = 0.0f;
     uint32_t tot_instances = 0u;
-    for ( auto i = 0u; i < gates_profile.size(); ++i ) 
+    for ( auto i = 0u; i < gates_profile.size(); ++i )
     {
       if ( gates_profile[i] > 0u )
       {
@@ -1387,8 +1384,7 @@ class tech_map_impl
         gates_usage << fmt::format( "[i] {:<15}", gates[i].name )
                     << fmt::format( "\t Instance = {:>10d}", gates_profile[i] )
                     << fmt::format( "\t Area = {:>12.2f}", tot_gate_area )
-                    << fmt::format( " {:>8.2f} %", tot_gate_area / area * 100 )
-                    << std::endl;
+                    << fmt::format( " {:>8.2f} %\n", tot_gate_area / area * 100 );
 
         tot_instances += gates_profile[i];
         tot_area += tot_gate_area;
@@ -1397,8 +1393,7 @@ class tech_map_impl
 
     gates_usage << fmt::format( "[i] {:<15}", "TOTAL" )
                 << fmt::format( "\t Instance = {:>10d}", tot_instances )
-                << fmt::format( "\t Area = {:>12.2f}   100.00 %", tot_area )
-                << std::endl;
+                << fmt::format( "\t Area = {:>12.2f}   100.00 %\n", tot_area );
 
     st.gates_usage = gates_usage.str();
   }
@@ -1409,10 +1404,10 @@ class tech_map_impl
   map_params const& ps;
   map_stats& st;
 
-  uint32_t iteration{0}; /* current mapping iteration */
-  double delay{0.0f};    /* current delay of the mapping */
-  double area{0.0f};     /* current area of the mapping */
-  const float epsilon{0.005f}; /* epsilon */
+  uint32_t iteration{ 0 };       /* current mapping iteration */
+  double delay{ 0.0f };          /* current delay of the mapping */
+  double area{ 0.0f };           /* current area of the mapping */
+  const float epsilon{ 0.005f }; /* epsilon */
 
   /* lib inverter info */
   float lib_inv_area;
@@ -1495,7 +1490,6 @@ klut_network tech_map( Ntk const& ntk, tech_library<NInputs> const& library, map
   return res;
 }
 
-
 namespace detail
 {
 
@@ -1503,24 +1497,24 @@ template<typename Ntk, unsigned NInputs>
 struct cut_match_t
 {
   /* list of supergates matching the cut for positive and negative output phases */
-  std::vector<exact_supergate<Ntk, NInputs>> const* supergates[2] = {nullptr, nullptr};
+  std::vector<exact_supergate<Ntk, NInputs>> const* supergates[2] = { nullptr, nullptr };
   /* input permutations, at index i, it contains the permutated position of i */
   std::array<uint8_t, NInputs> permutation{};
   /* permutated input negations */
-  uint8_t negation{0};
+  uint8_t negation{ 0 };
 };
 
 template<typename Ntk, unsigned NInputs>
 struct node_match_t
 {
   /* best supergate match for positive and negative output phases */
-  exact_supergate<Ntk, NInputs> const* best_supergate[2] = {nullptr, nullptr};
+  exact_supergate<Ntk, NInputs> const* best_supergate[2] = { nullptr, nullptr };
   /* fanin pin phases for both output phases */
   uint8_t phase[2];
   /* best cut index for both phases */
   uint32_t best_cut[2];
   /* node is mapped using only one phase */
-  bool same_match{false};
+  bool same_match{ false };
 
   /* arrival time at node output */
   double arrival[2];
@@ -1608,40 +1602,25 @@ class exact_map_impl
       const auto index = ntk.node_to_index( n );
       auto& node_data = node_match[index];
 
+      node_data.est_refs[0] = node_data.est_refs[1] = node_data.est_refs[2] = static_cast<float>( ntk.fanout_size( n ) );
+
       if ( ntk.is_constant( n ) )
       {
         /* all terminals have flow 1.0 */
-        node_data.est_refs[0] = node_data.est_refs[1] = node_data.est_refs[2] = 1.0f;
-        node_data.flows[0] = node_data.flows[1] = node_data.flows[2] = 1.0f;
+        node_data.flows[0] = node_data.flows[1] = node_data.flows[2] = 0.0f;
         node_data.arrival[0] = node_data.arrival[1] = 0.0f;
       }
       else if ( ntk.is_pi( n ) )
       {
         /* all terminals have flow 1.0 */
-        node_data.est_refs[0] = node_data.est_refs[1] = node_data.est_refs[2] = 1.0f;
-        node_data.flows[0] = node_data.flows[1] = node_data.flows[2] = 1.0f;
+        node_data.flows[0] = node_data.flows[1] = node_data.flows[2] = 0.0f;
         node_data.arrival[0] = 0.0f;
+        /* PIs have the negative phase implemented with an inverter */
         node_data.arrival[1] = lib_inv_delay;
       }
-      else
-      {
-        node_data.est_refs[0] = node_data.est_refs[1] = 0.0f;
-        node_data.est_refs[2] = static_cast<float>( ntk.fanout_size( n ) );
-        ntk.foreach_fanin( n, [&]( auto const& s ) {
-          if ( !ntk.is_pi( ntk.get_node( s ) ) )
-          {
-            const auto c_index = ntk.node_to_index( ntk.get_node( s ) );
-            if ( ntk.is_complemented( s ) )
-              node_match[c_index].est_refs[1] += 1.0f;
-            else
-              node_match[c_index].est_refs[0] += 1.0f;
-          }
-        } );
-      }
     } );
   }
 
-
   void compute_matches()
   {
     /* match gates */
@@ -1660,6 +1639,13 @@ class exact_map_impl
           continue;
         }
 
+        if ( cut->size() > NInputs )
+        {
+          /* Ignore cuts too big to be mapped using the library */
+          ( *cut )->data.ignore = true;
+          continue;
+        }
+
         /* match the cut using canonization and get the gates */
         const auto tt = cuts.truth_table( *cut );
         const auto fe = kitty::extend_to<NInputs>( tt );
@@ -1676,7 +1662,7 @@ class exact_map_impl
 
           match.supergates[phase] = supergates_npn;
           match.supergates[phase ^ 1] = supergates_npn_neg;
-          
+
           /* store permutations and negations */
           match.negation = 0;
           for ( auto j = 0u; j < perm.size() && j < NInputs; ++j )
@@ -1693,7 +1679,7 @@ class exact_map_impl
           ( *cut )->data.ignore = true;
         }
       }
-      
+
       matches[index] = node_matches;
     } );
   }
@@ -1740,7 +1726,6 @@ class exact_map_impl
     }
   }
 
-
   void compute_exact_area()
   {
     for ( auto const& n : top_order )
@@ -1772,7 +1757,7 @@ class exact_map_impl
     }
 
     double area_old = area;
-    set_mapping_refs<false>();
+    set_mapping_refs<true>();
 
     /* round stats */
     if ( ps.verbose )
@@ -1784,7 +1769,6 @@ class exact_map_impl
     }
   }
 
-
   void finalize_cover( NtkDest& res, node_map<signal<NtkDest>, Ntk>& old2new )
   {
     auto const& db = library.get_database();
@@ -1815,12 +1799,12 @@ class exact_map_impl
           children[i] = !children[i];
         }
       }
-      topo_view topo{db, supergate->root};
+      topo_view topo{ db, supergate->root };
       auto f = cleanup_dangling( topo, res, children.begin(), children.end() ).front();
 
       if ( phase == 1 )
         f = !f;
-      
+
       old2new[n] = f;
       return true;
     } );
@@ -1957,7 +1941,7 @@ class exact_map_impl
     {
       node_match[i].required[0] = node_match[i].required[1] = std::numeric_limits<float>::max();
     }
-    
+
     /* return in case of `skip_delay_round` */
     if ( iteration == 0 )
       return;
@@ -2009,7 +1993,7 @@ class exact_map_impl
       /* propagate required time over output inverter if present */
       if ( node_data.same_match && node_data.map_refs[other_phase] > 0 )
       {
-        node_data.required[use_phase] = std::min( node_data.required[use_phase], node_data.required[other_phase] - lib_inv_delay );   
+        node_data.required[use_phase] = std::min( node_data.required[use_phase], node_data.required[other_phase] - lib_inv_delay );
       }
 
       if ( node_data.same_match || node_data.map_refs[use_phase] > 0 )
@@ -2114,7 +2098,7 @@ class exact_map_impl
       /* match each gate and take the best one */
       for ( auto const& gate : *supergates.supergates[phase] )
       {
-        uint8_t complement = gate.polarity ^ supergates.negation;
+        uint8_t complement = supergates.negation;
         node_data.phase[phase] = complement;
         float area_local = gate.area + cut_leaves_flow( *cut, n, phase );
         float worst_arrival = 0.0f;
@@ -2168,7 +2152,6 @@ class exact_map_impl
     auto& cut_matches = matches[index];
     exact_supergate<NtkDest, NInputs> const* best_supergate = node_data.best_supergate[phase];
 
-
     /* recompute best match info */
     if ( best_supergate != nullptr )
     {
@@ -2183,7 +2166,7 @@ class exact_map_impl
         children[supergates.permutation[ctr++]] = l;
       }
 
-      best_phase = best_supergate->polarity ^ supergates.negation;
+      best_phase = supergates.negation;
       best_arrival = 0.0f;
       best_area = best_supergate->area;
       best_cut = node_data.best_cut[phase];
@@ -2234,7 +2217,7 @@ class exact_map_impl
 
       for ( auto const& gate : *supergates.supergates[phase] )
       {
-        uint8_t complement = gate.polarity ^ supergates.negation;
+        uint8_t complement = supergates.negation;
         node_data.phase[phase] = complement;
         node_data.area[phase] = gate.area;
         auto area_exact = cut_ref( *cut, n, phase );
@@ -2345,7 +2328,7 @@ class exact_map_impl
     {
       auto size_zero = cuts.cuts( index )[node_data.best_cut[0]].size();
       auto size_one = cuts.cuts( index )[node_data.best_cut[1]].size();
-      if ( compare_map<DO_AREA>( worst_arrival_nneg, worst_arrival_npos,  node_data.flows[0], node_data.flows[1], size_zero, size_one ) )
+      if ( compare_map<DO_AREA>( worst_arrival_nneg, worst_arrival_npos, node_data.flows[0], node_data.flows[1], size_zero, size_one ) )
         use_one = false;
       else
         use_zero = false;
@@ -2355,7 +2338,7 @@ class exact_map_impl
     {
       if constexpr ( ELA )
       {
-        if ( !node_data.same_match ) 
+        if ( !node_data.same_match )
         {
           if ( node_data.map_refs[1] > 0 )
             cut_deref( cuts.cuts( index )[node_data.best_cut[1]], n, 1 );
@@ -2371,7 +2354,7 @@ class exact_map_impl
     {
       if constexpr ( ELA )
       {
-        if ( !node_data.same_match ) 
+        if ( !node_data.same_match )
         {
           if ( node_data.map_refs[0] > 0 )
             cut_deref( cuts.cuts( index )[node_data.best_cut[0]], n, 0 );
@@ -2402,7 +2385,7 @@ class exact_map_impl
 
   inline float cut_leaves_flow( cut_t const& cut, node<Ntk> const& n, uint8_t phase )
   {
-    float flow{0.0f};
+    float flow{ 0.0f };
     auto const& node_data = node_match[ntk.node_to_index( n )];
     auto const& match = matches[ntk.node_to_index( n )][cut->data.match_index];
 
@@ -2576,17 +2559,16 @@ class exact_map_impl
     return false;
   }
 
-
 private:
   Ntk& ntk;
   exact_library<NtkDest, RewritingFn, NInputs> const& library;
   map_params const& ps;
   map_stats& st;
 
-  uint32_t iteration{0}; /* current mapping iteration */
-  double delay{0.0f};     /* current delay of the mapping */
-  double area{0.0f};      /* current area of the mapping */
-  const float epsilon{0.005f}; /* epsilon */
+  uint32_t iteration{ 0 };       /* current mapping iteration */
+  double delay{ 0.0f };          /* current delay of the mapping */
+  double area{ 0.0f };           /* current area of the mapping */
+  const float epsilon{ 0.005f }; /* epsilon */
 
   /* lib inverter info */
   float lib_inv_area;
diff --git a/include/mockturtle/utils/tech_library.hpp b/include/mockturtle/utils/tech_library.hpp
index 648b6b8d4..2cab50ffc 100644
--- a/include/mockturtle/utils/tech_library.hpp
+++ b/include/mockturtle/utils/tech_library.hpp
@@ -33,14 +33,14 @@
 #pragma once
 
 #include <cassert>
-#include <vector>
 #include <unordered_map>
+#include <vector>
 
 #include <kitty/constructors.hpp>
 #include <kitty/dynamic_truth_table.hpp>
-#include <kitty/static_truth_table.hpp>
 #include <kitty/npn.hpp>
 #include <kitty/print.hpp>
+#include <kitty/static_truth_table.hpp>
 
 #include "../io/genlib_reader.hpp"
 
@@ -48,50 +48,49 @@ namespace mockturtle
 {
 
 /*
-std::string const mcnc_library =  "GATE   inv1    1	O=!a;		        PIN * INV 1 999 0.9 0.3 0.9 0.3\n"
-                                  "GATE   inv2	  2	O=!a;		        PIN * INV 2 999 1.0 0.1 1.0 0.1\n"
-                                  "GATE   inv3	  3	O=!a;		        PIN * INV 3 999 1.1 0.09 1.1 0.09\n"
-                                  "GATE   inv4	  4	O=!a;		        PIN * INV 4 999 1.2 0.07 1.2 0.07\n"
-                                  "GATE   nand2	  2	O=!(ab);		    PIN * INV 1 999 1.0 0.2 1.0 0.2\n"
-                                  "GATE   nand3	  3	O=!(abc);	      PIN * INV 1 999 1.1 0.3 1.1 0.3\n"
-                                  "GATE   nand4   4	O=!(abcd);	    PIN * INV 1 999 1.4 0.4 1.4 0.4\n"
-                                  "GATE   nor2	  2	O=!{ab};		    PIN * INV 1 999 1.4 0.5 1.4 0.5\n"
-                                  "GATE   nor3	  3	O=!{abc};	      PIN * INV 1 999 2.4 0.7 2.4 0.7\n"
-                                  "GATE   nor4	  4	O=!{abcd};	    PIN * INV 1 999 3.8 1.0 3.8 1.0\n"
-                                  "GATE   and2	  3	O=(ab);		      PIN * NONINV 1 999 1.9 0.3 1.9 0.3\n"
-                                  "GATE   or2		  3	O={ab};		      PIN * NONINV 1 999 2.4 0.3 2.4 0.3\n"
-                                  "GATE   xor2a	  5	O=[ab];     	  PIN * UNKNOWN 2 999 1.9 0.5 1.9 0.5\n"
-                                  "#GATE  xor2b	  5	O=[ab];     	  PIN * UNKNOWN 2 999 1.9 0.5 1.9 0.5\n"
-                                  "GATE   xnor2a	5	O=![ab];		    PIN * UNKNOWN 2 999 2.1 0.5 2.1 0.5\n"
-                                  "#GATE  xnor2b	5	O=![ab];		    PIN * UNKNOWN 2 999 2.1 0.5 2.1 0.5\n"
-                                  "GATE   aoi21	  3	O=!{(ab)c};	    PIN * INV 1 999 1.6 0.4 1.6 0.4\n"
-                                  "GATE   aoi22	  4	O=!{(ab)(cd)};	PIN * INV 1 999 2.0 0.4 2.0 0.4\n"
-                                  "GATE   oai21	  3	O=!({ab}c);	    PIN * INV 1 999 1.6 0.4 1.6 0.4\n"
-                                  "GATE   oai22	  4	O=!({ab}{cd});	PIN * INV 1 999 2.0 0.4 2.0 0.4\n"
-                                  "GATE   buf    	2	O=a;        	  PIN * NONINV 1 999 1.0 0.0 1.0 0.0\n"
-                                  "GATE   zero	  0	O=0;\n"
-                                  "GATE   one		  0	O=1;";
+std::string const mcnc_library =  "GATE   inv1    1 O=!a;           PIN * INV 1 999 0.9 0.3 0.9 0.3\n"
+                                  "GATE   inv2    2 O=!a;           PIN * INV 2 999 1.0 0.1 1.0 0.1\n"
+                                  "GATE   inv3    3 O=!a;           PIN * INV 3 999 1.1 0.09 1.1 0.09\n"
+                                  "GATE   inv4    4 O=!a;           PIN * INV 4 999 1.2 0.07 1.2 0.07\n"
+                                  "GATE   nand2   2 O=!(ab);        PIN * INV 1 999 1.0 0.2 1.0 0.2\n"
+                                  "GATE   nand3   3 O=!(abc);	      PIN * INV 1 999 1.1 0.3 1.1 0.3\n"
+                                  "GATE   nand4   4 O=!(abcd);      PIN * INV 1 999 1.4 0.4 1.4 0.4\n"
+                                  "GATE   nor2    2 O=!{ab};        PIN * INV 1 999 1.4 0.5 1.4 0.5\n"
+                                  "GATE   nor3    3 O=!{abc};       PIN * INV 1 999 2.4 0.7 2.4 0.7\n"
+                                  "GATE   nor4    4 O=!{abcd};      PIN * INV 1 999 3.8 1.0 3.8 1.0\n"
+                                  "GATE   and2    3 O=(ab);         PIN * NONINV 1 999 1.9 0.3 1.9 0.3\n"
+                                  "GATE   or2     3 O={ab};         PIN * NONINV 1 999 2.4 0.3 2.4 0.3\n"
+                                  "GATE   xor2a   5 O=[ab];         PIN * UNKNOWN 2 999 1.9 0.5 1.9 0.5\n"
+                                  "#GATE  xor2b   5 O=[ab];         PIN * UNKNOWN 2 999 1.9 0.5 1.9 0.5\n"
+                                  "GATE   xnor2a  5 O=![ab];        PIN * UNKNOWN 2 999 2.1 0.5 2.1 0.5\n"
+                                  "#GATE  xnor2b  5 O=![ab];        PIN * UNKNOWN 2 999 2.1 0.5 2.1 0.5\n"
+                                  "GATE   aoi21   3 O=!{(ab)c};     PIN * INV 1 999 1.6 0.4 1.6 0.4\n"
+                                  "GATE   aoi22   4 O=!{(ab)(cd)};  PIN * INV 1 999 2.0 0.4 2.0 0.4\n"
+                                  "GATE   oai21   3 O=!({ab}c);     PIN * INV 1 999 1.6 0.4 1.6 0.4\n"
+                                  "GATE   oai22   4 O=!({ab}{cd});  PIN * INV 1 999 2.0 0.4 2.0 0.4\n"
+                                  "GATE   buf     2 O=a;            PIN * NONINV 1 999 1.0 0.0 1.0 0.0\n"
+                                  "GATE   zero    0 O=0;\n"
+                                  "GATE   one     0 O=1;";
 */
 
 struct tech_library_params
 {
   /*! \brief reports np enumerations */
-  bool verbose{false};
+  bool verbose{ false };
 
   /*! \brief reports all the entries in the library */
-  bool very_verbose{false};
+  bool very_verbose{ false };
 };
 
-
 template<unsigned NInputs>
 struct supergate
 {
   struct gate const* root{};
 
   /* area */
-  float area{0};
+  float area{ 0 };
   /* worst delay */
-  float worstDelay{0};
+  float worstDelay{ 0 };
   /* pin-to-pin delay */
   std::array<float, NInputs> tdelay{};
 
@@ -99,7 +98,7 @@ struct supergate
   std::vector<uint8_t> permutation{};
 
   /* pin negations */
-  uint8_t polarity{0};
+  uint8_t polarity{ 0 };
 };
 
 /*! \brief Library of np-enumerated gates
@@ -127,10 +126,10 @@ class tech_library
   using lib_t = std::unordered_map<kitty::static_truth_table<NInputs>, supergates_list_t, tt_hash>;
 
 public:
-  tech_library( std::vector<gate> const gates, tech_library_params const ps = {} )
-    : _gates( gates ),
-      _ps ( ps ),
-      _super_lib()
+  tech_library( std::vector<gate> const& gates, tech_library_params const ps = {} )
+      : _gates( gates ),
+        _ps( ps ),
+        _super_lib()
   {
     generate_library();
   }
@@ -155,7 +154,7 @@ class tech_library
 
   const std::vector<gate> get_gates() const
   {
-      return _gates;
+    return _gates;
   }
 
 private:
@@ -203,7 +202,7 @@ class tech_library
 
         for ( auto i = 0u; i < perm.size() && i < NInputs; ++i )
         {
-          sg.tdelay[i] = worst_delay;  /* if pin-to-pin delay change to: gate.delay[perm[i]] */
+          sg.tdelay[i] = worst_delay;                     /* if pin-to-pin delay change to: gate.delay[perm[i]] */
           sg.polarity |= ( ( neg >> perm[i] ) & 1 ) << i; /* permutate input negation to match the right pin */
         }
         for ( auto i = perm.size(); i < NInputs; ++i )
@@ -303,55 +302,53 @@ class tech_library
 
 private:
   /* inverter info */
-  float _inv_area{0.0};
-  float _inv_delay{0.0};
-  uint32_t _inv_id{UINT32_MAX};
+  float _inv_area{ 0.0 };
+  float _inv_delay{ 0.0 };
+  uint32_t _inv_id{ UINT32_MAX };
 
-  unsigned _max_size{0}; /* max #fanins of the gates in the library */
+  unsigned _max_size{ 0 }; /* max #fanins of the gates in the library */
 
   std::vector<gate> const _gates; /* collection of gates */
   tech_library_params const _ps;
   lib_t _super_lib; /* library of enumerated gates */
 };
 
-
-
 template<typename Ntk, unsigned NInputs>
 struct exact_supergate
 {
   signal<Ntk> const root;
 
   /* number of inputs of the supergate */
-  uint8_t n_inputs{0};
+  uint8_t n_inputs{ 0 };
   /* saved polarities for inputs and/or outputs */
-  uint8_t polarity{0};
-  
+  uint8_t polarity{ 0 };
+
   /* area */
-  float area{0};
+  float area{ 0 };
   /* worst delay */
-  float worstDelay{0};
+  float worstDelay{ 0 };
   /* pin-to-pin delay */
-  std::array<float, NInputs> tdelay{0};
+  std::array<float, NInputs> tdelay{ 0 };
 
   exact_supergate( signal<Ntk> const root )
-    : root( root ) {}
+      : root( root ) {}
 };
 
 struct exact_library_params
 {
   /* area of a gate */
-  float area_gate{1.0f};
+  float area_gate{ 1.0f };
   /* area of an inverter */
-  float area_inverter{0.0f};
+  float area_inverter{ 0.0f };
   /* delay of a gate */
-  float delay_gate{1.0f};
+  float delay_gate{ 1.0f };
   /* delay of an inverter */
-  float delay_inverter{0.0f};
+  float delay_inverter{ 0.0f };
 
   /* classify in NP instead of NPN */
-  bool np_classification{true};
+  bool np_classification{ true };
   /* verbose */
-  bool verbose{false};
+  bool verbose{ false };
 };
 
 /*! \brief Library of exact synthesis supergates
@@ -381,10 +378,10 @@ class exact_library
 
 public:
   exact_library( RewritingFn const& rewriting_fn, exact_library_params const& ps = {} )
-  : _database(),
-    _rewriting_fn( rewriting_fn ),
-    _ps( ps ),
-    _super_lib()
+      : _database(),
+        _rewriting_fn( rewriting_fn ),
+        _ps( ps ),
+        _super_lib()
   {
     generate_library();
   }
@@ -397,7 +394,7 @@ class exact_library
     return nullptr;
   }
 
-  const Ntk &get_database() const
+  const Ntk& get_database() const
   {
     return _database;
   }
@@ -427,7 +424,7 @@ class exact_library
     } while ( !kitty::is_const0( tt ) );
 
     /* Constuct supergates */
-    for ( auto const &entry : classes )
+    for ( auto const& entry : classes )
     {
       supergates_list_t supergates_pos;
       supergates_list_t supergates_neg;
@@ -436,7 +433,8 @@ class exact_library
       const auto add_supergate = [&]( auto const& f_new ) {
         bool complemented = _database.is_complemented( f_new );
         auto f = f_new;
-        if ( _ps.np_classification && complemented ) {
+        if ( _ps.np_classification && complemented )
+        {
           f = !f;
         }
         exact_supergate<Ntk, NInputs> sg( f );
@@ -456,20 +454,20 @@ class exact_library
       kitty::dynamic_truth_table function = kitty::extend_to( entry, NInputs );
       _rewriting_fn( _database, function, pis.begin(), pis.end(), add_supergate );
       if ( supergates_pos.size() > 0 )
-        _super_lib.insert( {entry, supergates_pos} );
+        _super_lib.insert( { entry, supergates_pos } );
       if ( _ps.np_classification && supergates_neg.size() > 0 )
-        _super_lib.insert( {not_entry, supergates_neg} );
+        _super_lib.insert( { not_entry, supergates_neg } );
     }
 
     if ( _ps.verbose )
     {
       std::cout << "Classified in " << _super_lib.size() << " entries" << std::endl;
-      for ( auto const &pair : _super_lib )
+      for ( auto const& pair : _super_lib )
       {
         kitty::print_hex( pair.first );
         std::cout << ": ";
 
-        for ( auto const&  gate : pair.second )
+        for ( auto const& gate : pair.second )
         {
           printf( "%.2f,%.2f,%d,%d,:", gate.worstDelay, gate.area, gate.polarity, gate.n_inputs );
           for ( auto j = 0u; j < NInputs; ++j )
@@ -482,7 +480,7 @@ class exact_library
   }
 
   /* Computes delay and area info */
-  void compute_info( exact_supergate<Ntk, NInputs> &sg )
+  void compute_info( exact_supergate<Ntk, NInputs>& sg )
   {
     _database.incr_trav_id();
     /* info does not consider input and output inverters */
@@ -493,17 +491,16 @@ class exact_library
     /* output polarity */
     sg.polarity |= ( unsigned( compl_root ) ) << NInputs;
     /* number of inputs */
-    for( auto i = 0u; i < NInputs; ++i )
+    for ( auto i = 0u; i < NInputs; ++i )
     {
-      sg.tdelay[i] *= -1;   /* invert to positive value */
+      sg.tdelay[i] *= -1; /* invert to positive value */
       if ( sg.tdelay[i] != 0.0f )
         sg.n_inputs++;
     }
     sg.worstDelay *= -1;
   }
 
-
-  float compute_info_rec( exact_supergate<Ntk, NInputs> &sg, signal<Ntk> const& root, float delay )
+  float compute_info_rec( exact_supergate<Ntk, NInputs>& sg, signal<Ntk> const& root, float delay )
   {
     auto n = _database.get_node( root );
 
@@ -515,8 +512,8 @@ class exact_library
 
     if ( _database.is_pi( n ) )
     {
-      sg.tdelay[_database.index_to_node( n ) - 1u] = std::min(sg.tdelay[_database.index_to_node( n ) - 1u], tdelay);
-      sg.worstDelay = std::min(sg.worstDelay, tdelay);
+      sg.tdelay[_database.index_to_node( n ) - 1u] = std::min( sg.tdelay[_database.index_to_node( n ) - 1u], tdelay );
+      sg.worstDelay = std::min( sg.worstDelay, tdelay );
       sg.polarity |= ( unsigned( _database.is_complemented( root ) ) ) << ( _database.index_to_node( n ) - 1u );
       return area;
     }
@@ -556,4 +553,4 @@ class exact_library
   lib_t _super_lib;
 };
 
-}
+} // namespace mockturtle
diff --git a/test/algorithms/mapper.cpp b/test/algorithms/mapper.cpp
index 478f9ca08..c92dc860b 100644
--- a/test/algorithms/mapper.cpp
+++ b/test/algorithms/mapper.cpp
@@ -21,14 +21,14 @@
 
 using namespace mockturtle;
 
-std::string const test_library =  "GATE   inv1    1	O=!a;     PIN * INV 1 999 0.9 0.3 0.9 0.3\n"
-                                  "GATE   inv2	  2	O=!a;		  PIN * INV 2 999 1.0 0.1 1.0 0.1\n"
-                                  "GATE   nand2	  2	O=!(ab);  PIN * INV 1 999 1.0 0.2 1.0 0.2\n"
-                                  "GATE   xor2	  5	O=[ab];   PIN * UNKNOWN 2 999 1.9 0.5 1.9 0.5\n"
-                                  "GATE   mig3    3	O=<abc>;  PIN * INV 1 999 2.0 0.2 2.0 0.2\n"
-                                  "GATE   buf    	2	O=a;      PIN * NONINV 1 999 1.0 0.0 1.0 0.0\n"
-                                  "GATE   zero	  0	O=0;\n"
-                                  "GATE   one		  0	O=1;";
+std::string const test_library =  "GATE   inv1    1 O=!a;     PIN * INV 1 999 0.9 0.3 0.9 0.3\n"
+                                  "GATE   inv2    2 O=!a;     PIN * INV 2 999 1.0 0.1 1.0 0.1\n"
+                                  "GATE   nand2   2 O=!(ab);  PIN * INV 1 999 1.0 0.2 1.0 0.2\n"
+                                  "GATE   xor2    5 O=[ab];   PIN * UNKNOWN 2 999 1.9 0.5 1.9 0.5\n"
+                                  "GATE   mig3    3 O=<abc>;  PIN * INV 1 999 2.0 0.2 2.0 0.2\n"
+                                  "GATE   buf     2 O=a;      PIN * NONINV 1 999 1.0 0.0 1.0 0.0\n"
+                                  "GATE   zero    0 O=0;\n"
+                                  "GATE   one     0 O=1;";
 
 
 TEST_CASE( "Map of MAJ3", "[mapper]" )
diff --git a/test/utils/tech_library.cpp b/test/utils/tech_library.cpp
index 99cdda230..2b23f306e 100644
--- a/test/utils/tech_library.cpp
+++ b/test/utils/tech_library.cpp
@@ -14,32 +14,33 @@
 
 using namespace mockturtle;
 
-std::string const simple_test_library = "GATE   inv1    1	O=!a;     PIN * INV 1 999 0.9 0.3 0.9 0.3\n"
-                                        "GATE   inv2	  2	O=!a;		  PIN * INV 2 999 1.0 0.1 1.0 0.1\n"
-                                        "GATE   nand2	  2	O=!(ab);  PIN * INV 1 999 1.0 0.2 1.0 0.2\n";
-
-std::string const test_library =  "GATE   inv1    3	O=!a;		        PIN * INV 1 999 1.1 0.09 1.1 0.09\n"
-                                  "GATE   inv2	  2	O=!a;		        PIN * INV 2 999 1.0 0.1 1.0 0.1\n"
-                                  "GATE   inv3	  1	O=!a;		        PIN * INV 3 999 0.9 0.3 0.9 0.3\n"
-                                  "GATE   inv4	  4	O=!a;		        PIN * INV 4 999 1.2 0.07 1.2 0.07\n"
-                                  "GATE   nand2	  2	O=!(ab);		    PIN * INV 1 999 1.0 0.2 1.0 0.2\n"
-                                  "GATE   nand3	  3	O=!(abc);	      PIN * INV 1 999 1.1 0.3 1.1 0.3\n"
-                                  "GATE   nand4   4	O=!(abcd);	    PIN * INV 1 999 1.4 0.4 1.4 0.4\n"
-                                  "GATE   nor2	  2	O=!{ab};		    PIN * INV 1 999 1.4 0.5 1.4 0.5\n"
-                                  "GATE   nor3	  3	O=!{abc};	      PIN * INV 1 999 2.4 0.7 2.4 0.7\n"
-                                  "GATE   nor4	  4	O=!{abcd};	    PIN * INV 1 999 3.8 1.0 3.8 1.0\n"
-                                  "GATE   and2	  3	O=(ab);		      PIN * NONINV 1 999 1.9 0.3 1.9 0.3\n"
-                                  "GATE   or2		  3	O={ab};		      PIN * NONINV 1 999 2.4 0.3 2.4 0.3\n"
-                                  "GATE   xor2a	  5	O=[ab];     	  PIN * UNKNOWN 2 999 1.9 0.5 1.9 0.5\n"
-                                  "GATE   xnor2a	5	O=![ab];		    PIN * UNKNOWN 2 999 2.1 0.5 2.1 0.5\n"
-                                  "GATE   aoi21	  3	O=!{(ab)c};	    PIN * INV 1 999 1.6 0.4 1.6 0.4\n"
-                                  "GATE   aoi22	  4	O=!{(ab)(cd)};	PIN * INV 1 999 2.0 0.4 2.0 0.4\n"
-                                  "GATE   oai21	  3	O=!({ab}c);	    PIN * INV 1 999 1.6 0.4 1.6 0.4\n"
-                                  "GATE   oai22	  4	O=!({ab}{cd});	PIN * INV 1 999 2.0 0.4 2.0 0.4\n"
-                                  "GATE   buf    	2	O=a;        	  PIN * NONINV 1 999 1.0 0.0 1.0 0.0\n"
-                                  "GATE   zero	  0	O=0;\n"
-                                  "GATE   one		  0	O=1;";
-
+std::string const simple_test_library = "GATE   inv1    1 O=!a;     PIN * INV 1 999 0.9 0.3 0.9 0.3\n"
+                                        "GATE   inv2    2 O=!a;     PIN * INV 2 999 1.0 0.1 1.0 0.1\n"
+                                        "GATE   nand2   2 O=!(ab);  PIN * INV 1 999 1.0 0.2 1.0 0.2\n";
+
+std::string const test_library =  "GATE   inv1    3 O=!a;           PIN * INV 3 999 1.1 0.09 1.1 0.09\n"
+                                  "GATE   inv2    2 O=!a;           PIN * INV 2 999 1.0 0.1 1.0 0.1\n"
+                                  "GATE   inv3    1 O=!a;           PIN * INV 1 999 0.9 0.3 0.9 0.3\n"
+                                  "GATE   inv4    4 O=!a;           PIN * INV 4 999 1.2 0.07 1.2 0.07\n"
+                                  "GATE   nand2   2 O=!(ab);        PIN * INV 1 999 1.0 0.2 1.0 0.2\n"
+                                  "GATE   nand3   3 O=!(abc);	      PIN * INV 1 999 1.1 0.3 1.1 0.3\n"
+                                  "GATE   nand4   4 O=!(abcd);      PIN * INV 1 999 1.4 0.4 1.4 0.4\n"
+                                  "GATE   nor2    2 O=!{ab};        PIN * INV 1 999 1.4 0.5 1.4 0.5\n"
+                                  "GATE   nor3    3 O=!{abc};       PIN * INV 1 999 2.4 0.7 2.4 0.7\n"
+                                  "GATE   nor4    4 O=!{abcd};      PIN * INV 1 999 3.8 1.0 3.8 1.0\n"
+                                  "GATE   and2    3 O=(ab);         PIN * NONINV 1 999 1.9 0.3 1.9 0.3\n"
+                                  "GATE   or2     3 O={ab};         PIN * NONINV 1 999 2.4 0.3 2.4 0.3\n"
+                                  "GATE   xor2a   5 O=[ab];         PIN * UNKNOWN 2 999 1.9 0.5 1.9 0.5\n"
+                                  "#GATE  xor2b   5 O=[ab];         PIN * UNKNOWN 2 999 1.9 0.5 1.9 0.5\n"
+                                  "GATE   xnor2a  5 O=![ab];        PIN * UNKNOWN 2 999 2.1 0.5 2.1 0.5\n"
+                                  "#GATE  xnor2b  5 O=![ab];        PIN * UNKNOWN 2 999 2.1 0.5 2.1 0.5\n"
+                                  "GATE   aoi21   3 O=!{(ab)c};     PIN * INV 1 999 1.6 0.4 1.6 0.4\n"
+                                  "GATE   aoi22   4 O=!{(ab)(cd)};  PIN * INV 1 999 2.0 0.4 2.0 0.4\n"
+                                  "GATE   oai21   3 O=!({ab}c);     PIN * INV 1 999 1.6 0.4 1.6 0.4\n"
+                                  "GATE   oai22   4 O=!({ab}{cd});  PIN * INV 1 999 2.0 0.4 2.0 0.4\n"
+                                  "GATE   buf     2 O=a;            PIN * NONINV 1 999 1.0 0.0 1.0 0.0\n"
+                                  "GATE   zero    0 O=0;\n"
+                                  "GATE   one     0 O=1;";
 
 TEST_CASE( "Simple library generation", "[tech_library]" )
 {
@@ -122,12 +123,9 @@ TEST_CASE( "Complete library generation", "[tech_library]" )
   std::vector<gate> gates;
 
   std::istringstream in( test_library );
-  if ( lorina::read_genlib( in, genlib_reader( gates ) ) != lorina::return_code::success )
-  {
-    std::cout << "ERROR IN" << std::endl;
-    std::abort();
-    return;
-  }
+  auto result = lorina::read_genlib( in, genlib_reader( gates ) );
+  
+  CHECK( result == lorina::return_code::success );
 
   tech_library<4> lib( gates );
 

From 350c1ec8650a5ce48270eaa8052533a755ef664b Mon Sep 17 00:00:00 2001
From: aletempiac <alessandro.tempia@gmail.com>
Date: Wed, 28 Apr 2021 17:43:05 +0200
Subject: [PATCH 05/40] Mapper updates: switching power optimization and
 templates restructuring

---
 .../mockturtle/algorithms/cut_enumeration.hpp | 485 ++++++++++++++++++
 .../algorithms/detail/switching_activity.hpp  |  71 +++
 include/mockturtle/algorithms/mapper.hpp      | 221 ++++++--
 include/mockturtle/utils/tech_library.hpp     |   6 +-
 4 files changed, 732 insertions(+), 51 deletions(-)
 create mode 100644 include/mockturtle/algorithms/detail/switching_activity.hpp

diff --git a/include/mockturtle/algorithms/cut_enumeration.hpp b/include/mockturtle/algorithms/cut_enumeration.hpp
index 5f89c9fa0..ef094e2e9 100644
--- a/include/mockturtle/algorithms/cut_enumeration.hpp
+++ b/include/mockturtle/algorithms/cut_enumeration.hpp
@@ -30,6 +30,7 @@
   \author Heinz Riener
   \author Mathias Soeken
   \author Sahand Kashani-Akhavan
+  \author Alessandro Tempia Calvino
 */
 
 #pragma once
@@ -609,6 +610,490 @@ network_cuts<Ntk, ComputeTruth, CutData> cut_enumeration( Ntk const& ntk, cut_en
   return res;
 }
 
+/* forward declarations */
+/*! \cond PRIVATE */
+template<typename Ntk, uint32_t NumVars, bool ComputeTruth, typename CutData>
+struct fast_network_cuts;
+
+template<typename Ntk, uint32_t NumVars, bool ComputeTruth = false, typename CutData = empty_cut_data>
+fast_network_cuts<Ntk, NumVars, ComputeTruth, CutData> fast_cut_enumeration( Ntk const& ntk, cut_enumeration_params const& ps = {}, cut_enumeration_stats * pst = nullptr );
+
+namespace detail
+{
+template<typename Ntk, uint32_t NumVars, bool ComputeTruth, typename CutData>
+class fast_cut_enumeration_impl;
+}
+/*! \endcond */
+
+/*! \brief Cut database for a network.
+ *
+ * The function `fast_cut_enumeration` returns an instance of type `fast_network_cuts`
+ * which contains a cut database and can be queried to return all cuts of a
+ * node, or the function of a cut (if it was computed).
+ *
+ * Compared to `network_cuts`, it uses static truth tables instead of
+ * dynamic truth tables to speed up the truth table computation.
+ * 
+ * An instance of type `fast_network_cuts` can only be constructed from the
+ * `fast_cut_enumeration` algorithm.
+ */
+template<typename Ntk, uint32_t NumVars, bool ComputeTruth, typename CutData>
+struct fast_network_cuts
+{
+public:
+  static constexpr uint32_t max_cut_num = 26; /* compile-time capacity of each cut set */
+  using cut_t = cut_type<ComputeTruth, CutData>;
+  using cut_set_t = cut_set<cut_t, max_cut_num>;
+  static constexpr bool compute_truth = ComputeTruth;
+
+private:
+  explicit fast_network_cuts( uint32_t size ) : _cuts( size ) /* one cut set per node index */
+  {
+    kitty::static_truth_table<NumVars> zero, proj;
+    kitty::create_nth_var( proj, 0u );
+
+    _truth_tables.insert( zero ); /* constant function, referenced by add_zero_cut */
+    _truth_tables.insert( proj ); /* single-variable function, presumably the one add_unit_cut refers to */
+  }
+
+public:
+  /*! \brief Returns the cut set of a node */
+  cut_set_t& cuts( uint32_t node_index ) { return _cuts[node_index]; }
+
+  /*! \brief Returns the cut set of a node */
+  cut_set_t const& cuts( uint32_t node_index ) const { return _cuts[node_index]; }
+
+  /*! \brief Returns the truth table of a cut */
+  template<bool enabled = ComputeTruth, typename = std::enable_if_t<std::is_same_v<Ntk, Ntk> && enabled>>
+  auto truth_table( cut_t const& cut ) const
+  {
+    return _truth_tables[cut->func_id];
+  }
+
+  /*! \brief Returns the total number of tuples that were tried to be merged */
+  auto total_tuples() const
+  {
+    return _total_tuples;
+  }
+
+  /*! \brief Returns the total number of cuts in the database. */
+  auto total_cuts() const
+  {
+    return _total_cuts;
+  }
+
+  /*! \brief Returns the number of nodes for which cuts are computed */
+  auto nodes_size() const
+  {
+    return _cuts.size();
+  }
+
+  /* compute positions of leaf indices in cut `sub` (subset) with respect to
+   * leaves in cut `sup` (super set).
+   *
+   * Example:
+   *   compute_truth_table_support( {1, 3, 6}, {0, 1, 2, 3, 6, 7} ) = {1, 3, 4}
+   */
+  std::vector<uint8_t> compute_truth_table_support( cut_t const& sub, cut_t const& sup ) const
+  {
+    std::vector<uint8_t> support;
+    support.reserve( sub.size() );
+
+    auto itp = sup.begin();
+    for ( auto i : sub )
+    {
+      itp = std::find( itp, sup.end(), i ); /* resume the search from the previous match */
+      support.push_back( static_cast<uint8_t>( std::distance( sup.begin(), itp ) ) );
+    }
+
+    return support;
+  }
+
+  /*! \brief Inserts a truth table into the truth table cache.
+   *
+   * This method can be used when manually adding or modifying cuts from the
+   * cut sets.
+   *
+   * \param tt Truth table to add
+   * \return Literal id from the truth table store
+   */
+  uint32_t insert_truth_table( kitty::static_truth_table<NumVars> const& tt )
+  {
+    return _truth_tables.insert( tt );
+  }
+
+private:
+  template<typename _Ntk, uint32_t _NumVars, bool _ComputeTruth, typename _CutData>
+  friend class detail::fast_cut_enumeration_impl;
+
+  template<typename _Ntk, uint32_t _NumVars, bool _ComputeTruth, typename _CutData>
+  friend fast_network_cuts<_Ntk, _NumVars, _ComputeTruth, _CutData> fast_cut_enumeration( _Ntk const& ntk, cut_enumeration_params const& ps, cut_enumeration_stats * pst );
+
+private:
+  void add_zero_cut( uint32_t index )
+  {
+    auto& cut = _cuts[index].add_cut( &index, &index ); /* empty range: the cut has no leaves */
+
+    if constexpr ( ComputeTruth )
+    {
+      cut->func_id = 0; /* presumably the id of the constant table inserted by the constructor - TODO confirm */
+    }
+  }
+
+  void add_unit_cut( uint32_t index )
+  {
+    auto& cut = _cuts[index].add_cut( &index, &index + 1 ); /* one-element range: the node is its own leaf */
+
+    if constexpr ( ComputeTruth )
+    {
+      cut->func_id = 2; /* presumably the id of the projection table inserted by the constructor - TODO confirm */
+    }
+  }
+
+private:
+  /* compressed representation of cuts */
+  std::vector<cut_set_t> _cuts;
+
+  /* cut truth tables */
+  truth_table_cache<kitty::static_truth_table<NumVars>> _truth_tables;
+
+  /* statistics */
+  uint32_t _total_tuples{};
+  std::size_t _total_cuts{};
+};
+
+/*! \cond PRIVATE */
+namespace detail
+{
+
+template<typename Ntk, uint32_t NumVars, bool ComputeTruth, typename CutData> /* enumerates cuts node by node into a fast_network_cuts database */
+class fast_cut_enumeration_impl
+{
+public:
+  using cut_t = typename fast_network_cuts<Ntk, NumVars, ComputeTruth, CutData>::cut_t;
+  using cut_set_t = typename fast_network_cuts<Ntk, NumVars, ComputeTruth, CutData>::cut_set_t;
+
+  explicit fast_cut_enumeration_impl( Ntk const& ntk, cut_enumeration_params const& ps, cut_enumeration_stats& st, fast_network_cuts<Ntk, NumVars, ComputeTruth, CutData>& cuts )
+      : ntk( ntk ),
+        ps( ps ),
+        st( st ),
+        cuts( cuts )
+  {
+    assert( ps.cut_limit < cuts.max_cut_num && "cut_limit exceeds the compile-time limit for the maximum number of cuts" );
+  }
+
+public:
+  void run() /* visits every node in the order given by foreach_node and fills its cut set */
+  {
+    stopwatch t( st.time_total );
+
+    ntk.foreach_node( [this]( auto node ) {
+      const auto index = ntk.node_to_index( node );
+
+      if ( ps.very_verbose )
+      {
+        std::cout << fmt::format( "[i] compute cut for node at index {}\n", index );
+      }
+
+      if ( ntk.is_constant( node ) )
+      {
+        cuts.add_zero_cut( index );
+      }
+      else if ( ntk.is_pi( node ) )
+      {
+        cuts.add_unit_cut( index );
+      }
+      else
+      {
+        if constexpr ( Ntk::min_fanin_size == 2 && Ntk::max_fanin_size == 2 ) /* dedicated path for strictly 2-input nodes */
+        {
+          merge_cuts2( index );
+        }
+        else
+        {
+          merge_cuts( index );
+        }
+      }
+    } );
+  }
+
+private:
+  uint32_t compute_truth_table( uint32_t index, std::vector<cut_t const*> const& vcuts, cut_t& res ) /* returns the cache id of the function of `res` */
+  {
+    stopwatch t( st.time_truth_table );
+
+    std::vector<kitty::static_truth_table<NumVars>> tt( vcuts.size() );
+    auto i = 0;
+    for ( auto const& cut : vcuts )
+    {
+      tt[i] = cuts._truth_tables[( *cut )->func_id];
+      const auto supp = cuts.compute_truth_table_support( *cut, res ); /* positions of this cut's leaves inside `res` */
+      kitty::expand_inplace( tt[i], supp ); /* re-express the fanin function over the leaves of `res` */
+      ++i;
+    }
+
+    auto tt_res = ntk.compute( ntk.index_to_node( index ), tt.begin(), tt.end() );
+
+    if ( ps.minimize_truth_table )
+    {
+      const auto support = kitty::min_base_inplace( tt_res );
+      if ( support.size() != res.size() ) /* some leaves are not in the functional support: shrink the cut */
+      {
+        std::vector<uint32_t> leaves_before( res.begin(), res.end() );
+        std::vector<uint32_t> leaves_after( support.size() );
+
+        auto it_support = support.begin();
+        auto it_leaves = leaves_after.begin();
+        while ( it_support != support.end() )
+        {
+          *it_leaves++ = leaves_before[*it_support++];
+        }
+        res.set_leaves( leaves_after.begin(), leaves_after.end() );
+      }
+    }
+
+    return cuts._truth_tables.insert( tt_res );
+  }
+
+  void merge_cuts2( uint32_t index ) /* cut computation for a node with exactly two fanins */
+  {
+    const auto fanin = 2;
+
+    uint32_t pairs{1};
+    ntk.foreach_fanin( ntk.index_to_node( index ), [this, &pairs]( auto child, auto i ) {
+      lcuts[i] = &cuts.cuts( ntk.node_to_index( ntk.get_node( child ) ) );
+      pairs *= static_cast<uint32_t>( lcuts[i]->size() );
+    } );
+    lcuts[2] = &cuts.cuts( index ); /* slot after the two fanins holds the node's own cut set */
+    auto& rcuts = *lcuts[fanin];
+    rcuts.clear();
+
+    cut_t new_cut;
+
+    std::vector<cut_t const*> vcuts( fanin );
+
+    cuts._total_tuples += pairs;
+    for ( auto const& c1 : *lcuts[0] )
+    {
+      for ( auto const& c2 : *lcuts[1] )
+      {
+        if ( !c1->merge( *c2, new_cut, NumVars ) )
+        {
+          continue;
+        }
+
+        if ( rcuts.is_dominated( new_cut ) )
+        {
+          continue;
+        }
+
+        if constexpr ( ComputeTruth )
+        {
+          vcuts[0] = c1;
+          vcuts[1] = c2;
+          new_cut->func_id = compute_truth_table( index, vcuts, new_cut );
+        }
+
+        cut_enumeration_update_cut<CutData>::apply( new_cut, cuts, ntk, index ); /* NOTE(review): merge_cuts passes ntk.index_to_node( index ) here - confirm */
+
+        rcuts.insert( new_cut );
+      }
+    }
+
+    /* keep at most cut_limit - 1 cuts; the unit cut may be added afterwards */
+    rcuts.limit( ps.cut_limit - 1 );
+
+    cuts._total_cuts += rcuts.size();
+    /* add the unit cut unless the only cut is already trivial; an empty set gets one without reading begin() */
+    if ( rcuts.size() != 1 || ( *rcuts.begin() )->size() > 1 )
+    {
+      cuts.add_unit_cut( index );
+    }
+  }
+
+  void merge_cuts( uint32_t index ) /* cut computation for a node with an arbitrary number of fanins */
+  {
+    uint32_t pairs{1};
+    std::vector<uint32_t> cut_sizes;
+    ntk.foreach_fanin( ntk.index_to_node( index ), [this, &pairs, &cut_sizes]( auto child, auto i ) {
+      lcuts[i] = &cuts.cuts( ntk.node_to_index( ntk.get_node( child ) ) );
+      cut_sizes.push_back( static_cast<uint32_t>( lcuts[i]->size() ) );
+      pairs *= cut_sizes.back();
+    } );
+
+    const auto fanin = cut_sizes.size();
+    lcuts[fanin] = &cuts.cuts( index ); /* slot after the fanins holds the node's own cut set */
+
+    auto& rcuts = *lcuts[fanin];
+
+    if ( fanin > 1 && fanin <= ps.fanin_limit )
+    {
+      rcuts.clear();
+
+      cut_t new_cut, tmp_cut;
+
+      std::vector<cut_t const*> vcuts( fanin );
+
+      cuts._total_tuples += pairs;
+      foreach_mixed_radix_tuple( cut_sizes.begin(), cut_sizes.end(), [&]( auto begin, auto end ) {
+        auto it = vcuts.begin();
+        auto i = 0u;
+        while ( begin != end ) /* pick one cut per fanin according to the current tuple */
+        {
+          *it++ = &( ( *lcuts[i++] )[*begin++] );
+        }
+
+        if ( !vcuts[0]->merge( *vcuts[1], new_cut, NumVars ) )
+        {
+          return true; /* continue */
+        }
+
+        for ( i = 2; i < fanin; ++i )
+        {
+          tmp_cut = new_cut;
+          if ( !vcuts[i]->merge( tmp_cut, new_cut, NumVars ) )
+          {
+            return true; /* continue */
+          }
+        }
+
+        if ( rcuts.is_dominated( new_cut ) )
+        {
+          return true; /* continue */
+        }
+
+        if constexpr ( ComputeTruth )
+        {
+          new_cut->func_id = compute_truth_table( index, vcuts, new_cut );
+        }
+
+        cut_enumeration_update_cut<CutData>::apply( new_cut, cuts, ntk, ntk.index_to_node( index ) );
+
+        rcuts.insert( new_cut );
+
+        return true;
+      } );
+
+      /* keep at most cut_limit - 1 cuts; the unit cut is added afterwards */
+      rcuts.limit( ps.cut_limit - 1 );
+    } else if ( fanin == 1 ) {
+      rcuts.clear();
+
+      for ( auto const& cut : *lcuts[0] ) {
+        cut_t new_cut = *cut; /* single fanin: reuse the fanin's cut leaves */
+
+        if constexpr ( ComputeTruth )
+        {
+          new_cut->func_id = compute_truth_table( index, {cut}, new_cut );
+        }
+
+        cut_enumeration_update_cut<CutData>::apply( new_cut, cuts, ntk, ntk.index_to_node( index ) );
+
+        rcuts.insert( new_cut );
+      }
+
+      /* keep at most cut_limit - 1 cuts; the unit cut is added afterwards */
+      rcuts.limit( ps.cut_limit - 1 );
+    }
+
+    cuts._total_cuts += static_cast<uint32_t>( rcuts.size() );
+
+    cuts.add_unit_cut( index ); /* unlike merge_cuts2, the unit cut is always added */
+  }
+
+private:
+  Ntk const& ntk;
+  cut_enumeration_params const& ps;
+  cut_enumeration_stats& st;
+  fast_network_cuts<Ntk, NumVars, ComputeTruth, CutData>& cuts;
+
+  std::array<cut_set_t*, Ntk::max_fanin_size + 1> lcuts; /* fanin cut sets followed by the node's own cut set */
+};
+} /* namespace detail */
+/*! \endcond */
+
+/*! \brief Fast cut enumeration.
+ *
+ * This function implements the cut enumeration algorithm.  The algorithm
+ * traverses all nodes in topological order and computes a node's cuts based
+ * on its fanins' cuts.  Dominated cuts are filtered and are not added to the
+ * cut set.  For each node a unit cut is added to the end of each cut set.
+ *
+ * The template parameter `ComputeTruth` controls whether truth tables should
+ * be computed for each cut.  Computing truth tables slows down the execution
+ * time of the algorithm.
+ * 
+ * The cut size is controlled using the template parameter `NumVars` instead
+ * of the `cut_size` parameter as in `cut_enumeration`.
+ * 
+ * Compared to `cut_enumeration`, it uses static truth tables instead of
+ * dynamic truth tables to speed up the truth table computation.
+ *
+ * The number of computed cuts is controlled via the `cut_limit` parameter.
+ * To decide which cuts are collected in each node's cut set, cuts are sorted.
+ * Unit cuts do not participate in the sorting and are always added to the end
+ * of each cut set.
+ *
+ * The algorithm can be configured by specifying the template argument `CutData`
+ * which holds the application specific data assigned to each cut.  Examples
+ * on how to specify custom cost functions for sorting cuts based on the
+ * application specific cut data can be found in the files contained in the
+ * directory `include/mockturtle/algorithms/cut_enumeration`.
+ *
+ * **Required network functions:**
+ * - `is_constant`
+ * - `is_pi`
+ * - `size`
+ * - `get_node`
+ * - `node_to_index`
+ * - `foreach_node`
+ * - `foreach_fanin`
+ * - `compute` for `kitty::static_truth_table` (if `ComputeTruth` is true)
+ *
+   \verbatim embed:rst
+
+   .. warning::
+
+      This algorithm expects the nodes in the network to be in topological
+      order.  If the network does not guarantee a topological order of nodes
+      one can wrap the network parameter in a ``topo_view`` view.
+
+   .. note::
+
+      The implementation of this algorithm was heavily inspired by cut
+      enumeration implementations in ABC.
+   \endverbatim
+ */
+template<typename Ntk, uint32_t NumVars, bool ComputeTruth, typename CutData>
+fast_network_cuts<Ntk, NumVars, ComputeTruth, CutData> fast_cut_enumeration( Ntk const& ntk, cut_enumeration_params const& ps, cut_enumeration_stats * pst )
+{
+  static_assert( is_network_type_v<Ntk>, "Ntk is not a network type" );
+  static_assert( has_is_constant_v<Ntk>, "Ntk does not implement the is_constant method" );
+  static_assert( has_is_pi_v<Ntk>, "Ntk does not implement the is_pi method" );
+  static_assert( has_size_v<Ntk>, "Ntk does not implement the size method" );
+  static_assert( has_get_node_v<Ntk>, "Ntk does not implement the get_node method" );
+  static_assert( has_foreach_node_v<Ntk>, "Ntk does not implement the foreach_node method" );
+  static_assert( has_foreach_fanin_v<Ntk>, "Ntk does not implement the foreach_fanin method" );
+  static_assert( has_node_to_index_v<Ntk>, "Ntk does not implement the node_to_index method" );
+  static_assert( !ComputeTruth || has_compute_v<Ntk, kitty::static_truth_table<NumVars>>, "Ntk does not implement the compute method for kitty::static_truth_table" );
+
+  cut_enumeration_stats st;
+  fast_network_cuts<Ntk, NumVars, ComputeTruth, CutData> res( ntk.size() );
+  detail::fast_cut_enumeration_impl<Ntk, NumVars, ComputeTruth, CutData> p( ntk, ps, st, res );
+  p.run();
+
+  if ( ps.verbose )
+  {
+    st.report();
+  }
+  if ( pst )
+  {
+    *pst = st;
+  }
+
+  return res;
+}
+
 // This function expects to receive a network where nodes are sorted in
 // topological order. Cuts are represented as a 64-bit bit vector where each bit
 // determines whether a given node exists in the cut.
diff --git a/include/mockturtle/algorithms/detail/switching_activity.hpp b/include/mockturtle/algorithms/detail/switching_activity.hpp
new file mode 100644
index 000000000..9480f0e59
--- /dev/null
+++ b/include/mockturtle/algorithms/detail/switching_activity.hpp
@@ -0,0 +1,71 @@
+/* mockturtle: C++ logic network library
+ * Copyright (C) 2018-2021  EPFL
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use,
+ * copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following
+ * conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+ * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+ * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+/*!
+  \file switching_activity.hpp
+  \brief Utility to compute the switching activity
+
+  \author Alessandro Tempia Calvino
+*/
+
+#pragma once
+
+#include <vector>
+
+// #include "../../utils/node_map.hpp"
+#include "../simulation.hpp"
+
+#include <kitty/bit_operations.hpp>
+#include <kitty/partial_truth_table.hpp>
+
+namespace mockturtle::detail
+{
+
+/*! \brief Switching Activity.
+ *
+ * This function computes the switching activity for each node
+ * in the network by performing random simulation.
+ *
+ * \param ntk Network
+ * \param simulation_size Number of simulation bits
+ */
+template<typename Ntk>
+std::vector<float> switching_activity( Ntk const& ntk, unsigned simulation_size = 2048 )
+{
+  std::vector<float> sw_map( ntk.size() ); /* value-initialized: every activity starts at 0 */
+  if ( simulation_size == 0u )
+    return sw_map; /* no patterns to simulate: avoid the 0/0 (NaN) below */
+
+  partial_simulator sim( ntk.num_pis(), simulation_size );
+  auto tts = simulate_nodes<kitty::partial_truth_table, Ntk, partial_simulator>( ntk, sim );
+  ntk.foreach_node( [&]( auto const& n ) {
+    /* with p = ones / simulation_size, the activity is 2 * p * ( 1 - p ) */
+    float const ones = static_cast<float>( kitty::count_ones( tts[n] ) );
+    sw_map[ntk.node_to_index( n )] = static_cast<float>( 2.0 * ones / simulation_size * ( simulation_size - ones ) / simulation_size );
+  } );
+  return sw_map;
+}
+
+} // namespace mockturtle::detail
\ No newline at end of file
diff --git a/include/mockturtle/algorithms/mapper.hpp b/include/mockturtle/algorithms/mapper.hpp
index 5a464d31e..5da30d622 100644
--- a/include/mockturtle/algorithms/mapper.hpp
+++ b/include/mockturtle/algorithms/mapper.hpp
@@ -45,6 +45,7 @@
 #include "cut_enumeration.hpp"
 #include "cut_enumeration/exact_map_cut.hpp"
 #include "cut_enumeration/tech_map_cut.hpp"
+#include "detail/switching_activity.hpp"
 
 namespace mockturtle
 {
@@ -58,7 +59,6 @@ struct map_params
 {
   map_params()
   {
-    cut_enumeration_ps.cut_size = 4;
     cut_enumeration_ps.cut_limit = 25;
     cut_enumeration_ps.minimize_truth_table = true;
   }
@@ -81,6 +81,12 @@ struct map_params
   /*! \brief Number of rounds for exact area optimization. */
   uint32_t ela_rounds{ 2u };
 
+  /*! \brief Number of rounds for exact switching power optimization. */
+  uint32_t eswp_rounds{ 0u };
+
+  /*! \brief Number of patterns for switching activity computation. */
+  uint32_t switching_activity_patterns{ 2048 };
+
   /*! \brief Use structural choices. */
   bool choices{ false };
 
@@ -95,9 +101,10 @@ struct map_params
  */
 struct map_stats
 {
-  /*! \brief Area and delay results. */
+  /*! \brief Area, delay, and power results. */
   double area{ 0 };
   double delay{ 0 };
+  double power{ 0 };
 
   /*! \brief Runtime. */
   stopwatch<>::duration time_mapping{ 0 };
@@ -121,7 +128,11 @@ struct map_stats
     {
       std::cout << stat;
     }
-    std::cout << fmt::format( "[i] Area = {:>5.2f}; Delay = {:>5.2f}\n", area, delay );
+    std::cout << fmt::format( "[i] Area = {:>5.2f}; Delay = {:>5.2f};", area, delay );
+    if ( power != 0 )
+      std::cout << fmt::format( " Power = {:>5.2f};\n", power );
+    else
+      std::cout << "\n";
     std::cout << fmt::format( "[i] Mapping runtime = {:>5.2f} secs\n", to_seconds( time_mapping ) );
     std::cout << fmt::format( "[i] Total runtime   = {:>5.2f} secs\n", to_seconds( time_total ) );
     if ( !gates_usage.empty() )
@@ -160,24 +171,38 @@ struct node_match_tech
   float flows[3];
 };
 
-template<class Ntk, unsigned NInputs, typename CutData>
+template<class Ntk, unsigned CutSize, typename CutData, unsigned NInputs>
 class tech_map_impl
 {
 public:
-  using network_cuts_t = network_cuts<Ntk, true, CutData>;
+  using network_cuts_t = fast_network_cuts<Ntk, CutSize, true, CutData>;
   using cut_t = typename network_cuts_t::cut_t;
   using supergate_t = std::array<std::vector<supergate<NInputs>> const*, 2>;
   using klut_map = std::unordered_map<uint32_t, std::array<signal<klut_network>, 2>>;
 
 public:
-  tech_map_impl( Ntk const& ntk, tech_library<NInputs> const& library, map_params const& ps, map_stats& st )
+  explicit tech_map_impl( Ntk const& ntk, tech_library<NInputs> const& library, map_params const& ps, map_stats& st )
+      : ntk( ntk ),
+        library( library ),
+        ps( ps ),
+        st( st ),
+        node_match( ntk.size() ), /* one match record per network node */
+        matches(),
+        switch_activity( ps.eswp_rounds ? switching_activity( ntk, ps.switching_activity_patterns ) : std::vector<float>( 0 ) ), /* simulated only when switching-power rounds are requested, empty otherwise */
+        cuts( fast_cut_enumeration<Ntk, CutSize, true, CutData>( ntk, ps.cut_enumeration_ps, &st.cut_enumeration_st ) ) /* enumeration statistics are written to st.cut_enumeration_st */
+  {
+    std::tie( lib_inv_area, lib_inv_delay, lib_inv_id ) = library.get_inverter_info(); /* cache the inverter information returned by the library */
+  }
+
+  explicit tech_map_impl( Ntk const& ntk, tech_library<NInputs> const& library, std::vector<float> const& switch_activity, map_params const& ps, map_stats& st )
       : ntk( ntk ),
         library( library ),
         ps( ps ),
         st( st ),
         node_match( ntk.size() ),
         matches(),
-        cuts( cut_enumeration<Ntk, true, CutData>( ntk, ps.cut_enumeration_ps, &st.cut_enumeration_st ) )
+        switch_activity( switch_activity ), /* caller-provided activities are copied into the mapper */
+        cuts( fast_cut_enumeration<Ntk, CutSize, true, CutData>( ntk, ps.cut_enumeration_ps, &st.cut_enumeration_st ) ) /* cut size must be CutSize to match network_cuts_t */
   {
     std::tie( lib_inv_area, lib_inv_delay, lib_inv_id ) = library.get_inverter_info();
   }
@@ -223,7 +248,17 @@ class tech_map_impl
     while ( iteration < ps.ela_rounds + ps.area_flow_rounds + 1 )
     {
       compute_required_time();
-      if ( !compute_mapping_exact_area() )
+      if ( !compute_mapping_exact<false>() )
+      {
+        return res;
+      }
+    }
+
+    /* compute mapping using exact switching power */
+    while ( iteration < ps.eswp_rounds + ps.ela_rounds + ps.area_flow_rounds + 1 )
+    {
+      compute_required_time();
+      if ( !compute_mapping_exact<true>() )
       {
         return res;
       }
@@ -286,7 +321,7 @@ class tech_map_impl
           continue;
         }
         const auto tt = cuts.truth_table( *cut );
-        const auto fe = kitty::extend_to<NInputs>( tt );
+        const auto fe = kitty::shrink_to<NInputs>( tt );
         auto const supergates_pos = library.get_supergates( fe );
         auto const supergates_neg = library.get_supergates( ~fe );
         if ( supergates_pos != nullptr || supergates_neg != nullptr )
@@ -353,7 +388,8 @@ class tech_map_impl
     return success;
   }
 
-  bool compute_mapping_exact_area()
+  template<bool SwitchActivity>
+  bool compute_mapping_exact()
   {
     for ( auto const& n : top_order )
     {
@@ -368,16 +404,16 @@ class tech_map_impl
       if ( node_data.same_match && node_data.map_refs[2] != 0 )
       {
         if ( node_data.best_supergate[0] != nullptr )
-          cut_deref( cuts.cuts( index )[node_data.best_cut[0]], n, 0u );
+          cut_deref<SwitchActivity>( cuts.cuts( index )[node_data.best_cut[0]], n, 0u );
         else
-          cut_deref( cuts.cuts( index )[node_data.best_cut[1]], n, 1u );
+          cut_deref<SwitchActivity>( cuts.cuts( index )[node_data.best_cut[1]], n, 1u );
       }
 
       /* match positive phase */
-      match_phase_exact( n, 0u );
+      match_phase_exact<SwitchActivity>( n, 0u );
 
       /* match negative phase */
-      match_phase_exact( n, 1u );
+      match_phase_exact<SwitchActivity>( n, 1u );
 
       /* try to drop one phase */
       match_drop_phase<true, true>( n, 0 );
@@ -391,7 +427,10 @@ class tech_map_impl
     {
       float area_gain = float( ( area_old - area ) / area_old * 100 );
       std::stringstream stats{};
-      stats << fmt::format( "[i] Area     : Delay = {:>12.2f}  Area = {:>12.2f}  {:>5.2f} %\n", delay, area, area_gain );
+      if constexpr ( SwitchActivity )
+        stats << fmt::format( "[i] Switching: Delay = {:>12.2f}  Area = {:>12.2f}  {:>5.2f} %\n", delay, area, area_gain );
+      else
+        stats << fmt::format( "[i] Area     : Delay = {:>12.2f}  Area = {:>12.2f}  {:>5.2f} %\n", delay, area, area_gain );
       st.round_stats.push_back( stats.str() );
     }
 
@@ -723,6 +762,7 @@ class tech_map_impl
     node_data.best_supergate[phase] = best_supergate;
   }
 
+  template<bool SwitchActivity>
   void match_phase_exact( node<Ntk> const& n, uint8_t phase )
   {
     double best_arrival = std::numeric_limits<double>::max();
@@ -760,12 +800,12 @@ class tech_map_impl
       /* if cut is implemented, remove it from the cover */
       if ( !node_data.same_match && node_data.map_refs[phase] )
       {
-        best_exact_area = cut_deref( cuts.cuts( index )[best_cut], n, phase );
+        best_exact_area = cut_deref<SwitchActivity>( cuts.cuts( index )[best_cut], n, phase );
       }
       else
       {
-        best_exact_area = cut_ref( cuts.cuts( index )[best_cut], n, phase );
-        cut_deref( cuts.cuts( index )[best_cut], n, phase );
+        best_exact_area = cut_ref<SwitchActivity>( cuts.cuts( index )[best_cut], n, phase );
+        cut_deref<SwitchActivity>( cuts.cuts( index )[best_cut], n, phase );
       }
     }
 
@@ -792,8 +832,8 @@ class tech_map_impl
       {
         node_data.phase[phase] = gate.polarity;
         node_data.area[phase] = gate.area;
-        float area_exact = cut_ref( *cut, n, phase );
-        cut_deref( *cut, n, phase );
+        float area_exact = cut_ref<SwitchActivity>( *cut, n, phase );
+        cut_deref<SwitchActivity>( *cut, n, phase );
         double worst_arrival = 0.0f;
 
         auto ctr = 0u;
@@ -831,7 +871,7 @@ class tech_map_impl
 
     if ( !node_data.same_match && node_data.map_refs[phase] )
     {
-      best_exact_area = cut_ref( cuts.cuts( index )[best_cut], n, phase );
+      best_exact_area = cut_ref<SwitchActivity>( cuts.cuts( index )[best_cut], n, phase );
     }
   }
 
@@ -854,7 +894,7 @@ class tech_map_impl
       if constexpr ( ELA )
       {
         if ( node_data.map_refs[2] )
-          cut_ref( cuts.cuts( index )[node_data.best_cut[1]], n, 1 );
+          cut_ref<false>( cuts.cuts( index )[node_data.best_cut[1]], n, 1 );
       }
       return;
     }
@@ -864,7 +904,7 @@ class tech_map_impl
       if constexpr ( ELA )
       {
         if ( node_data.map_refs[2] )
-          cut_ref( cuts.cuts( index )[node_data.best_cut[0]], n, 0 );
+          cut_ref<false>( cuts.cuts( index )[node_data.best_cut[0]], n, 0 );
       }
       return;
     }
@@ -958,13 +998,13 @@ class tech_map_impl
         {
           /* dereference the negative phase cut if in use */
           if ( node_data.map_refs[1] > 0 )
-            cut_deref( cuts.cuts( index )[node_data.best_cut[1]], n, 1 );
+            cut_deref<false>( cuts.cuts( index )[node_data.best_cut[1]], n, 1 );
           /* reference the positive cut if not in use before */
           if ( node_data.map_refs[0] == 0 && node_data.map_refs[2] )
-            cut_ref( cuts.cuts( index )[node_data.best_cut[0]], n, 0 );
+            cut_ref<false>( cuts.cuts( index )[node_data.best_cut[0]], n, 0 );
         }
         else if ( node_data.map_refs[2] )
-          cut_ref( cuts.cuts( index )[node_data.best_cut[0]], n, 0 );
+          cut_ref<false>( cuts.cuts( index )[node_data.best_cut[0]], n, 0 );
       }
       set_match_complemented_phase( index, 0, worst_arrival_nneg );
     }
@@ -977,13 +1017,13 @@ class tech_map_impl
         {
           /* dereference the positive phase cut if in use */
           if ( node_data.map_refs[0] > 0 )
-            cut_deref( cuts.cuts( index )[node_data.best_cut[0]], n, 0 );
+            cut_deref<false>( cuts.cuts( index )[node_data.best_cut[0]], n, 0 );
           /* reference the negative cut if not in use before */
           if ( node_data.map_refs[1] == 0 && node_data.map_refs[2] )
-            cut_ref( cuts.cuts( index )[node_data.best_cut[1]], n, 1 );
+            cut_ref<false>( cuts.cuts( index )[node_data.best_cut[1]], n, 1 );
         }
         else if ( node_data.map_refs[2] )
-          cut_ref( cuts.cuts( index )[node_data.best_cut[1]], n, 1 );
+          cut_ref<false>( cuts.cuts( index )[node_data.best_cut[1]], n, 1 );
       }
       set_match_complemented_phase( index, 1, worst_arrival_npos );
     }
@@ -1063,10 +1103,16 @@ class tech_map_impl
     return flow;
   }
 
+  template<bool SwitchActivity>
   float cut_ref( cut_t const& cut, node<Ntk> const& n, uint8_t phase )
   {
     auto const& node_data = node_match[ntk.node_to_index( n )];
-    float count = node_data.area[phase];
+    float count;
+
+    if constexpr ( SwitchActivity )
+      count = switch_activity[ntk.node_to_index( n )];
+    else
+      count = node_data.area[phase];
 
     uint8_t ctr = 0;
     for ( auto leaf : cut )
@@ -1084,7 +1130,12 @@ class tech_map_impl
         if ( leaf_phase == 1u )
         {
           if ( node_match[leaf].map_refs[1]++ == 0u )
-            count += lib_inv_area;
+          {
+            if constexpr ( SwitchActivity )
+              count += switch_activity[leaf];
+            else
+              count += lib_inv_area;
+          }
         }
         else
         {
@@ -1097,11 +1148,16 @@ class tech_map_impl
       {
         /* Add inverter area if not present yet and leaf node is implemented in the opposite phase */
         if ( node_match[leaf].map_refs[leaf_phase]++ == 0u && node_match[leaf].best_supergate[leaf_phase] == nullptr )
-          count += lib_inv_area;
+        {
+          if constexpr ( SwitchActivity )
+            count += switch_activity[leaf];
+          else
+            count += lib_inv_area;
+        }
         /* Recursive referencing if leaf was not referenced */
         if ( node_match[leaf].map_refs[2]++ == 0u )
         {
-          count += cut_ref( cuts.cuts( leaf )[node_match[leaf].best_cut[leaf_phase]], ntk.index_to_node( leaf ), leaf_phase );
+          count += cut_ref<SwitchActivity>( cuts.cuts( leaf )[node_match[leaf].best_cut[leaf_phase]], ntk.index_to_node( leaf ), leaf_phase );
         }
       }
       else
@@ -1109,17 +1165,24 @@ class tech_map_impl
         ++node_match[leaf].map_refs[2];
         if ( node_match[leaf].map_refs[leaf_phase]++ == 0u )
         {
-          count += cut_ref( cuts.cuts( leaf )[node_match[leaf].best_cut[leaf_phase]], ntk.index_to_node( leaf ), leaf_phase );
+          count += cut_ref<SwitchActivity>( cuts.cuts( leaf )[node_match[leaf].best_cut[leaf_phase]], ntk.index_to_node( leaf ), leaf_phase );
         }
       }
     }
     return count;
   }
 
+  template<bool SwitchActivity>
   float cut_deref( cut_t const& cut, node<Ntk> const& n, uint8_t phase )
   {
     auto const& node_data = node_match[ntk.node_to_index( n )];
-    float count = node_data.area[phase];
+    float count;
+
+    if constexpr ( SwitchActivity )
+      count = switch_activity[ntk.node_to_index( n )];
+    else
+      count = node_data.area[phase];
+
     uint8_t ctr = 0;
     for ( auto leaf : cut )
     {
@@ -1136,7 +1199,12 @@ class tech_map_impl
         if ( leaf_phase == 1u )
         {
           if ( --node_match[leaf].map_refs[1] == 0u )
-            count += lib_inv_area;
+          {
+            if constexpr ( SwitchActivity )
+              count += switch_activity[leaf];
+            else
+              count += lib_inv_area;
+          }
         }
         else
         {
@@ -1149,11 +1217,16 @@ class tech_map_impl
       {
         /* Add inverter area if it is used only by the current gate and leaf node is implemented in the opposite phase */
         if ( --node_match[leaf].map_refs[leaf_phase] == 0u && node_match[leaf].best_supergate[leaf_phase] == nullptr )
-          count += lib_inv_area;
+        {
+          if constexpr ( SwitchActivity )
+            count += switch_activity[leaf];
+          else
+            count += lib_inv_area;
+        }
         /* Recursive dereferencing */
         if ( --node_match[leaf].map_refs[2] == 0u )
         {
-          count += cut_deref( cuts.cuts( leaf )[node_match[leaf].best_cut[leaf_phase]], ntk.index_to_node( leaf ), leaf_phase );
+          count += cut_deref<SwitchActivity>( cuts.cuts( leaf )[node_match[leaf].best_cut[leaf_phase]], ntk.index_to_node( leaf ), leaf_phase );
         }
       }
       else
@@ -1161,7 +1234,7 @@ class tech_map_impl
         --node_match[leaf].map_refs[2];
         if ( --node_match[leaf].map_refs[leaf_phase] == 0u )
         {
-          count += cut_deref( cuts.cuts( leaf )[node_match[leaf].best_cut[leaf_phase]], ntk.index_to_node( leaf ), leaf_phase );
+          count += cut_deref<SwitchActivity>( cuts.cuts( leaf )[node_match[leaf].best_cut[leaf_phase]], ntk.index_to_node( leaf ), leaf_phase );
         }
       }
     }
@@ -1240,6 +1313,8 @@ class tech_map_impl
     /* write final results */
     st.area = area;
     st.delay = delay;
+    if ( ps.eswp_rounds )
+      st.power = compute_switching_power();
     compute_gates_usage();
   }
 
@@ -1398,6 +1473,52 @@ class tech_map_impl
     st.gates_usage = gates_usage.str();
   }
 
+  double compute_switching_power()
+  {
+    /* sum of the switching activities of the elements in the cover */
+    double total = 0.0;
+
+    ntk.foreach_node( [&]( auto const& n ) {
+      const auto idx = ntk.node_to_index( n );
+      auto const& match = node_match[idx];
+
+      if ( ntk.is_constant( n ) )
+      {
+        /* a constant without a gate in any phase adds nothing */
+        const bool no_gate = match.best_supergate[0] == nullptr && match.best_supergate[1] == nullptr;
+        if ( no_gate )
+          return true;
+      }
+      else if ( ntk.is_pi( n ) )
+      {
+        /* a primary input only counts when its negative phase is referenced */
+        if ( match.map_refs[1] > 0 )
+          total += switch_activity[idx];
+        return true;
+      }
+
+      /* nodes outside of the cover do not contribute */
+      if ( match.map_refs[2] == 0u )
+        return true;
+
+      /* phase that owns a gate, and its complement */
+      const unsigned main_phase = ( match.best_supergate[0] == nullptr ) ? 1u : 0u;
+      const unsigned other_phase = main_phase ^ 1u;
+
+      /* main phase: always counted when both phases share the same match */
+      if ( match.same_match || match.map_refs[main_phase] > 0 )
+        total += switch_activity[idx];
+
+      /* complemented phase: counted once more whenever it is referenced */
+      if ( match.map_refs[other_phase] > 0 )
+        total += switch_activity[idx];
+
+      return true;
+    } );
+
+    return total;
+  }
+
 private:
   Ntk const& ntk;
   tech_library<NInputs> const& library;
@@ -1417,6 +1538,7 @@ class tech_map_impl
   std::vector<node<Ntk>> top_order;
   std::vector<node_match_tech<NInputs>> node_match;
   std::unordered_map<uint32_t, std::vector<supergate_t>> matches;
+  std::vector<float> switch_activity;
   network_cuts_t cuts;
 };
 
@@ -1458,8 +1580,11 @@ class tech_map_impl
  * \param library Technology library
  * \param ps Mapping params
  * \param pst Mapping statistics
+ * 
+ * The implementation of this algorithm was inspired by the
+ * mapping command ``map`` in ABC.
  */
-template<class Ntk, unsigned NInputs, typename CutData = cut_enumeration_tech_map_cut>
+template<class Ntk, unsigned CutSize = 5u, typename CutData = cut_enumeration_tech_map_cut, unsigned NInputs>
 klut_network tech_map( Ntk const& ntk, tech_library<NInputs> const& library, map_params const& ps = {}, map_stats* pst = nullptr )
 {
   static_assert( is_network_type_v<Ntk>, "Ntk is not a network type" );
@@ -1474,7 +1599,7 @@ klut_network tech_map( Ntk const& ntk, tech_library<NInputs> const& library, map
   static_assert( has_fanout_size_v<Ntk>, "Ntk does not implement the fanout_size method" );
 
   map_stats st;
-  detail::tech_map_impl<Ntk, NInputs, CutData> p( ntk, library, ps, st );
+  detail::tech_map_impl<Ntk, CutSize, CutData, NInputs> p( ntk, library, ps, st );
   auto res = p.run();
 
   st.time_total = st.time_mapping + st.cut_enumeration_st.time_total;
@@ -1531,22 +1656,22 @@ struct node_match_t
   float flows[3];
 };
 
-template<class NtkDest, class Ntk, class RewritingFn, typename CutData, unsigned NInputs>
+template<class NtkDest, unsigned CutSize, typename CutData, class Ntk, class RewritingFn, unsigned NInputs>
 class exact_map_impl
 {
 public:
-  using network_cuts_t = network_cuts<Ntk, true, CutData>;
+  using network_cuts_t = fast_network_cuts<Ntk, CutSize, true, CutData>;
   using cut_t = typename network_cuts_t::cut_t;
 
 public:
-  exact_map_impl( Ntk& ntk, exact_library<NtkDest, RewritingFn, NInputs> const& library, map_params const& ps, map_stats& st )
+  explicit exact_map_impl( Ntk& ntk, exact_library<NtkDest, RewritingFn, NInputs> const& library, map_params const& ps, map_stats& st )
       : ntk( ntk ),
         library( library ),
         ps( ps ),
         st( st ),
         node_match( ntk.size() ),
         matches(),
-        cuts( cut_enumeration<Ntk, true, CutData>( ntk, ps.cut_enumeration_ps ) )
+        cuts( fast_cut_enumeration<Ntk, CutSize, true, CutData>( ntk, ps.cut_enumeration_ps, &st.cut_enumeration_st ) ) /* record the enumeration statistics: exact_map adds their time_total to st.time_total */
   {
     std::tie( lib_inv_area, lib_inv_delay ) = library.get_inverter_info();
   }
@@ -1648,7 +1773,7 @@ class exact_map_impl
 
         /* match the cut using canonization and get the gates */
         const auto tt = cuts.truth_table( *cut );
-        const auto fe = kitty::extend_to<NInputs>( tt );
+        const auto fe = kitty::shrink_to<NInputs>( tt );
         const auto config = kitty::exact_npn_canonization( fe );
         auto const supergates_npn = library.get_supergates( std::get<0>( config ) );
         auto const supergates_npn_neg = library.get_supergates( ~std::get<0>( config ) );
@@ -2620,7 +2745,7 @@ class exact_map_impl
  * \param ps Mapping params
  * \param pst Mapping statistics
  */
-template<class Ntk, class NtkDest = Ntk, class RewritingFn, unsigned NInputs, typename CutData = cut_enumeration_exact_map_cut>
+template<class Ntk, unsigned CutSize = 4u, typename CutData = cut_enumeration_exact_map_cut, class NtkDest, class RewritingFn, unsigned NInputs>
 NtkDest exact_map( Ntk& ntk, exact_library<NtkDest, RewritingFn, NInputs> const& library, map_params const& ps = {}, map_stats* pst = nullptr )
 {
   static_assert( is_network_type_v<Ntk>, "Ntk is not a network type" );
@@ -2635,7 +2760,7 @@ NtkDest exact_map( Ntk& ntk, exact_library<NtkDest, RewritingFn, NInputs> const&
   static_assert( has_fanout_size_v<Ntk>, "Ntk does not implement the fanout_size method" );
 
   map_stats st;
-  detail::exact_map_impl<NtkDest, Ntk, RewritingFn, CutData, NInputs> p( ntk, library, ps, st );
+  detail::exact_map_impl<NtkDest, CutSize, CutData, Ntk, RewritingFn, NInputs> p( ntk, library, ps, st );
   auto res = p.run();
 
   st.time_total = st.time_mapping + st.cut_enumeration_st.time_total;
diff --git a/include/mockturtle/utils/tech_library.hpp b/include/mockturtle/utils/tech_library.hpp
index 2cab50ffc..f29168e2c 100644
--- a/include/mockturtle/utils/tech_library.hpp
+++ b/include/mockturtle/utils/tech_library.hpp
@@ -118,7 +118,7 @@ struct supergate
       mockturtle::tech_library lib( gates );
    \endverbatim
  */
-template<unsigned NInputs = 5u>
+template<unsigned NInputs = 4u>
 class tech_library
 {
   using supergates_list_t = std::vector<supergate<NInputs>>;
@@ -126,7 +126,7 @@ class tech_library
   using lib_t = std::unordered_map<kitty::static_truth_table<NInputs>, supergates_list_t, tt_hash>;
 
 public:
-  tech_library( std::vector<gate> const& gates, tech_library_params const ps = {} )
+  explicit tech_library( std::vector<gate> const& gates, tech_library_params const ps = {} )
       : _gates( gates ),
         _ps( ps ),
         _super_lib()
@@ -377,7 +377,7 @@ class exact_library
   using lib_t = std::unordered_map<kitty::static_truth_table<NInputs>, supergates_list_t, tt_hash>;
 
 public:
-  exact_library( RewritingFn const& rewriting_fn, exact_library_params const& ps = {} )
+  explicit exact_library( RewritingFn const& rewriting_fn, exact_library_params const& ps = {} )
       : _database(),
         _rewriting_fn( rewriting_fn ),
         _ps( ps ),

From b70d06f3d3a090900c7685948d800fe4d45d481a Mon Sep 17 00:00:00 2001
From: aletempiac <alessandro.tempia@gmail.com>
Date: Fri, 30 Apr 2021 16:17:19 +0200
Subject: [PATCH 06/40] Mapper: added option to exploit logic sharing, name
 modifications, tests and experiments updates

---
 experiments/mapper.cpp                        |  83 ++--
 .../algorithms/detail/switching_activity.hpp  |   1 -
 include/mockturtle/algorithms/mapper.hpp      | 354 ++++++++++++++++--
 test/algorithms/mapper.cpp                    |  51 ++-
 4 files changed, 407 insertions(+), 82 deletions(-)

diff --git a/experiments/mapper.cpp b/experiments/mapper.cpp
index 3a59d4d07..fa3273fbc 100644
--- a/experiments/mapper.cpp
+++ b/experiments/mapper.cpp
@@ -29,7 +29,6 @@
 #include <fmt/format.h>
 #include <lorina/aiger.hpp>
 #include <lorina/genlib.hpp>
-#include <mockturtle/utils/tech_library.hpp>
 #include <mockturtle/algorithms/mapper.hpp>
 #include <mockturtle/algorithms/node_resynthesis.hpp>
 #include <mockturtle/algorithms/node_resynthesis/mig_npn.hpp>
@@ -37,51 +36,58 @@
 #include <mockturtle/io/genlib_reader.hpp>
 #include <mockturtle/networks/aig.hpp>
 #include <mockturtle/networks/klut.hpp>
+#include <mockturtle/networks/mig.hpp>
+#include <mockturtle/utils/tech_library.hpp>
 #include <mockturtle/views/depth_view.hpp>
 
-
 #include <experiments.hpp>
 
-std::string const mcnc_library =  "GATE   inv1    1 O=!a;           PIN * INV 1 999 0.9 0.3 0.9 0.3\n"
-                                  "GATE   inv2    2 O=!a;           PIN * INV 2 999 1.0 0.1 1.0 0.1\n"
-                                  "GATE   inv3    3 O=!a;           PIN * INV 3 999 1.1 0.09 1.1 0.09\n"
-                                  "GATE   inv4    4 O=!a;           PIN * INV 4 999 1.2 0.07 1.2 0.07\n"
-                                  "GATE   nand2   2 O=!(ab);        PIN * INV 1 999 1.0 0.2 1.0 0.2\n"
-                                  "GATE   nand3   3 O=!(abc);	      PIN * INV 1 999 1.1 0.3 1.1 0.3\n"
-                                  "GATE   nand4   4 O=!(abcd);      PIN * INV 1 999 1.4 0.4 1.4 0.4\n"
-                                  "GATE   nor2    2 O=!{ab};        PIN * INV 1 999 1.4 0.5 1.4 0.5\n"
-                                  "GATE   nor3    3 O=!{abc};       PIN * INV 1 999 2.4 0.7 2.4 0.7\n"
-                                  "GATE   nor4    4 O=!{abcd};      PIN * INV 1 999 3.8 1.0 3.8 1.0\n"
-                                  "GATE   and2    3 O=(ab);         PIN * NONINV 1 999 1.9 0.3 1.9 0.3\n"
-                                  "GATE   or2     3 O={ab};         PIN * NONINV 1 999 2.4 0.3 2.4 0.3\n"
-                                  "GATE   xor2a   5 O=[ab];         PIN * UNKNOWN 2 999 1.9 0.5 1.9 0.5\n"
-                                  "#GATE  xor2b   5 O=[ab];         PIN * UNKNOWN 2 999 1.9 0.5 1.9 0.5\n"
-                                  "GATE   xnor2a  5 O=![ab];        PIN * UNKNOWN 2 999 2.1 0.5 2.1 0.5\n"
-                                  "#GATE  xnor2b  5 O=![ab];        PIN * UNKNOWN 2 999 2.1 0.5 2.1 0.5\n"
-                                  "GATE   aoi21   3 O=!{(ab)c};     PIN * INV 1 999 1.6 0.4 1.6 0.4\n"
-                                  "GATE   aoi22   4 O=!{(ab)(cd)};  PIN * INV 1 999 2.0 0.4 2.0 0.4\n"
-                                  "GATE   oai21   3 O=!({ab}c);     PIN * INV 1 999 1.6 0.4 1.6 0.4\n"
-                                  "GATE   oai22   4 O=!({ab}{cd});  PIN * INV 1 999 2.0 0.4 2.0 0.4\n"
-                                  "GATE   buf     2 O=a;            PIN * NONINV 1 999 1.0 0.0 1.0 0.0\n"
-                                  "GATE   zero    0 O=0;\n"
-                                  "GATE   one     0 O=1;";
+std::string const mcnc_library = "GATE   inv1    1 O=!a;           PIN * INV 1 999 0.9 0.3 0.9 0.3\n"
+                                 "GATE   inv2    2 O=!a;           PIN * INV 2 999 1.0 0.1 1.0 0.1\n"
+                                 "GATE   inv3    3 O=!a;           PIN * INV 3 999 1.1 0.09 1.1 0.09\n"
+                                 "GATE   inv4    4 O=!a;           PIN * INV 4 999 1.2 0.07 1.2 0.07\n"
+                                 "GATE   nand2   2 O=!(ab);        PIN * INV 1 999 1.0 0.2 1.0 0.2\n"
+                                 "GATE   nand3   3 O=!(abc);	      PIN * INV 1 999 1.1 0.3 1.1 0.3\n"
+                                 "GATE   nand4   4 O=!(abcd);      PIN * INV 1 999 1.4 0.4 1.4 0.4\n"
+                                 "GATE   nor2    2 O=!{ab};        PIN * INV 1 999 1.4 0.5 1.4 0.5\n"
+                                 "GATE   nor3    3 O=!{abc};       PIN * INV 1 999 2.4 0.7 2.4 0.7\n"
+                                 "GATE   nor4    4 O=!{abcd};      PIN * INV 1 999 3.8 1.0 3.8 1.0\n"
+                                 "GATE   and2    3 O=(ab);         PIN * NONINV 1 999 1.9 0.3 1.9 0.3\n"
+                                 "GATE   or2     3 O={ab};         PIN * NONINV 1 999 2.4 0.3 2.4 0.3\n"
+                                 "GATE   xor2a   5 O=[ab];         PIN * UNKNOWN 2 999 1.9 0.5 1.9 0.5\n"
+                                 "#GATE  xor2b   5 O=[ab];         PIN * UNKNOWN 2 999 1.9 0.5 1.9 0.5\n"
+                                 "GATE   xnor2a  5 O=![ab];        PIN * UNKNOWN 2 999 2.1 0.5 2.1 0.5\n"
+                                 "#GATE  xnor2b  5 O=![ab];        PIN * UNKNOWN 2 999 2.1 0.5 2.1 0.5\n"
+                                 "GATE   aoi21   3 O=!{(ab)c};     PIN * INV 1 999 1.6 0.4 1.6 0.4\n"
+                                 "GATE   aoi22   4 O=!{(ab)(cd)};  PIN * INV 1 999 2.0 0.4 2.0 0.4\n"
+                                 "GATE   oai21   3 O=!({ab}c);     PIN * INV 1 999 1.6 0.4 1.6 0.4\n"
+                                 "GATE   oai22   4 O=!({ab}{cd});  PIN * INV 1 999 2.0 0.4 2.0 0.4\n"
+                                 "GATE   buf     2 O=a;            PIN * NONINV 1 999 1.0 0.0 1.0 0.0\n"
+                                 "GATE   zero    0 O=0;\n"
+                                 "GATE   one     0 O=1;";
 
 int main()
 {
   using namespace experiments;
   using namespace mockturtle;
 
-  experiment<std::string, uint32_t, double, uint32_t, float, float, bool> exp( "mapper", "benchmark", "size", "area_after", "depth", "delay_after", "runtime", "equivalent" );
+  experiment<std::string, uint32_t, uint32_t, double, uint32_t, uint32_t, double, float, float, bool, bool> exp(
+      "mapper", "benchmark", "size", "size_mig", "area_after", "depth", "depth_mig", "delay_after", "runtime1", "runtime2", "equivalent1", "equivalent2" );
 
   fmt::print( "[i] processing technology library\n" );
 
-  std::vector<gate> gates;
+  /* library to map to MIGs */
+  mig_npn_resynthesis resyn{ true };
+  exact_library_params eps;
+  exact_library<mig_network, mig_npn_resynthesis> exact_lib( resyn, eps );
 
+  /* library to map to technology */
+  std::vector<gate> gates;
   std::istringstream in( mcnc_library );
   lorina::read_genlib( in, genlib_reader( gates ) );
 
   tech_library_params tps;
-  tech_library<5> lib( gates, tps );
+  tech_library tech_lib( gates, tps );
 
   for ( auto const& benchmark : epfl_benchmarks() )
   {
@@ -92,15 +98,24 @@ int main()
     const uint32_t size_before = aig.num_gates();
     const uint32_t depth_before = depth_view( aig ).depth();
 
-    map_params ps;
-    ps.cut_enumeration_ps.cut_size = 5;
-    map_stats st;
+    map_params ps1;
+    ps1.skip_delay_round = true;
+    ps1.required_time = std::numeric_limits<float>::max();
+    map_stats st1;
+
+    mig_network res1 = map( aig, exact_lib, ps1, &st1 );
+
+    map_params ps2;
+    map_stats st2;
+
+    klut_network res2 = map( aig, tech_lib, ps2, &st2 );
 
-    auto res = tech_map( aig, lib, ps, &st );
+    const auto cec1 = benchmark == "hyp" ? true : abc_cec( res1, benchmark );
+    const auto cec2 = benchmark == "hyp" ? true : abc_cec( res2, benchmark );
 
-    const auto cec = benchmark == "hyp" ? true : abc_cec( res, benchmark );
+    const uint32_t depth_mig = depth_view( res1 ).depth();
 
-    exp( benchmark, size_before, st.area, depth_before, st.delay, to_seconds( st.time_total ), cec );
+    exp( benchmark, size_before, res1.num_gates(), st2.area, depth_before, depth_mig, st2.delay, to_seconds( st1.time_total ), to_seconds( st2.time_total ), cec1, cec2 );
   }
 
   exp.save();
diff --git a/include/mockturtle/algorithms/detail/switching_activity.hpp b/include/mockturtle/algorithms/detail/switching_activity.hpp
index 9480f0e59..4098d1ac7 100644
--- a/include/mockturtle/algorithms/detail/switching_activity.hpp
+++ b/include/mockturtle/algorithms/detail/switching_activity.hpp
@@ -34,7 +34,6 @@
 
 #include <vector>
 
-// #include "../../utils/node_map.hpp"
 #include "../simulation.hpp"
 
 #include <kitty/bit_operations.hpp>
diff --git a/include/mockturtle/algorithms/mapper.hpp b/include/mockturtle/algorithms/mapper.hpp
index 5da30d622..09a6077ce 100644
--- a/include/mockturtle/algorithms/mapper.hpp
+++ b/include/mockturtle/algorithms/mapper.hpp
@@ -41,10 +41,12 @@
 #include "../utils/node_map.hpp"
 #include "../utils/stopwatch.hpp"
 #include "../utils/tech_library.hpp"
+#include "../views/depth_view.hpp"
 #include "../views/topo_view.hpp"
 #include "cut_enumeration.hpp"
 #include "cut_enumeration/exact_map_cut.hpp"
 #include "cut_enumeration/tech_map_cut.hpp"
+#include "detail/mffc_utils.hpp"
 #include "detail/switching_activity.hpp"
 
 namespace mockturtle
@@ -85,10 +87,13 @@ struct map_params
   uint32_t eswp_rounds{ 0u };
 
   /*! \brief Number of patterns for switching activity computation. */
-  uint32_t switching_activity_patterns{ 2048 };
+  uint32_t switching_activity_patterns{ 2048u };
 
-  /*! \brief Use structural choices. */
-  bool choices{ false };
+  /*! \brief Exploit logic sharing in exact area optimization. */
+  bool enable_logic_sharing{ false };
+
+  /*! \brief Maximum number of cuts evaluated for logic sharing. */
+  uint32_t logic_sharing_cut_limit{ 8u };
 
   /*! \brief Be verbose. */
   bool verbose{ false };
@@ -1585,7 +1590,7 @@ class tech_map_impl
  * mapping command ``map`` in ABC.
  */
 template<class Ntk, unsigned CutSize = 5u, typename CutData = cut_enumeration_tech_map_cut, unsigned NInputs>
-klut_network tech_map( Ntk const& ntk, tech_library<NInputs> const& library, map_params const& ps = {}, map_stats* pst = nullptr )
+klut_network map( Ntk const& ntk, tech_library<NInputs> const& library, map_params const& ps = {}, map_stats* pst = nullptr )
 {
   static_assert( is_network_type_v<Ntk>, "Ntk is not a network type" );
   static_assert( has_size_v<Ntk>, "Ntk does not implement the size method" );
@@ -1669,6 +1674,7 @@ class exact_map_impl
         library( library ),
         ps( ps ),
         st( st ),
+        lib_database( library.get_database() ),
         node_match( ntk.size() ),
         matches(),
         cuts( fast_cut_enumeration<Ntk, CutSize, true, CutData>( ntk, ps.cut_enumeration_ps ) )
@@ -1711,13 +1717,19 @@ class exact_map_impl
     while ( iteration < ps.ela_rounds + ps.area_flow_rounds + 1 )
     {
       compute_required_time();
-      compute_exact_area();
+      if ( ps.enable_logic_sharing && iteration == ps.ela_rounds + ps.area_flow_rounds )
+        compute_exact_area_aggressive( res, old2new );
+      else
+        compute_exact_area();
     }
 
     /* generate the output network using the computed mapping */
     finalize_cover( res, old2new );
 
-    return res;
+    if ( ps.enable_logic_sharing )
+      return cleanup_dangling( res );
+    else
+      return res;
   }
 
 private:
@@ -1896,43 +1908,46 @@ class exact_map_impl
 
   void finalize_cover( NtkDest& res, node_map<signal<NtkDest>, Ntk>& old2new )
   {
-    auto const& db = library.get_database();
+    if ( !ps.enable_logic_sharing || iteration == ps.area_flow_rounds + 1 )
+    {
+      auto const& db = library.get_database();
 
-    ntk.foreach_node( [&]( auto const& n ) {
-      if ( ntk.is_constant( n ) || ntk.is_pi( n ) )
-        return true;
-      auto index = ntk.node_to_index( n );
-      if ( node_match[index].map_refs[2] == 0u )
-        return true;
+      ntk.foreach_node( [&]( auto const& n ) {
+        if ( ntk.is_constant( n ) || ntk.is_pi( n ) )
+          return true;
+        auto index = ntk.node_to_index( n );
+        if ( node_match[index].map_refs[2] == 0u )
+          return true;
 
-      /* get the implemented phase and map the best cut */
-      unsigned phase = ( node_match[index].best_supergate[0] != nullptr ) ? 0 : 1;
-      auto& best_cut = cuts.cuts( index )[node_match[index].best_cut[phase]];
+        /* get the implemented phase and map the best cut */
+        unsigned phase = ( node_match[index].best_supergate[0] != nullptr ) ? 0 : 1;
+        auto& best_cut = cuts.cuts( index )[node_match[index].best_cut[phase]];
 
-      std::vector<signal<NtkDest>> children( NInputs, res.get_constant( false ) );
-      auto const& match = matches[index][best_cut->data.match_index];
-      auto const& supergate = node_match[index].best_supergate[phase];
-      auto ctr = 0u;
-      for ( auto l : best_cut )
-      {
-        children[match.permutation[ctr++]] = old2new[ntk.index_to_node( l )];
-      }
-      for ( auto i = 0u; i < NInputs; ++i )
-      {
-        if ( ( match.negation >> i ) & 1 )
+        std::vector<signal<NtkDest>> children( NInputs, res.get_constant( false ) );
+        auto const& match = matches[index][best_cut->data.match_index];
+        auto const& supergate = node_match[index].best_supergate[phase];
+        auto ctr = 0u;
+        for ( auto l : best_cut )
         {
-          children[i] = !children[i];
+          children[match.permutation[ctr++]] = old2new[ntk.index_to_node( l )];
         }
-      }
-      topo_view topo{ db, supergate->root };
-      auto f = cleanup_dangling( topo, res, children.begin(), children.end() ).front();
+        for ( auto i = 0u; i < NInputs; ++i )
+        {
+          if ( ( match.negation >> i ) & 1 )
+          {
+            children[i] = !children[i];
+          }
+        }
+        topo_view topo{ db, supergate->root };
+        auto f = cleanup_dangling( topo, res, children.begin(), children.end() ).front();
 
-      if ( phase == 1 )
-        f = !f;
+        if ( phase == 1 )
+          f = !f;
 
-      old2new[n] = f;
-      return true;
-    } );
+        old2new[n] = f;
+        return true;
+      } );
+    }
 
     /* create POs */
     ntk.foreach_po( [&]( auto const& f ) {
@@ -2385,6 +2400,267 @@ class exact_map_impl
     }
   }
 
+  void compute_exact_area_aggressive( NtkDest& res, node_map<signal<NtkDest>, Ntk>& old2new )
+  {
+    depth_view<NtkDest> res_d{ res };
+
+    for ( auto const& n : top_order )
+    {
+      if ( ntk.is_constant( n ) || ntk.is_pi( n ) )
+        continue;
+
+      auto index = ntk.node_to_index( n );
+      auto& node_data = node_match[index];
+
+      /* recursively deselect the best cut shared between
+       * the two phases if in use in the cover */
+      if ( node_data.same_match && node_data.map_refs[2] != 0 )
+      {
+        if ( node_data.best_supergate[0] != nullptr )
+          cut_deref( cuts.cuts( index )[node_data.best_cut[0]], n, 0u );
+        else
+          cut_deref( cuts.cuts( index )[node_data.best_cut[1]], n, 1u );
+      }
+
+      /* match positive phase */
+      auto sig0 = match_phase_exact_aggressive( res_d, old2new, n, 0u );
+
+      /* match negative phase */
+      auto sig1 = match_phase_exact_aggressive( res_d, old2new, n, 1u );
+
+      /* try to drop one phase */
+      float worst_arrival_npos = node_data.arrival[1] + lib_inv_delay;
+      float worst_arrival_nneg = node_data.arrival[0] + lib_inv_delay;
+      bool use_zero = false;
+      bool use_one = false;
+      if ( node_data.best_supergate[0] == nullptr )
+      {
+        set_match_complemented_phase( index, 1, worst_arrival_npos );
+        if ( node_data.map_refs[2] )
+        {
+          cut_ref( cuts.cuts( index )[node_data.best_cut[1]], n, 1 );
+          recursive_ref<NtkDest>( res, res.get_node( sig1 ) );
+        }
+        old2new[n] = sig1;
+        continue;
+      }
+      else if ( node_data.best_supergate[1] == nullptr )
+      {
+        set_match_complemented_phase( index, 0, worst_arrival_nneg );
+        if ( node_data.map_refs[2] )
+        {
+          cut_ref( cuts.cuts( index )[node_data.best_cut[0]], n, 0 );
+          recursive_ref<NtkDest>( res, res.get_node( sig0 ) );
+        }
+        old2new[n] = sig0;
+        continue;
+      }
+      use_zero = worst_arrival_nneg < node_data.required[1] + epsilon;
+      use_one = worst_arrival_npos < node_data.required[0] + epsilon;
+
+      if ( use_zero && use_one )
+      {
+        auto size_zero = cuts.cuts( index )[node_data.best_cut[0]].size();
+        auto size_one = cuts.cuts( index )[node_data.best_cut[1]].size();
+        if ( compare_map<true>( worst_arrival_nneg, worst_arrival_npos, node_data.flows[0], node_data.flows[1], size_zero, size_one ) )
+          use_one = false;
+        else
+          use_zero = false;
+      }
+
+      if ( use_zero )
+      {
+        if ( node_data.map_refs[2] )
+        {
+          cut_ref( cuts.cuts( index )[node_data.best_cut[0]], n, 0 );
+          recursive_ref<NtkDest>( res, res.get_node( sig0 ) );
+        }
+        set_match_complemented_phase( index, 0, worst_arrival_nneg );
+        old2new[n] = sig0;
+      }
+      else
+      {
+        if ( node_data.map_refs[2] )
+        {
+          cut_ref( cuts.cuts( index )[node_data.best_cut[1]], n, 1 );
+          recursive_ref<NtkDest>( res, res.get_node( sig1 ) );
+        }
+        set_match_complemented_phase( index, 1, worst_arrival_npos );
+        old2new[n] = sig1;
+      }
+    }
+    
+    double area_old = area;
+    set_mapping_refs<true>();
+
+    /* round stats */
+    if ( ps.verbose )
+    {
+      float area_gain = float( ( area_old - area ) / area_old * 100 );
+      std::stringstream stats{};
+      stats << fmt::format( "[i] Area RW  : Delay = {:>12.2f}  Area = {:>12.2f}  {:>5.2f} %\n", delay, area, area_gain );
+      st.round_stats.push_back( stats.str() );
+    }
+  }
+
+  signal<NtkDest> match_phase_exact_aggressive( depth_view<NtkDest>& res, node_map<signal<NtkDest>, Ntk>& old2new, node<Ntk> const& n, uint8_t phase )
+  {
+    signal<NtkDest> best_signal = res.get_constant( false );
+
+    float best_arrival = std::numeric_limits<float>::max();
+    float best_exact_area = std::numeric_limits<float>::max();
+    float best_area = std::numeric_limits<float>::max();
+    uint32_t best_size = UINT32_MAX;
+    uint8_t best_cut = 0u;
+    uint8_t best_phase = 0u;
+    uint8_t cut_index = 0u;
+    auto index = ntk.node_to_index( n );
+
+    auto& node_data = node_match[index];
+    auto& cut_matches = matches[index];
+    exact_supergate<NtkDest, NInputs> const* best_supergate = node_data.best_supergate[phase];
+
+    /* create best match info */
+    if ( best_supergate != nullptr )
+    {
+      auto const& cut = cuts.cuts( index )[node_data.best_cut[phase]];
+      auto const& supergates = cut_matches[( cut )->data.match_index];
+
+      /* permute the children to the NPN-representative configuration */
+      std::vector<signal<NtkDest>> children( NInputs, res.get_constant( false ) );
+      auto ctr = 0u;
+
+      for ( auto l : cut )
+      {
+        children[supergates.permutation[ctr++]] = old2new[ntk.index_to_node( l )];
+      }
+
+      best_phase = supergates.negation;
+      best_cut = node_data.best_cut[phase];
+      best_size = cut.size();
+      for ( auto i = 0u; i < NInputs; ++i )
+      {
+        if ( ( best_phase >> i ) & 1 )
+        {
+          children[i] = !children[i];
+        }
+      }
+      topo_view topo{ lib_database, best_supergate->root };
+      auto f = cleanup_dangling( topo, res, children.begin(), children.end() ).front();
+
+      if ( phase == 1 )
+        f = !f;
+
+      best_signal = f;
+
+      best_arrival = res.level( res.get_node( f ) );
+
+      /* if cut is implemented, remove it from the cover */
+      if ( !node_data.same_match && node_data.map_refs[phase] )
+      {
+        best_area = recursive_ref<NtkDest>( res, res.get_node( f ) );
+        recursive_deref<NtkDest>( res, res.get_node( f ) );
+        best_exact_area = cut_deref( cuts.cuts( index )[best_cut], n, phase );
+      }
+      else
+      {
+        best_area = recursive_ref<NtkDest>( res, res.get_node( f ) );
+        recursive_deref<NtkDest>( res, res.get_node( f ) );
+        best_exact_area = cut_ref( cuts.cuts( index )[best_cut], n, phase );
+        cut_deref( cuts.cuts( index )[best_cut], n, phase );
+      }
+    }
+
+    /* foreach cut */
+    unsigned int rewrite_count = 1u;
+    for ( auto& cut : cuts.cuts( index ) )
+    {
+      /* trivial cuts, not matched cuts, or rewriting limit reached */
+      if ( ( *cut )->data.ignore || ( rewrite_count > ps.logic_sharing_cut_limit && cut_index != best_cut ) )
+      {
+        ++cut_index;
+        continue;
+      }
+
+      auto const& supergates = cut_matches[( *cut )->data.match_index];
+
+      if ( supergates.supergates[phase] == nullptr )
+      {
+        ++cut_index;
+        continue;
+      }
+
+      ++rewrite_count;
+
+      std::vector<signal<NtkDest>> children( NInputs, res.get_constant( false ) );
+
+      auto ctr = 0u;
+      for ( auto l : *cut )
+      {
+        children[supergates.permutation[ctr++]] = old2new[ntk.index_to_node( l )];
+      }
+
+      /* match each gate and take the best one */
+      for ( auto const& gate : *supergates.supergates[phase] )
+      {
+        uint8_t complement = supergates.negation;
+        node_data.phase[phase] = complement;
+
+        /* rewrite each structure and measure the logic sharing */
+        std::vector<signal<NtkDest>> children_loc( NInputs );
+
+        for ( auto ctr = 0u; ctr < NInputs; ++ctr )
+        {
+          children_loc[ctr] = children[ctr] ^ ( ( ( complement >> ctr ) & 1 ) == 1 );
+        }
+        topo_view topo{ lib_database, gate.root };
+        auto f = cleanup_dangling( topo, res, children_loc.begin(), children_loc.end() ).front();
+
+        if ( phase == 1 )
+          f = !f;
+
+        float worst_arrival = res.level( res.get_node( f ) );
+
+        float area_hashed = recursive_ref<NtkDest>( res, res.get_node( f ) );
+        node_data.area[phase] = area_hashed;
+        recursive_deref<NtkDest>( res, res.get_node( f ) );
+        auto area_exact = cut_ref( *cut, n, phase );
+        cut_deref( *cut, n, phase );
+
+        if ( worst_arrival > node_data.required[phase] + epsilon )
+          continue;
+
+        if ( compare_map<true>( worst_arrival, best_arrival, area_exact, best_exact_area, cut->size(), best_size ) )
+        {
+          best_arrival = worst_arrival;
+          best_exact_area = area_exact;
+          best_area = area_hashed;
+          best_size = cut->size();
+          best_cut = cut_index;
+          best_phase = complement;
+          best_supergate = &gate;
+          best_signal = f;
+        }
+      }
+
+      ++cut_index;
+    }
+    old2new[n] = best_signal;
+    node_data.flows[phase] = best_exact_area;
+    node_data.arrival[phase] = best_arrival;
+    node_data.area[phase] = best_area;
+    node_data.best_cut[phase] = best_cut;
+    node_data.phase[phase] = best_phase;
+    node_data.best_supergate[phase] = best_supergate;
+
+    if ( !node_data.same_match && node_data.map_refs[phase] )
+    {
+      recursive_ref<NtkDest>( res, res.get_node( best_signal ) );
+      best_exact_area = cut_ref( cuts.cuts( index )[best_cut], n, phase );
+    }
+    return best_signal;
+  }
+
   template<bool DO_AREA, bool ELA>
   void match_drop_phase( node<Ntk> const& n, unsigned area_margin_factor )
   {
@@ -2699,6 +2975,8 @@ class exact_map_impl
   float lib_inv_area;
   float lib_inv_delay;
 
+  NtkDest const& lib_database;
+
   std::vector<node<Ntk>> top_order;
   std::vector<node_match_t<NtkDest, NInputs>> node_match;
   std::unordered_map<uint32_t, std::vector<cut_match_t<NtkDest, NInputs>>> matches;
@@ -2709,7 +2987,7 @@ class exact_map_impl
 
 /*! \brief Exact mapping.
  *
- * This function implements a mapping algorithm using an exact synthesis database.
+ * This function implements a mapping algorithm using a database of structures.
  * It is controlled by a template argument `CutData` (defaulted to
  * `cut_enumeration_exact_map_cut`). The argument is similar to the `CutData` argument
  * in `cut_enumeration`, which can specialize the cost function to select priority
@@ -2746,7 +3024,7 @@ class exact_map_impl
  * \param pst Mapping statistics
  */
 template<class Ntk, unsigned CutSize = 4u, typename CutData = cut_enumeration_exact_map_cut, class NtkDest, class RewritingFn, unsigned NInputs>
-NtkDest exact_map( Ntk& ntk, exact_library<NtkDest, RewritingFn, NInputs> const& library, map_params const& ps = {}, map_stats* pst = nullptr )
+NtkDest map( Ntk& ntk, exact_library<NtkDest, RewritingFn, NInputs> const& library, map_params const& ps = {}, map_stats* pst = nullptr )
 {
   static_assert( is_network_type_v<Ntk>, "Ntk is not a network type" );
   static_assert( has_size_v<Ntk>, "Ntk does not implement the size method" );
diff --git a/test/algorithms/mapper.cpp b/test/algorithms/mapper.cpp
index c92dc860b..dd2a6c75a 100644
--- a/test/algorithms/mapper.cpp
+++ b/test/algorithms/mapper.cpp
@@ -52,7 +52,7 @@ TEST_CASE( "Map of MAJ3", "[mapper]" )
 
   map_params ps;
   map_stats st;
-  klut_network luts = tech_map( aig, lib, ps, &st );
+  klut_network luts = map( aig, lib, ps, &st );
 
   CHECK( luts.size() == 6u );
   CHECK( luts.num_pis() == 3u );
@@ -84,7 +84,7 @@ TEST_CASE( "Map of bad MAJ3 and constant output", "[mapper]" )
 
   map_params ps;
   map_stats st;
-  klut_network luts = tech_map( aig, lib, ps, &st );
+  klut_network luts = map( aig, lib, ps, &st );
 
   CHECK( luts.size() == 6u );
   CHECK( luts.num_pis() == 3u );
@@ -116,7 +116,7 @@ TEST_CASE( "Map of full adder", "[mapper]" )
 
   map_params ps;
   map_stats st;
-  klut_network luts = tech_map( aig, lib, ps, &st );
+  klut_network luts = map( aig, lib, ps, &st );
 
   const float eps{0.005f};
 
@@ -153,7 +153,7 @@ TEST_CASE( "Map with inverters", "[mapper]" )
 
   map_params ps;
   map_stats st;
-  klut_network luts = tech_map( aig, lib, ps, &st );
+  klut_network luts = map( aig, lib, ps, &st );
 
   const float eps{0.005f};
 
@@ -188,7 +188,7 @@ TEST_CASE( "Map for inverters minimization", "[mapper]" )
 
   map_params ps;
   map_stats st;
-  klut_network luts = tech_map( aig, lib, ps, &st );
+  klut_network luts = map( aig, lib, ps, &st );
 
   const float eps{0.005f};
 
@@ -239,7 +239,7 @@ TEST_CASE( "Map of buffer and constant outputs", "[mapper]" )
 
   map_params ps;
   map_stats st;
-  klut_network luts = tech_map( aig, lib, ps, &st );
+  klut_network luts = map( aig, lib, ps, &st );
 
   const float eps{0.005f};
 
@@ -270,7 +270,7 @@ TEST_CASE( "Exact map of bad MAJ3 and constant output", "[mapper]" )
 
   map_params ps;
   map_stats st;
-  mig_network mig = exact_map( aig, lib, ps, &st );
+  mig_network mig = map( aig, lib, ps, &st );
 
   CHECK( mig.size() == 5u );
   CHECK( mig.num_pis() == 3u );
@@ -297,7 +297,7 @@ TEST_CASE( "Exact map of full adder", "[mapper]" )
 
   map_params ps;
   map_stats st;
-  xmg_network xmg = exact_map( aig, lib, ps, &st );
+  xmg_network xmg = map( aig, lib, ps, &st );
 
   CHECK( xmg.size() == 7u );
   CHECK( xmg.num_pis() == 3u );
@@ -333,7 +333,7 @@ TEST_CASE( "Exact map should avoid cycles", "[mapping]" )
   
   map_params ps;
   map_stats st;
-  aig_network res = exact_map( aig, lib, ps, &st );
+  aig_network res = map( aig, lib, ps, &st );
   
   CHECK( res.size() == 12 );
   CHECK( res.num_pis() == 3 );
@@ -341,4 +341,37 @@ TEST_CASE( "Exact map should avoid cycles", "[mapping]" )
   CHECK( res.num_gates() == 8 );
   CHECK( st.area == 8.0f );
   CHECK( st.delay == 3.0f );
+}
+
+TEST_CASE( "Exact map with logic sharing", "[mapping]" )
+{
+  using resyn_fn = xag_npn_resynthesis<aig_network>;
+
+  resyn_fn resyn;
+
+  exact_library<aig_network, resyn_fn>  lib( resyn );
+
+  aig_network aig;
+  const auto x0 = aig.create_pi();
+  const auto x1 = aig.create_pi();
+  const auto x2 = aig.create_pi();
+  const auto x3 = aig.create_pi();
+
+  const auto n0 = aig.create_and( x0, !x1 );
+  const auto n1 = aig.create_and( x2, x3 );
+  const auto n2 = aig.create_and( x1, x2 );
+  const auto n3 = aig.create_and( n0, n1 );
+  const auto n4 = aig.create_and( n2, x3 );
+  aig.create_po( !n3 );
+  aig.create_po( !n4 );
+  
+  map_params ps;
+  ps.enable_logic_sharing = true;
+  map_stats st;
+  aig_network res = map( aig, lib, ps, &st );
+  
+  CHECK( res.size() == 9 );
+  CHECK( res.num_pis() == 4 );
+  CHECK( res.num_pos() == 2 );
+  CHECK( res.num_gates() == 4 );
 }
\ No newline at end of file

From ece0c0cb623854314b3d374c6fa4053a45ea1295 Mon Sep 17 00:00:00 2001
From: aletempiac <alessandro.tempia@gmail.com>
Date: Fri, 30 Apr 2021 16:24:23 +0200
Subject: [PATCH 07/40] Minor fixes: formatting and include order in mapper tests

---
 test/algorithms/mapper.cpp | 69 ++++++++++++++++++--------------------
 1 file changed, 33 insertions(+), 36 deletions(-)

diff --git a/test/algorithms/mapper.cpp b/test/algorithms/mapper.cpp
index dd2a6c75a..69ed23c37 100644
--- a/test/algorithms/mapper.cpp
+++ b/test/algorithms/mapper.cpp
@@ -3,33 +3,30 @@
 #include <cstdint>
 #include <vector>
 
+#include <lorina/genlib.hpp>
 #include <mockturtle/algorithms/mapper.hpp>
 #include <mockturtle/algorithms/node_resynthesis/mig_npn.hpp>
-#include <mockturtle/algorithms/node_resynthesis/xmg_npn.hpp>
 #include <mockturtle/algorithms/node_resynthesis/xag_npn.hpp>
+#include <mockturtle/algorithms/node_resynthesis/xmg_npn.hpp>
 #include <mockturtle/generators/arithmetic.hpp>
 #include <mockturtle/io/genlib_reader.hpp>
-#include <mockturtle/utils/tech_library.hpp>
 #include <mockturtle/networks/aig.hpp>
+#include <mockturtle/networks/klut.hpp>
 #include <mockturtle/networks/mig.hpp>
-#include <mockturtle/networks/xmg.hpp>
 #include <mockturtle/networks/xag.hpp>
-#include <mockturtle/networks/klut.hpp>
-#include <lorina/genlib.hpp>
-
-
+#include <mockturtle/networks/xmg.hpp>
+#include <mockturtle/utils/tech_library.hpp>
 
 using namespace mockturtle;
 
-std::string const test_library =  "GATE   inv1    1 O=!a;     PIN * INV 1 999 0.9 0.3 0.9 0.3\n"
-                                  "GATE   inv2    2 O=!a;     PIN * INV 2 999 1.0 0.1 1.0 0.1\n"
-                                  "GATE   nand2   2 O=!(ab);  PIN * INV 1 999 1.0 0.2 1.0 0.2\n"
-                                  "GATE   xor2    5 O=[ab];   PIN * UNKNOWN 2 999 1.9 0.5 1.9 0.5\n"
-                                  "GATE   mig3    3 O=<abc>;  PIN * INV 1 999 2.0 0.2 2.0 0.2\n"
-                                  "GATE   buf     2 O=a;      PIN * NONINV 1 999 1.0 0.0 1.0 0.0\n"
-                                  "GATE   zero    0 O=0;\n"
-                                  "GATE   one     0 O=1;";
-
+std::string const test_library = "GATE   inv1    1 O=!a;     PIN * INV 1 999 0.9 0.3 0.9 0.3\n"
+                                 "GATE   inv2    2 O=!a;     PIN * INV 2 999 1.0 0.1 1.0 0.1\n"
+                                 "GATE   nand2   2 O=!(ab);  PIN * INV 1 999 1.0 0.2 1.0 0.2\n"
+                                 "GATE   xor2    5 O=[ab];   PIN * UNKNOWN 2 999 1.9 0.5 1.9 0.5\n"
+                                 "GATE   mig3    3 O=<abc>;  PIN * INV 1 999 2.0 0.2 2.0 0.2\n"
+                                 "GATE   buf     2 O=a;      PIN * NONINV 1 999 1.0 0.0 1.0 0.0\n"
+                                 "GATE   zero    0 O=0;\n"
+                                 "GATE   one     0 O=1;";
 
 TEST_CASE( "Map of MAJ3", "[mapper]" )
 {
@@ -37,7 +34,7 @@ TEST_CASE( "Map of MAJ3", "[mapper]" )
 
   std::istringstream in( test_library );
   auto result = lorina::read_genlib( in, genlib_reader( gates ) );
-  
+
   CHECK( result == lorina::return_code::success );
 
   tech_library<3> lib( gates );
@@ -68,7 +65,7 @@ TEST_CASE( "Map of bad MAJ3 and constant output", "[mapper]" )
 
   std::istringstream in( test_library );
   auto result = lorina::read_genlib( in, genlib_reader( gates ) );
-  
+
   CHECK( result == lorina::return_code::success );
 
   tech_library<3> lib( gates );
@@ -100,7 +97,7 @@ TEST_CASE( "Map of full adder", "[mapper]" )
 
   std::istringstream in( test_library );
   auto result = lorina::read_genlib( in, genlib_reader( gates ) );
-  
+
   CHECK( result == lorina::return_code::success );
 
   tech_library<3> lib( gates );
@@ -118,7 +115,7 @@ TEST_CASE( "Map of full adder", "[mapper]" )
   map_stats st;
   klut_network luts = map( aig, lib, ps, &st );
 
-  const float eps{0.005f};
+  const float eps{ 0.005f };
 
   CHECK( luts.size() == 8u );
   CHECK( luts.num_pis() == 3u );
@@ -136,7 +133,7 @@ TEST_CASE( "Map with inverters", "[mapper]" )
 
   std::istringstream in( test_library );
   auto result = lorina::read_genlib( in, genlib_reader( gates ) );
-  
+
   CHECK( result == lorina::return_code::success );
 
   tech_library<3> lib( gates );
@@ -155,7 +152,7 @@ TEST_CASE( "Map with inverters", "[mapper]" )
   map_stats st;
   klut_network luts = map( aig, lib, ps, &st );
 
-  const float eps{0.005f};
+  const float eps{ 0.005f };
 
   CHECK( luts.size() == 11u );
   CHECK( luts.num_pis() == 3u );
@@ -173,7 +170,7 @@ TEST_CASE( "Map for inverters minimization", "[mapper]" )
 
   std::istringstream in( test_library );
   auto result = lorina::read_genlib( in, genlib_reader( gates ) );
-  
+
   CHECK( result == lorina::return_code::success );
 
   tech_library<3> lib( gates );
@@ -190,7 +187,7 @@ TEST_CASE( "Map for inverters minimization", "[mapper]" )
   map_stats st;
   klut_network luts = map( aig, lib, ps, &st );
 
-  const float eps{0.005f};
+  const float eps{ 0.005f };
 
   CHECK( luts.size() == 7u );
   CHECK( luts.num_pis() == 3u );
@@ -208,7 +205,7 @@ TEST_CASE( "Map of buffer and constant outputs", "[mapper]" )
 
   std::istringstream in( test_library );
   auto result = lorina::read_genlib( in, genlib_reader( gates ) );
-  
+
   CHECK( result == lorina::return_code::success );
 
   tech_library<3> lib( gates );
@@ -241,7 +238,7 @@ TEST_CASE( "Map of buffer and constant outputs", "[mapper]" )
   map_stats st;
   klut_network luts = map( aig, lib, ps, &st );
 
-  const float eps{0.005f};
+  const float eps{ 0.005f };
 
   CHECK( luts.size() == 9u );
   CHECK( luts.num_pis() == 4u );
@@ -255,9 +252,9 @@ TEST_CASE( "Map of buffer and constant outputs", "[mapper]" )
 
 TEST_CASE( "Exact map of bad MAJ3 and constant output", "[mapper]" )
 {
-  mig_npn_resynthesis resyn{true};
+  mig_npn_resynthesis resyn{ true };
 
-  exact_library<mig_network, mig_npn_resynthesis>  lib( resyn );
+  exact_library<mig_network, mig_npn_resynthesis> lib( resyn );
 
   aig_network aig;
   const auto a = aig.create_pi();
@@ -284,7 +281,7 @@ TEST_CASE( "Exact map of full adder", "[mapper]" )
 {
   xmg_npn_resynthesis resyn;
 
-  exact_library<xmg_network, xmg_npn_resynthesis>  lib( resyn );
+  exact_library<xmg_network, xmg_npn_resynthesis> lib( resyn );
 
   aig_network aig;
   const auto a = aig.create_pi();
@@ -313,7 +310,7 @@ TEST_CASE( "Exact map should avoid cycles", "[mapping]" )
 
   resyn_fn resyn;
 
-  exact_library<aig_network, resyn_fn>  lib( resyn );
+  exact_library<aig_network, resyn_fn> lib( resyn );
 
   aig_network aig;
   const auto x0 = aig.create_pi();
@@ -330,11 +327,11 @@ TEST_CASE( "Exact map should avoid cycles", "[mapping]" )
   const auto n7 = aig.create_and( !n5, !n6 );
   aig.create_po( n3 );
   aig.create_po( n7 );
-  
+
   map_params ps;
   map_stats st;
   aig_network res = map( aig, lib, ps, &st );
-  
+
   CHECK( res.size() == 12 );
   CHECK( res.num_pis() == 3 );
   CHECK( res.num_pos() == 2 );
@@ -349,7 +346,7 @@ TEST_CASE( "Exact map with logic sharing", "[mapping]" )
 
   resyn_fn resyn;
 
-  exact_library<aig_network, resyn_fn>  lib( resyn );
+  exact_library<aig_network, resyn_fn> lib( resyn );
 
   aig_network aig;
   const auto x0 = aig.create_pi();
@@ -364,14 +361,14 @@ TEST_CASE( "Exact map with logic sharing", "[mapping]" )
   const auto n4 = aig.create_and( n2, x3 );
   aig.create_po( !n3 );
   aig.create_po( !n4 );
-  
+
   map_params ps;
   ps.enable_logic_sharing = true;
   map_stats st;
   aig_network res = map( aig, lib, ps, &st );
-  
+
   CHECK( res.size() == 9 );
   CHECK( res.num_pis() == 4 );
   CHECK( res.num_pos() == 2 );
   CHECK( res.num_gates() == 4 );
-}
\ No newline at end of file
+}

From 725d35c66d22c2e86494284eb6291368244a310c Mon Sep 17 00:00:00 2001
From: aletempiac <alessandro.tempia@gmail.com>
Date: Tue, 25 May 2021 17:02:00 +0200
Subject: [PATCH 08/40] Modified test: from exact_map to map

---
 test/algorithms/quality.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/test/algorithms/quality.cpp b/test/algorithms/quality.cpp
index 0be75b0a3..be1369ab1 100644
--- a/test/algorithms/quality.cpp
+++ b/test/algorithms/quality.cpp
@@ -177,7 +177,7 @@ TEST_CASE( "Test quality improvement of MIG mapping", "[quality]" )
     uint32_t const before = ntk.num_gates();
     map_params ps;
     map_stats st;
-    mig_network mig = exact_map( ntk, lib, ps, &st );
+    mig_network mig = map( ntk, lib, ps, &st );
     mig = cleanup_dangling( mig );
     return before - mig.num_gates();
   } );

From fa13d314762e18daf6f5bf481cde2f849651d69d Mon Sep 17 00:00:00 2001
From: aletempiac <alessandro.tempia@gmail.com>
Date: Mon, 12 Jul 2021 18:58:43 +0200
Subject: [PATCH 09/40] Added P-enumeration and N-matching option in library
 and mapping to support bigger cell libraries, improved mapping performance,
 fixed error when truth table minimization is false, increased the cut limit
 to 49 (max_cut_num to 50), updated tests for changes

---
 experiments/mapper.cpp                        |   5 +-
 .../mockturtle/algorithms/cut_enumeration.hpp |   2 +-
 include/mockturtle/algorithms/mapper.hpp      | 106 ++++++---
 include/mockturtle/utils/tech_library.hpp     |  94 +++++++-
 lib/kitty/kitty/npn.hpp                       | 214 ++++++++++++++++++
 test/algorithms/mapper.cpp                    |  44 +++-
 test/utils/tech_library.cpp                   |  61 ++++-
 7 files changed, 474 insertions(+), 52 deletions(-)

diff --git a/experiments/mapper.cpp b/experiments/mapper.cpp
index a73af19c9..75e171b5f 100644
--- a/experiments/mapper.cpp
+++ b/experiments/mapper.cpp
@@ -47,7 +47,7 @@ std::string const mcnc_library = "GATE   inv1    1 O=!a;           PIN * INV 1 9
                                  "GATE   inv3    3 O=!a;           PIN * INV 3 999 1.1 0.09 1.1 0.09\n"
                                  "GATE   inv4    4 O=!a;           PIN * INV 4 999 1.2 0.07 1.2 0.07\n"
                                  "GATE   nand2   2 O=!(ab);        PIN * INV 1 999 1.0 0.2 1.0 0.2\n"
-                                 "GATE   nand3   3 O=!(abc);	      PIN * INV 1 999 1.1 0.3 1.1 0.3\n"
+                                 "GATE   nand3   3 O=!(abc);	     PIN * INV 1 999 1.1 0.3 1.1 0.3\n"
                                  "GATE   nand4   4 O=!(abcd);      PIN * INV 1 999 1.4 0.4 1.4 0.4\n"
                                  "GATE   nor2    2 O=!{ab};        PIN * INV 1 999 1.4 0.5 1.4 0.5\n"
                                  "GATE   nor3    3 O=!{abc};       PIN * INV 1 999 2.4 0.7 2.4 0.7\n"
@@ -91,7 +91,7 @@ int main()
   }
 
   tech_library_params tps;
-  tech_library tech_lib( gates, tps );
+  tech_library<5, classification_type::np_configurations> tech_lib( gates, tps );
 
   for ( auto const& benchmark : epfl_benchmarks() )
   {
@@ -113,6 +113,7 @@ int main()
     mig_network res1 = map( aig, exact_lib, ps1, &st1 );
 
     map_params ps2;
+    ps2.cut_enumeration_ps.minimize_truth_table = false;
     map_stats st2;
 
     klut_network res2 = map( aig, tech_lib, ps2, &st2 );
diff --git a/include/mockturtle/algorithms/cut_enumeration.hpp b/include/mockturtle/algorithms/cut_enumeration.hpp
index ef094e2e9..f7ec8291a 100644
--- a/include/mockturtle/algorithms/cut_enumeration.hpp
+++ b/include/mockturtle/algorithms/cut_enumeration.hpp
@@ -641,7 +641,7 @@ template<typename Ntk, uint32_t NumVars, bool ComputeTruth, typename CutData>
 struct fast_network_cuts
 {
 public:
-  static constexpr uint32_t max_cut_num = 26;
+  static constexpr uint32_t max_cut_num = 50;
   using cut_t = cut_type<ComputeTruth, CutData>;
   using cut_set_t = cut_set<cut_t, max_cut_num>;
   static constexpr bool compute_truth = ComputeTruth;
diff --git a/include/mockturtle/algorithms/mapper.hpp b/include/mockturtle/algorithms/mapper.hpp
index 09a6077ce..7c2c6bc17 100644
--- a/include/mockturtle/algorithms/mapper.hpp
+++ b/include/mockturtle/algorithms/mapper.hpp
@@ -61,7 +61,7 @@ struct map_params
 {
   map_params()
   {
-    cut_enumeration_ps.cut_limit = 25;
+    cut_enumeration_ps.cut_limit = 49;
     cut_enumeration_ps.minimize_truth_table = true;
   }
 
@@ -149,6 +149,15 @@ struct map_stats
 namespace detail
 {
 
+template<unsigned NInputs>
+struct cut_match_tech
+{
+  /* list of supergates matching the cut for positive and negative output phases */
+  std::array<std::vector<supergate<NInputs>> const*, 2> supergates = { nullptr, nullptr };
+  /* input negations, 0: pos, 1: neg */
+  std::array<uint8_t, 2> negations{ 0, 0 };
+};
+
 template<unsigned NInputs>
 struct node_match_tech
 {
@@ -176,17 +185,17 @@ struct node_match_tech
   float flows[3];
 };
 
-template<class Ntk, unsigned CutSize, typename CutData, unsigned NInputs>
+template<class Ntk, unsigned CutSize, typename CutData, unsigned NInputs, classification_type Configuration>
 class tech_map_impl
 {
 public:
   using network_cuts_t = fast_network_cuts<Ntk, CutSize, true, CutData>;
   using cut_t = typename network_cuts_t::cut_t;
-  using supergate_t = std::array<std::vector<supergate<NInputs>> const*, 2>;
+  using match_map = std::unordered_map<uint32_t, std::vector<cut_match_tech<NInputs>>>;
   using klut_map = std::unordered_map<uint32_t, std::array<signal<klut_network>, 2>>;
 
 public:
-  explicit tech_map_impl( Ntk const& ntk, tech_library<NInputs> const& library, map_params const& ps, map_stats& st )
+  explicit tech_map_impl( Ntk const& ntk, tech_library<NInputs, Configuration> const& library, map_params const& ps, map_stats& st )
       : ntk( ntk ),
         library( library ),
         ps( ps ),
@@ -199,7 +208,7 @@ class tech_map_impl
     std::tie( lib_inv_area, lib_inv_delay, lib_inv_id ) = library.get_inverter_info();
   }
 
-  explicit tech_map_impl( Ntk const& ntk, tech_library<NInputs> const& library, std::vector<float> const& switch_activity, map_params const& ps, map_stats& st )
+  explicit tech_map_impl( Ntk const& ntk, tech_library<NInputs, Configuration> const& library, std::vector<float> const& switch_activity, map_params const& ps, map_stats& st )
       : ntk( ntk ),
         library( library ),
         ps( ps ),
@@ -308,7 +317,7 @@ class tech_map_impl
     ntk.foreach_gate( [&]( auto const& n ) {
       const auto index = ntk.node_to_index( n );
 
-      std::vector<supergate_t> node_matches;
+      std::vector<cut_match_tech<NInputs>> node_matches;
 
       auto i = 0u;
       for ( auto& cut : cuts.cuts( index ) )
@@ -327,11 +336,36 @@ class tech_map_impl
         }
         const auto tt = cuts.truth_table( *cut );
         const auto fe = kitty::shrink_to<NInputs>( tt );
-        auto const supergates_pos = library.get_supergates( fe );
-        auto const supergates_neg = library.get_supergates( ~fe );
+        auto fe_canon = fe;
+
+        uint8_t negations_pos = 0;
+        uint8_t negations_neg = 0;
+
+        /* match positive polarity */
+        if constexpr ( Configuration == classification_type::p_configurations )
+        {
+          auto canon = kitty::exact_n_canonization( fe );
+          fe_canon = std::get<0>( canon );
+          negations_pos = std::get<1>( canon );
+        }
+        auto const supergates_pos = library.get_supergates( fe_canon );
+
+        /* match negative polarity */
+        if constexpr ( Configuration == classification_type::p_configurations )
+        {
+          auto canon = kitty::exact_n_canonization( ~fe );
+          fe_canon = std::get<0>( canon );
+          negations_neg = std::get<1>( canon );
+        }
+        else
+        {
+          fe_canon = ~fe;
+        }
+        auto const supergates_neg = library.get_supergates( fe_canon );
+
         if ( supergates_pos != nullptr || supergates_neg != nullptr )
         {
-          supergate_t match{ supergates_pos, supergates_neg };
+          cut_match_tech<NInputs> match { { supergates_pos, supergates_neg }, {negations_pos, negations_neg} };
 
           node_matches.push_back( match );
           ( *cut )->data.match_index = i++;
@@ -715,7 +749,8 @@ class tech_map_impl
         continue;
       }
 
-      auto const& supergates = cut_matches[( *cut )->data.match_index];
+      auto const& supergates = cut_matches[( *cut )->data.match_index].supergates;
+      auto const negation = cut_matches[( *cut )->data.match_index].negations[phase];
 
       if ( supergates[phase] == nullptr )
       {
@@ -726,14 +761,15 @@ class tech_map_impl
       /* match each gate and take the best one */
       for ( auto const& gate : *supergates[phase] )
       {
-        node_data.phase[phase] = gate.polarity;
+        uint8_t gate_polarity = gate.polarity ^ negation;
+        node_data.phase[phase] = gate_polarity;
         double area_local = gate.area + cut_leaves_flow( *cut, n, phase );
         double worst_arrival = 0.0f;
 
         auto ctr = 0u;
         for ( auto l : *cut )
         {
-          double arrival_pin = node_match[l].arrival[( gate.polarity >> ctr ) & 1] + gate.tdelay[ctr];
+          double arrival_pin = node_match[l].arrival[( gate_polarity >> ctr ) & 1] + gate.tdelay[ctr];
           worst_arrival = std::max( worst_arrival, arrival_pin );
           ++ctr;
         }
@@ -751,7 +787,7 @@ class tech_map_impl
           best_size = cut->size();
           best_cut = cut_index;
           best_area = gate.area;
-          best_phase = gate.polarity;
+          best_phase = gate_polarity;
           best_supergate = &gate;
         }
       }
@@ -824,7 +860,8 @@ class tech_map_impl
         continue;
       }
 
-      auto const& supergates = cut_matches[( *cut )->data.match_index];
+      auto const& supergates = cut_matches[( *cut )->data.match_index].supergates;
+      auto const negation = cut_matches[( *cut )->data.match_index].negations[phase];
 
       if ( supergates[phase] == nullptr )
       {
@@ -835,7 +872,8 @@ class tech_map_impl
       /* match each gate and take the best one */
       for ( auto const& gate : *supergates[phase] )
       {
-        node_data.phase[phase] = gate.polarity;
+        uint8_t gate_polarity = gate.polarity ^ negation;
+        node_data.phase[phase] = gate_polarity;
         node_data.area[phase] = gate.area;
         float area_exact = cut_ref<SwitchActivity>( *cut, n, phase );
         cut_deref<SwitchActivity>( *cut, n, phase );
@@ -844,7 +882,7 @@ class tech_map_impl
         auto ctr = 0u;
         for ( auto l : *cut )
         {
-          double arrival_pin = node_match[l].arrival[( gate.polarity >> ctr ) & 1] + gate.tdelay[ctr];
+          double arrival_pin = node_match[l].arrival[( gate_polarity >> ctr ) & 1] + gate.tdelay[ctr];
           worst_arrival = std::max( worst_arrival, arrival_pin );
           ++ctr;
         }
@@ -859,7 +897,7 @@ class tech_map_impl
           best_area = gate.area;
           best_size = cut->size();
           best_cut = cut_index;
-          best_phase = gate.polarity;
+          best_phase = gate_polarity;
           best_supergate = &gate;
         }
       }
@@ -1034,6 +1072,7 @@ class tech_map_impl
     }
   }
 
+  __attribute__((always_inline))
   inline void set_match_complemented_phase( uint32_t index, uint8_t phase, double worst_arrival_n )
   {
     auto& node_data = node_match[index];
@@ -1093,6 +1132,7 @@ class tech_map_impl
     }
   }
 
+  __attribute__((always_inline))
   inline double cut_leaves_flow( cut_t const& cut, node<Ntk> const& n, uint8_t phase )
   {
     double flow{ 0.0f };
@@ -1328,29 +1368,19 @@ class tech_map_impl
     auto const& node_data = node_match[index];
     auto& best_cut = cuts.cuts( index )[node_data.best_cut[phase]];
     auto const gate = node_data.best_supergate[phase]->root;
-    // auto tt = cuts.truth_table( best_cut );
-
-    /* check correctness */
-    /* invert the truth table if using the negative phase */
-    // if ( phase == 1 )
-    //   tt = ~tt;
-    // uint32_t neg = 0;
-    // for ( auto i = 0u; i < best_cut.size(); ++i )
-    // {
-    //   neg |= ( ( node_data.phase[phase] >> i ) & 1 ) << node_data.best_supergate[phase]->permutation[i];
-    // }
-    // auto check_tt = kitty::create_from_npn_config( std::make_tuple( tt, neg, node_data.best_supergate[phase]->permutation ) );
-    // assert( gate->function == check_tt );
 
     /* permutate and negate to obtain the matched gate truth table */
-    std::vector<signal<klut_network>> children( best_cut.size() );
+    std::vector<signal<klut_network>> children( gate->num_vars );
 
     auto ctr = 0u;
     for ( auto l : best_cut )
     {
+      if ( ctr >= gate->num_vars)
+        break;
       children[node_data.best_supergate[phase]->permutation[ctr]] = old2new[l][( node_data.phase[phase] >> ctr ) & 1];
       ++ctr;
     }
+
     /* create the node */
     auto f = res.create_node( children, gate->function );
 
@@ -1359,6 +1389,7 @@ class tech_map_impl
   }
 
   template<bool DO_AREA>
+  __attribute__((always_inline))
   inline bool compare_map( double arrival, double best_arrival, double area_flow, double best_area_flow, uint32_t size, uint32_t best_size )
   {
     if constexpr ( DO_AREA )
@@ -1526,7 +1557,7 @@ class tech_map_impl
 
 private:
   Ntk const& ntk;
-  tech_library<NInputs> const& library;
+  tech_library<NInputs, Configuration> const& library;
   map_params const& ps;
   map_stats& st;
 
@@ -1542,7 +1573,7 @@ class tech_map_impl
 
   std::vector<node<Ntk>> top_order;
   std::vector<node_match_tech<NInputs>> node_match;
-  std::unordered_map<uint32_t, std::vector<supergate_t>> matches;
+  match_map matches;
   std::vector<float> switch_activity;
   network_cuts_t cuts;
 };
@@ -1589,8 +1620,8 @@ class tech_map_impl
  * The implementation of this algorithm was inspired by the
  * mapping command ``map`` in ABC.
  */
-template<class Ntk, unsigned CutSize = 5u, typename CutData = cut_enumeration_tech_map_cut, unsigned NInputs>
-klut_network map( Ntk const& ntk, tech_library<NInputs> const& library, map_params const& ps = {}, map_stats* pst = nullptr )
+template<class Ntk, unsigned CutSize = 5u, typename CutData = cut_enumeration_tech_map_cut, unsigned NInputs, classification_type Configuration>
+klut_network map( Ntk const& ntk, tech_library<NInputs, Configuration> const& library, map_params const& ps = {}, map_stats* pst = nullptr )
 {
   static_assert( is_network_type_v<Ntk>, "Ntk is not a network type" );
   static_assert( has_size_v<Ntk>, "Ntk does not implement the size method" );
@@ -1604,7 +1635,7 @@ klut_network map( Ntk const& ntk, tech_library<NInputs> const& library, map_para
   static_assert( has_fanout_size_v<Ntk>, "Ntk does not implement the fanout_size method" );
 
   map_stats st;
-  detail::tech_map_impl<Ntk, CutSize, CutData, NInputs> p( ntk, library, ps, st );
+  detail::tech_map_impl<Ntk, CutSize, CutData, NInputs, Configuration> p( ntk, library, ps, st );
   auto res = p.run();
 
   st.time_total = st.time_mapping + st.cut_enumeration_st.time_total;
@@ -2769,6 +2800,7 @@ class exact_map_impl
     }
   }
 
+  __attribute__((always_inline))
   inline void set_match_complemented_phase( uint32_t index, uint8_t phase, float worst_arrival_n )
   {
     auto& node_data = node_match[index];
@@ -2784,6 +2816,7 @@ class exact_map_impl
     node_data.flows[2] = node_data.flows[phase];
   }
 
+  __attribute__((always_inline))
   inline float cut_leaves_flow( cut_t const& cut, node<Ntk> const& n, uint8_t phase )
   {
     float flow{ 0.0f };
@@ -2913,6 +2946,7 @@ class exact_map_impl
   }
 
   template<bool DO_AREA>
+  __attribute__((always_inline))
   inline bool compare_map( float arrival, float best_arrival, float area_flow, float best_area_flow, uint32_t size, uint32_t best_size )
   {
     if constexpr ( DO_AREA )
diff --git a/include/mockturtle/utils/tech_library.hpp b/include/mockturtle/utils/tech_library.hpp
index 30e63f8ca..13e460a56 100644
--- a/include/mockturtle/utils/tech_library.hpp
+++ b/include/mockturtle/utils/tech_library.hpp
@@ -73,6 +73,14 @@ std::string const mcnc_library =  "GATE   inv1    1 O=!a;           PIN * INV 1
                                   "GATE   one     0 O=1;";
 */
 
+enum class classification_type : uint32_t
+{
+  /* generate the NP configurations (n! * 2^n) */
+  np_configurations = 0,
+  /* generate the P configurations (n!) and N-canonization */
+  p_configurations = 1,
+};
+
 struct tech_library_params
 {
   /*! \brief reports np enumerations */
@@ -118,7 +126,7 @@ struct supergate
       mockturtle::tech_library lib( gates );
    \endverbatim
  */
-template<unsigned NInputs = 4u>
+template<unsigned NInputs = 4u, classification_type Configuration = classification_type::np_configurations>
 class tech_library
 {
   using supergates_list_t = std::vector<supergate<NInputs>>;
@@ -252,14 +260,88 @@ class tech_library
           v.insert( it, sg );
           ++np_count;
         }
+      };
+
+      const auto on_p = [&]( auto const& tt, auto const& perm ) {
+        /* get all the configurations that lead to the N-class representative */
+        auto [tt_canon, phases] = kitty::exact_n_canonization_complete( tt );
+
+        for( auto phase : phases )
+        {
+          supergate<NInputs> sg;
+          sg.root = &gate;
+          sg.area = gate.area;
+          sg.worstDelay = worst_delay;
+          sg.polarity = 0;
+          sg.permutation = perm;
+
+          for ( auto i = 0u; i < perm.size() && i < NInputs; ++i )
+          {
+            sg.tdelay[i] = worst_delay;   /* if pin-to-pin delay change to: gate.delay[perm[i]] */
+            sg.polarity |= phase;         /* permutate input negation to match the right pin */
+          }
+          for ( auto i = perm.size(); i < NInputs; ++i )
+          {
+            sg.tdelay[i] = 0; /* added for completeness but not necessary */
+          }
+
+          const auto static_tt = kitty::extend_to<NInputs>( tt_canon );
+
+          auto& v = _super_lib[static_tt];
+
+          /* ordered insert by ascending area, then number of input pins, then gate id */
+          auto it = std::lower_bound( v.begin(), v.end(), sg, [&]( auto const& s1, auto const& s2 ) {
+            if ( s1.area < s2.area )
+              return true;
+            if ( s1.area > s2.area )
+              return false;
+            if ( s1.root->num_vars < s2.root->num_vars )
+              return true;
+            if ( s1.root->num_vars > s2.root->num_vars )
+              return false; /* more pins sorts later: needed for a strict weak ordering */
+            return s1.root->id < s2.root->id;
+          } );
+
+          bool to_add = true;
+          /* search for duplicated element due to symmetries */
+          while ( it != v.end() )
+          {
+            if ( sg.root->id == it->root->id )
+            {
+              /* if already in the library exit, else ignore permutations if with equal delay cost */
+              if ( sg.polarity == it->polarity && sg.tdelay == it->tdelay )
+              {
+                to_add = false;
+                break;
+              }
+            }
+            else
+            {
+              break;
+            }
+            ++it;
+          }
 
-        /* check correct results */
-        // assert( gate.function == create_from_npn_config( std::make_tuple( tt, neg, sg.permutation ) ) );
+          if ( to_add )
+          {
+            v.insert( it, sg );
+            ++np_count;
+          }
+        }
       };
 
-      /* NP enumeration of the function */
-      const auto tt = gate.function;
-      kitty::exact_np_enumeration( tt, on_np );
+      if constexpr ( Configuration == classification_type::np_configurations )
+      {
+        /* NP enumeration of the function */
+        const auto tt = gate.function;
+        kitty::exact_np_enumeration( tt, on_np );
+      }
+      else
+      {
+        /* P enumeration followed by N canonization of the function */
+        const auto tt = gate.function;
+        kitty::exact_p_enumeration( tt, on_p );
+      }
 
       if ( _ps.verbose )
       {
diff --git a/lib/kitty/kitty/npn.hpp b/lib/kitty/kitty/npn.hpp
index 29460ff15..83cf2e0f7 100755
--- a/lib/kitty/kitty/npn.hpp
+++ b/lib/kitty/kitty/npn.hpp
@@ -262,6 +262,78 @@ std::tuple<TT, uint32_t, std::vector<uint8_t>> exact_npn_canonization( const TT&
   return std::make_tuple( tmin, phase, perm );
 }
 
+/*! \brief Exact N canonization
+
+  Given a truth table, this function finds the lexicographically smallest truth
+  table in its N class, called N representative. Two functions are in the
+  same N class, if one can obtain one from the other by input negations.
+
+  The function can accept a callback as second parameter which is called for
+  every visited function when trying out all combinations.  This allows to
+  exhaustively visit the whole N class.
+
+  The function returns a N configuration which contains the necessary
+  transformations to obtain the representative.  It is a tuple of
+
+  - the N representative
+  - input negations that lead to the representative
+
+  \param tt The truth table
+  \param fn Callback for each visited truth table in the class (default does nothing)
+  \return N configurations
+*/
+template<typename TT, typename Callback = decltype( detail::exact_npn_canonization_null_callback<TT> )>
+std::tuple<TT, uint32_t> exact_n_canonization( const TT& tt, Callback&& fn = detail::exact_npn_canonization_null_callback<TT> )
+{
+  static_assert( is_complete_truth_table<TT>::value, "Can only be applied on complete truth tables." );
+
+  const auto num_vars = tt.num_vars();
+
+  /* Special case for n = 0 */
+  if ( num_vars == 0 )
+  {
+    return std::make_tuple( tt, 0 );
+  }
+
+  /* Special case for n = 1 */
+  if ( num_vars == 1 )
+  {
+    return std::make_tuple( tt, 0 );
+  }
+
+  assert( num_vars >= 2 && num_vars <= 6 );
+
+  auto t1 = tt;
+  auto tmin = t1;
+
+  fn( t1 );
+
+  const auto& flips = detail::flips[num_vars - 2u];
+  int best_flip = -1;
+
+  for ( std::size_t j = 0; j < flips.size(); ++j )
+  {
+    const auto pos = flips[j];
+    flip_inplace( t1, pos );
+
+    fn( t1 );
+
+    if ( t1 < tmin )
+    {
+      best_flip = static_cast<int>( j );
+      tmin = t1;
+    }
+  }
+
+  uint32_t phase = 0;
+  for ( auto i = 0; i <= best_flip; ++i )
+  {
+    phase ^= 1 << flips[i];
+  }
+
+  return std::make_tuple( tmin, phase );
+}
+
 /*! \brief Flip-swap NPN heuristic
 
   This algorithm will iteratively try to reduce the numeric value of the truth
@@ -625,6 +697,148 @@ void exact_np_enumeration( const TT& tt, Callback&& fn )
   }
 }
 
+/*! \brief Exact P enumeration
+
+  Given a truth table, this function enumerates all the functions in its
+  P class. Two functions are in the same P class, if one can be obtained
+  from the other by input permutation.
+
+  The function takes a callback as second parameter which is called for
+  every enumerated function. The callback should take as parameters:
+  - P-enumerated truth table
+  - input permutation to apply
+
+  \param tt Truth table
+  \param fn Callback for each enumerated truth table in the P class
+*/
+template<typename TT, typename Callback>
+void exact_p_enumeration( const TT& tt, Callback&& fn )
+{
+  static_assert( is_complete_truth_table<TT>::value, "Can only be applied on complete truth tables." );
+
+  const auto num_vars = tt.num_vars();
+
+  /* Special case for n = 0 */
+  if ( num_vars == 0 )
+  {
+    fn( tt, std::vector<uint8_t>{} );
+    return;
+  }
+
+  /* Special case for n = 1 */
+  if ( num_vars == 1 )
+  {
+    fn( tt, std::vector<uint8_t>{0} );
+    return;
+  }
+
+  assert( num_vars >= 2 && num_vars <= 6 );
+
+  auto t1 = tt;
+
+  std::vector<uint8_t> perm( num_vars );
+  std::iota( perm.begin(), perm.end(), 0u );
+
+  fn( t1, perm );
+
+  const auto& swaps = detail::swaps[num_vars - 2u];
+
+  for ( std::size_t i = 0; i < swaps.size(); ++i )
+  {
+    const auto pos = swaps[i];
+    swap_adjacent_inplace( t1, pos );
+
+    std::swap( perm[pos], perm[pos + 1] );
+
+    fn( t1, perm );
+  }
+}
+
+/*! \brief Exact N canonization complete
+
+  Given a truth table, this function finds the lexicographically smallest truth
+  table in its N class, called N representative. Two functions are in the
+  same N class, if one can obtain one from the other by input negations.
+
+  The function can accept a callback as second parameter which is called for
+  every visited function when trying out all combinations.  This allows to
+  exhaustively visit the whole N class.
+
+  The function returns all the N configurations that contain the necessary
+  transformations to obtain the representative.  It is a tuple of
+
+  - the N representative
+  - a vector of all input negations that lead to the representative
+
+  \param tt The truth table
+  \param fn Callback for each visited truth table in the class (default does nothing)
+  \return N configurations
+*/
+template<typename TT, typename Callback = decltype( detail::exact_npn_canonization_null_callback<TT> )>
+std::tuple<TT, std::vector<uint32_t>> exact_n_canonization_complete( const TT& tt, Callback&& fn = detail::exact_npn_canonization_null_callback<TT> )
+{
+  static_assert( is_complete_truth_table<TT>::value, "Can only be applied on complete truth tables." );
+
+  const auto num_vars = tt.num_vars();
+
+  /* Special case for n = 0 */
+  if ( num_vars == 0 )
+  {
+    return std::make_tuple( tt, std::vector<uint32_t>{0} );
+  }
+
+  /* Special case for n = 1 */
+  if ( num_vars == 1 )
+  {
+    return std::make_tuple( tt, std::vector<uint32_t>{0} );
+  }
+
+  assert( num_vars >= 2 && num_vars <= 6 );
+
+  auto t1 = tt;
+  auto tmin = t1;
+
+  fn( t1 );
+
+  const auto& flips = detail::flips[num_vars - 2u];
+
+  std::vector<int> best_flip {-1};
+
+  for ( std::size_t j = 0; j < flips.size(); ++j )
+  {
+    const auto pos = flips[j];
+    flip_inplace( t1, pos );
+
+    fn( t1 );
+
+    if ( t1 < tmin )
+    {
+      best_flip.erase( best_flip.begin() + 1, best_flip.end() );
+      best_flip[0] = static_cast<int>( j );
+      tmin = t1;
+    }
+    else if ( t1 == tmin )
+    {
+      best_flip.push_back( static_cast<int>( j ) );
+    }
+  }
+
+  std::vector<uint32_t> phases( best_flip.size() );
+  uint32_t phase = 0;
+  int cnt = 0;
+  for ( auto i = 0; i < best_flip.size(); ++i )
+  {
+    auto flip = best_flip[i];
+    for ( ; cnt <= flip; ++cnt )
+    {
+      phase ^= 1 << flips[cnt];
+    }
+    phases[i] = phase;
+  }
+
+  return std::make_tuple( tmin, phases );
+}
+
 /*! \brief Obtain truth table from NPN configuration
 
   Given an NPN configuration, which contains a representative
diff --git a/test/algorithms/mapper.cpp b/test/algorithms/mapper.cpp
index d5daac618..e79a33f69 100644
--- a/test/algorithms/mapper.cpp
+++ b/test/algorithms/mapper.cpp
@@ -89,7 +89,7 @@ TEST_CASE( "Map of bad MAJ3 and constant output", "[mapper]" )
   CHECK( st.delay == 2.0f );
 }
 
-TEST_CASE( "Map of full adder", "[mapper]" )
+TEST_CASE( "Map of full adder 1", "[mapper]" )
 {
   std::vector<gate> gates;
 
@@ -124,6 +124,42 @@ TEST_CASE( "Map of full adder", "[mapper]" )
   CHECK( st.delay < 3.8f + eps );
 }
 
+TEST_CASE( "Map of full adder 2", "[mapper]" )
+{
+  std::vector<gate> gates;
+
+  std::istringstream in( test_library );
+  auto result = lorina::read_genlib( in, genlib_reader( gates ) );
+  CHECK( result == lorina::return_code::success );
+
+  tech_library<3, classification_type::p_configurations> lib( gates );
+
+  aig_network aig;
+  const auto a = aig.create_pi();
+  const auto b = aig.create_pi();
+  const auto c = aig.create_pi();
+
+  const auto [sum, carry] = full_adder( aig, a, b, c );
+  aig.create_po( sum );
+  aig.create_po( carry );
+
+  map_params ps;
+  ps.cut_enumeration_ps.minimize_truth_table = false;
+  map_stats st;
+  klut_network luts = map( aig, lib, ps, &st );
+
+  const float eps{ 0.005f };
+
+  CHECK( luts.size() == 8u );
+  CHECK( luts.num_pis() == 3u );
+  CHECK( luts.num_pos() == 2u );
+  CHECK( luts.num_gates() == 3u );
+  CHECK( st.area > 13.0f - eps );
+  CHECK( st.area < 13.0f + eps );
+  CHECK( st.delay > 3.8f - eps );
+  CHECK( st.delay < 3.8f + eps );
+}
+
 TEST_CASE( "Map with inverters", "[mapper]" )
 {
   std::vector<gate> gates;
@@ -202,7 +238,7 @@ TEST_CASE( "Map of buffer and constant outputs", "[mapper]" )
   auto result = lorina::read_genlib( in, genlib_reader( gates ) );
   CHECK( result == lorina::return_code::success );
 
-  tech_library<3> lib( gates );
+  tech_library<3, classification_type::np_configurations> lib( gates );
 
   aig_network aig;
   const auto a = aig.create_pi();
@@ -298,7 +334,7 @@ TEST_CASE( "Exact map of full adder", "[mapper]" )
   CHECK( st.delay == 2.0f );
 }
 
-TEST_CASE( "Exact map should avoid cycles", "[mapping]" )
+TEST_CASE( "Exact map should avoid cycles", "[mapper]" )
 {
   using resyn_fn = xag_npn_resynthesis<aig_network>;
 
@@ -334,7 +370,7 @@ TEST_CASE( "Exact map should avoid cycles", "[mapping]" )
   CHECK( st.delay == 3.0f );
 }
 
-TEST_CASE( "Exact map with logic sharing", "[mapping]" )
+TEST_CASE( "Exact map with logic sharing", "[mapper]" )
 {
   using resyn_fn = xag_npn_resynthesis<aig_network>;
 
diff --git a/test/utils/tech_library.cpp b/test/utils/tech_library.cpp
index 2b23f306e..dfb3707d2 100644
--- a/test/utils/tech_library.cpp
+++ b/test/utils/tech_library.cpp
@@ -42,7 +42,7 @@ std::string const test_library =  "GATE   inv1    3 O=!a;           PIN * INV 3
                                   "GATE   zero    0 O=0;\n"
                                   "GATE   one     0 O=1;";
 
-TEST_CASE( "Simple library generation", "[tech_library]" )
+TEST_CASE( "Simple library generation 1", "[tech_library]" )
 {
   std::vector<gate> gates;
 
@@ -51,7 +51,7 @@ TEST_CASE( "Simple library generation", "[tech_library]" )
   
   CHECK( result == lorina::return_code::success );
 
-  tech_library<2> lib( gates );
+  tech_library<2, classification_type::np_configurations> lib( gates );
 
   CHECK( lib.max_gate_size() == 2 );
   CHECK( lib.get_inverter_info() == std::make_tuple( 1.0f, 0.9f, 0u ) );
@@ -118,6 +118,61 @@ TEST_CASE( "Simple library generation", "[tech_library]" )
   CHECK( ( *nand_e )[0].polarity == 3u );
 }
 
+TEST_CASE( "Simple library generation 2", "[tech_library]" )
+{
+  std::vector<gate> gates;
+
+  std::istringstream in( simple_test_library );
+  auto result = lorina::read_genlib( in, genlib_reader( gates ) );
+  
+  CHECK( result == lorina::return_code::success );
+
+  tech_library<2, classification_type::p_configurations> lib( gates );
+
+  CHECK( lib.max_gate_size() == 2 );
+  CHECK( lib.get_inverter_info() == std::make_tuple( 1.0f, 0.9f, 0u ) );
+
+  kitty::static_truth_table<2> tt;
+
+  kitty::create_from_hex_string( tt, "5" );
+  auto const inv = lib.get_supergates( tt );
+  CHECK( inv != nullptr );
+  CHECK( inv->size() == 2 );
+  CHECK( ( *inv )[0].root->name == "inv1" );
+  CHECK( ( *inv )[0].area == 1.0f );
+  CHECK( ( *inv )[0].worstDelay == 0.9f );
+  CHECK( ( *inv )[0].tdelay[0] == 0.9f );
+  CHECK( ( *inv )[0].polarity == 0u );
+  CHECK( ( *inv )[1].root->name == "inv2" );
+  CHECK( ( *inv )[1].area == 2.0f );
+  CHECK( ( *inv )[1].worstDelay == 1.0f );
+  CHECK( ( *inv )[1].tdelay[0] == 1.0f );
+  CHECK( ( *inv )[1].polarity == 0u );
+
+  kitty::create_from_hex_string( tt, "7" );
+  auto const nand_7 = lib.get_supergates( tt );
+  CHECK( nand_7 != nullptr );
+  CHECK( nand_7->size() == 1 );
+  CHECK( ( *nand_7 )[0].root->name == "nand2" );
+  CHECK( ( *nand_7 )[0].area == 2.0f );
+  CHECK( ( *nand_7 )[0].worstDelay == 1.0f );
+  CHECK( ( *nand_7 )[0].tdelay[0] == 1.0f );
+  CHECK( ( *nand_7 )[0].tdelay[1] == 1.0f );
+  CHECK( ( *nand_7 )[0].polarity == 0u );
+
+  kitty::create_from_hex_string( tt, "b" );
+  auto const nand_b = lib.get_supergates( tt );
+  CHECK( nand_b == nullptr );
+
+  kitty::create_from_hex_string( tt, "d" );
+  auto const nand_d = lib.get_supergates( tt );
+  CHECK( nand_d == nullptr );
+
+  kitty::create_from_hex_string( tt, "e" );
+  auto const nand_e = lib.get_supergates( tt );
+  CHECK( nand_e == nullptr );
+}
+
 TEST_CASE( "Complete library generation", "[tech_library]" )
 {
   std::vector<gate> gates;
@@ -127,7 +182,7 @@ TEST_CASE( "Complete library generation", "[tech_library]" )
   
   CHECK( result == lorina::return_code::success );
 
-  tech_library<4> lib( gates );
+  tech_library<4, classification_type::np_configurations> lib( gates );
 
   CHECK( lib.max_gate_size() == 4 );
   CHECK( lib.get_inverter_info() == std::make_tuple( 1.0f, 0.9f, 2u ) );

From b4fefb9a307dafac65f54f42daad90f613b04893 Mon Sep 17 00:00:00 2001
From: aletempiac <alessandro.tempia@gmail.com>
Date: Mon, 12 Jul 2021 20:36:23 +0200
Subject: [PATCH 10/40] Added inverter usage in exact mapping

---
 include/mockturtle/algorithms/mapper.hpp | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/include/mockturtle/algorithms/mapper.hpp b/include/mockturtle/algorithms/mapper.hpp
index 7c2c6bc17..8fe8442a5 100644
--- a/include/mockturtle/algorithms/mapper.hpp
+++ b/include/mockturtle/algorithms/mapper.hpp
@@ -2269,7 +2269,7 @@ class exact_map_impl
       /* match each gate and take the best one */
       for ( auto const& gate : *supergates.supergates[phase] )
       {
-        uint8_t complement = supergates.negation;
+        uint8_t complement = supergates.negation ^ gate.polarity;
         node_data.phase[phase] = complement;
         float area_local = gate.area + cut_leaves_flow( *cut, n, phase );
         float worst_arrival = 0.0f;
@@ -2337,7 +2337,7 @@ class exact_map_impl
         children[supergates.permutation[ctr++]] = l;
       }
 
-      best_phase = supergates.negation;
+      best_phase = node_data.phase[phase];
       best_arrival = 0.0f;
       best_area = best_supergate->area;
       best_cut = node_data.best_cut[phase];
@@ -2388,7 +2388,7 @@ class exact_map_impl
 
       for ( auto const& gate : *supergates.supergates[phase] )
       {
-        uint8_t complement = supergates.negation;
+        uint8_t complement = supergates.negation ^ gate.polarity;
         node_data.phase[phase] = complement;
         node_data.area[phase] = gate.area;
         auto area_exact = cut_ref( *cut, n, phase );

From 7d5e2b34028818056f5519028354fa1bd12d60eb Mon Sep 17 00:00:00 2001
From: aletempiac <alessandro.tempia@gmail.com>
Date: Mon, 12 Jul 2021 21:01:44 +0200
Subject: [PATCH 11/40] removed always_inline to avoid compiler error on
 Windows

---
 include/mockturtle/algorithms/mapper.hpp | 6 ------
 1 file changed, 6 deletions(-)

diff --git a/include/mockturtle/algorithms/mapper.hpp b/include/mockturtle/algorithms/mapper.hpp
index 8fe8442a5..5916c646c 100644
--- a/include/mockturtle/algorithms/mapper.hpp
+++ b/include/mockturtle/algorithms/mapper.hpp
@@ -1072,7 +1072,6 @@ class tech_map_impl
     }
   }
 
-  __attribute__((always_inline))
   inline void set_match_complemented_phase( uint32_t index, uint8_t phase, double worst_arrival_n )
   {
     auto& node_data = node_match[index];
@@ -1132,7 +1131,6 @@ class tech_map_impl
     }
   }
 
-  __attribute__((always_inline))
   inline double cut_leaves_flow( cut_t const& cut, node<Ntk> const& n, uint8_t phase )
   {
     double flow{ 0.0f };
@@ -1389,7 +1387,6 @@ class tech_map_impl
   }
 
   template<bool DO_AREA>
-  __attribute__((always_inline))
   inline bool compare_map( double arrival, double best_arrival, double area_flow, double best_area_flow, uint32_t size, uint32_t best_size )
   {
     if constexpr ( DO_AREA )
@@ -2800,7 +2797,6 @@ class exact_map_impl
     }
   }
 
-  __attribute__((always_inline))
   inline void set_match_complemented_phase( uint32_t index, uint8_t phase, float worst_arrival_n )
   {
     auto& node_data = node_match[index];
@@ -2816,7 +2812,6 @@ class exact_map_impl
     node_data.flows[2] = node_data.flows[phase];
   }
 
-  __attribute__((always_inline))
   inline float cut_leaves_flow( cut_t const& cut, node<Ntk> const& n, uint8_t phase )
   {
     float flow{ 0.0f };
@@ -2946,7 +2941,6 @@ class exact_map_impl
   }
 
   template<bool DO_AREA>
-  __attribute__((always_inline))
   inline bool compare_map( float arrival, float best_arrival, float area_flow, float best_area_flow, uint32_t size, uint32_t best_size )
   {
     if constexpr ( DO_AREA )

From 5c00c2b0c92512616938c5f347ca35cb7118fbd3 Mon Sep 17 00:00:00 2001
From: aletempiac <alessandro.tempia@gmail.com>
Date: Tue, 13 Jul 2021 16:36:24 +0200
Subject: [PATCH 12/40] Added mapper documentation

---
 docs/algorithms/cut_enumeration.rst           |  9 +++
 docs/algorithms/mapper.rst                    | 78 +++++++++++++++++++
 docs/index.rst                                |  1 +
 docs/utils/util_data_structures.rst           | 31 ++++++++
 experiments/mapper.cpp                        |  2 +-
 .../cut_enumeration/exact_map_cut.hpp         |  5 +-
 .../cut_enumeration/tech_map_cut.hpp          |  1 +
 include/mockturtle/algorithms/mapper.hpp      | 32 +++++---
 include/mockturtle/utils/tech_library.hpp     | 45 ++++++++---
 9 files changed, 179 insertions(+), 25 deletions(-)
 create mode 100644 docs/algorithms/mapper.rst

diff --git a/docs/algorithms/cut_enumeration.rst b/docs/algorithms/cut_enumeration.rst
index 7ab90f092..7e1a193e7 100644
--- a/docs/algorithms/cut_enumeration.rst
+++ b/docs/algorithms/cut_enumeration.rst
@@ -97,7 +97,16 @@ Pre-defined cut types
 
 .. doxygenstruct:: mockturtle::cut_enumeration_spectr_cut
 
+**Header:** ``mockturtle/algorithms/cut_enumeration/tech_map_cut.hpp``
+
+.. doxygenstruct:: mockturtle::cut_enumeration_tech_map_cut
+
+**Header:** ``mockturtle/algorithms/cut_enumeration/exact_map_cut.hpp``
+
+.. doxygenstruct:: mockturtle::cut_enumeration_exact_map_cut
+
 Special-purpose implementations
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+.. doxygenfunction:: mockturtle::fast_cut_enumeration
 
 .. doxygenfunction:: mockturtle::fast_small_cut_enumeration
diff --git a/docs/algorithms/mapper.rst b/docs/algorithms/mapper.rst
new file mode 100644
index 000000000..2295e966b
--- /dev/null
+++ b/docs/algorithms/mapper.rst
@@ -0,0 +1,78 @@
+Mapper
+-----------
+
+**Header:** ``mockturtle/algorithms/mapper.hpp``
+
+A versatile mapper that supports technology mapping and graph mapping.
+The mapper is independent of the underlying graph representation. Hence,
+it supports generic subject graph representations (e.g., AIG, and MIG)
+and a generic target representation (e.g. cell library, XMG).
+The mapper aims at finding a good mapping with respect to delay, area,
+and switching power.
+
+The mapper uses a library (hash table) to facilitate Boolean matching.
+For technology mapping, it needs `tech_library` while for graph mapping
+it needs `exact_library`. For technology mapping, the generation of both NP- and
+P-configurations of gates is supported. Generally, it is convenient to use
+NP-configurations for small cell libraries (<20 gates). For bigger libraries,
+P-configurations perform better. For graph mapping, NPN classification is used
+instead.
+
+The following example shows how to perform delay-oriented technology mapping
+from an And-inverter graph using the default settings:
+
+.. code-block:: c++
+
+   aig_network aig = ...;
+
+   /* read cell library in genlib format */
+   std::vector<gate> gates;
+   std::ifstream in( ... );
+   lorina::read_genlib( in, genlib_reader( gates ) );
+   tech_library tech_lib( gates );
+
+   /* perform technology mapping */
+   klut_network res = map( aig, tech_lib );
+
+The mapped network is returned as a k-LUT network in which each k-LUT
+abstracts a cell.
+
+The next example performs area-oriented graph mapping from AIG to MIG
+using an NPN resynthesis database of structures:
+
+.. code-block:: c++
+
+   aig_network aig = ...;
+   
+   /* load the npn database in the library */
+   mig_npn_resynthesis resyn{ true };
+   exact_library<mig_network, mig_npn_resynthesis> exact_lib( resyn );
+
+   /* perform graph mapping */
+   map_params ps;
+   ps.skip_delay_round = true;
+   ps.required_time = std::numeric_limits<double>::max();
+   mig_network res = map( aig, exact_lib, ps );
+
+For graph mapping, we suggest reading the network directly in the
+target graph representation if possible (e.g. read an AIG as a MIG)
+since the mapping often leads to better results in this setting.
+
+As a default setting, cut enumeration minimizes the truth tables.
+This helps improve the results but slows down the computation. For
+a faster mapping, set the truth table minimization parameter to false.
+The maximum number of cuts stored for each node is limited to 49.
+To increase this limit, change `max_cut_num` in `fast_network_cuts`.
+
+**Parameters and statistics**
+
+.. doxygenstruct:: mockturtle::map_params
+   :members:
+
+.. doxygenstruct:: mockturtle::map_stats
+   :members:
+
+**Algorithm**
+
+.. doxygenfunction:: mockturtle::map(Ntk const&, tech_library<NInputs, Configuration> const&, map_params const&, map_stats*)
+.. doxygenfunction:: mockturtle::map(Ntk&, exact_library<NtkDest, RewritingFn, NInputs> const&, map_params const&, map_stats*)
diff --git a/docs/index.rst b/docs/index.rst
index d50d6c707..bb1a57625 100644
--- a/docs/index.rst
+++ b/docs/index.rst
@@ -27,6 +27,7 @@ Welcome to mockturtle's documentation!
    :maxdepth: 2
    :caption: Algorithms
 
+   algorithms/mapper
    algorithms/lut_mapping
    algorithms/collapse_mapped
    algorithms/node_resynthesis
diff --git a/docs/utils/util_data_structures.rst b/docs/utils/util_data_structures.rst
index bf94561b7..e32ee73d4 100644
--- a/docs/utils/util_data_structures.rst
+++ b/docs/utils/util_data_structures.rst
@@ -34,6 +34,37 @@ Node map
 
 .. doxygenfunction:: mockturtle::initialize_copy_network
 
+Tech Library
+~~~~~~~~~~~~
+
+**Header:** ``mockturtle/utils/tech_library.hpp``
+
+.. doc_overview_table:: classmockturtle_1_1tech__library
+   :column: Method
+
+   get_supergates
+   get_inverter_info
+   max_gate_size
+   get_gates
+
+.. doxygenclass:: mockturtle::tech_library
+   :members:
+
+Exact Library
+~~~~~~~~~~~~~
+
+**Header:** ``mockturtle/utils/tech_library.hpp``
+
+.. doc_overview_table:: classmockturtle_1_1exact__library
+   :column: Method
+
+   get_supergates
+   get_database
+   get_inverter_info
+
+.. doxygenclass:: mockturtle::exact_library
+   :members:
+
 Cuts
 ~~~~
 
diff --git a/experiments/mapper.cpp b/experiments/mapper.cpp
index 75e171b5f..feff2e658 100644
--- a/experiments/mapper.cpp
+++ b/experiments/mapper.cpp
@@ -107,7 +107,7 @@ int main()
 
     map_params ps1;
     ps1.skip_delay_round = true;
-    ps1.required_time = std::numeric_limits<float>::max();
+    ps1.required_time = std::numeric_limits<double>::max();
     map_stats st1;
 
     mig_network res1 = map( aig, exact_lib, ps1, &st1 );
diff --git a/include/mockturtle/algorithms/cut_enumeration/exact_map_cut.hpp b/include/mockturtle/algorithms/cut_enumeration/exact_map_cut.hpp
index 18bdc3c82..b6849fda1 100644
--- a/include/mockturtle/algorithms/cut_enumeration/exact_map_cut.hpp
+++ b/include/mockturtle/algorithms/cut_enumeration/exact_map_cut.hpp
@@ -43,10 +43,7 @@
 namespace mockturtle
 {
 
-/*! \brief Cut implementation based on ABC's giaMf.c
-
-  See <a href="https://github.com/berkeley-abc/abc/blob/master/src/aig/gia/giaMf.c">giaMf.c</a> in ABC's repository.
-*/
+/*! \brief Cut implementation for graph mapping with a complete database */
 struct cut_enumeration_exact_map_cut
 {
   uint32_t delay{ 0 };
diff --git a/include/mockturtle/algorithms/cut_enumeration/tech_map_cut.hpp b/include/mockturtle/algorithms/cut_enumeration/tech_map_cut.hpp
index 6c84013e8..88dcd67f3 100644
--- a/include/mockturtle/algorithms/cut_enumeration/tech_map_cut.hpp
+++ b/include/mockturtle/algorithms/cut_enumeration/tech_map_cut.hpp
@@ -43,6 +43,7 @@
 namespace mockturtle
 {
 
+/*! \brief Cut implementation for technology mapping */
 struct cut_enumeration_tech_map_cut
 {
   uint32_t delay{ 0 };
diff --git a/include/mockturtle/algorithms/mapper.hpp b/include/mockturtle/algorithms/mapper.hpp
index 5916c646c..3f56dba23 100644
--- a/include/mockturtle/algorithms/mapper.hpp
+++ b/include/mockturtle/algorithms/mapper.hpp
@@ -52,10 +52,10 @@
 namespace mockturtle
 {
 
-/*! \brief Parameters for lut_mapping.
+/*! \brief Parameters for map.
  *
- * The data structure `lut_mapping_params` holds configurable parameters
- * with default arguments for `lut_mapping`.
+ * The data structure `map_params` holds configurable parameters
+ * with default arguments for `map`.
  */
 struct map_params
 {
@@ -67,14 +67,15 @@ struct map_params
 
   /*! \brief Parameters for cut enumeration
    *
-   * The default cut size is 4, the default cut limit is 8.
+   * The default cut limit is 49. By default,
+   * truth table minimization is performed.
    */
   cut_enumeration_params cut_enumeration_ps{};
 
   /*! \brief Required time for delay optimization. */
   double required_time{ 0.0f };
 
-  /*! \brief Do area optimization. */
+  /*! \brief Skip delay round for area optimization. */
   bool skip_delay_round{ false };
 
   /*! \brief Number of rounds for area flow optimization. */
@@ -89,7 +90,7 @@ struct map_params
   /*! \brief Number of patterns for switching activity computation. */
   uint32_t switching_activity_patterns{ 2048u };
 
-  /*! \brief Exploit logic sharing in exact area optimization. */
+  /*! \brief Exploit logic sharing in exact area optimization of graph mapping. */
   bool enable_logic_sharing{ false };
 
   /*! \brief Maximum number of cuts evaluated for logic sharing. */
@@ -101,18 +102,21 @@ struct map_params
 
 /*! \brief Statistics for mapper.
  *
- * The data structure `mapper_stats` provides data collected by running
- * `mapper`.
+ * The data structure `map_stats` provides data collected by running
+ * `map`.
  */
 struct map_stats
 {
-  /*! \brief Area, delay, and power results. */
+  /*! \brief Area result. */
   double area{ 0 };
+  /*! \brief Worst delay result. */
   double delay{ 0 };
+  /*! \brief Power result. */
   double power{ 0 };
 
-  /*! \brief Runtime. */
+  /*! \brief Runtime for covering. */
   stopwatch<>::duration time_mapping{ 0 };
+  /*! \brief Total runtime. */
   stopwatch<>::duration time_total{ 0 };
 
   /*! \brief Cut enumeration stats. */
@@ -268,7 +272,7 @@ class tech_map_impl
       }
     }
 
-    /* compute mapping using exact area */
+    /* compute mapping using exact switching activity estimation */
     while ( iteration < ps.eswp_rounds + ps.ela_rounds + ps.area_flow_rounds + 1 )
     {
       compute_required_time();
@@ -1595,6 +1599,8 @@ class tech_map_impl
  * See `include/mockturtle/algorithms/cut_enumeration/cut_enumeration_tech_map_cut.hpp`
  * for one example of a CutData type that implements the cost function that is used in
  * the technology mapper.
+ * 
+ * The function takes the size of the cuts in the template parameter `CutSize`.
  *
  * The function returns a k-LUT network. Each LUT abstacts a gate of the technology library.
  *
@@ -3031,6 +3037,8 @@ class exact_map_impl
  * See `include/mockturtle/algorithms/cut_enumeration/cut_enumeration_exact_map_cut.hpp`
  * for one example of a CutData type that implements the cost function that is used in
  * the technology mapper.
+ * 
+ * The function takes the size of the cuts in the template parameter `CutSize`.
  *
  * The function returns a mapped network representation generated using the exact
  * synthesis entries in the `exact_library`.
@@ -3064,6 +3072,8 @@ NtkDest map( Ntk& ntk, exact_library<NtkDest, RewritingFn, NInputs> const& libra
   static_assert( has_foreach_po_v<Ntk>, "Ntk does not implement the foreach_po method" );
   static_assert( has_foreach_node_v<Ntk>, "Ntk does not implement the foreach_node method" );
   static_assert( has_fanout_size_v<Ntk>, "Ntk does not implement the fanout_size method" );
+  static_assert( has_incr_value_v<NtkDest>, "Ntk does not implement the incr_value method" );
+  static_assert( has_decr_value_v<NtkDest>, "Ntk does not implement the decr_value method" );
 
   map_stats st;
   detail::exact_map_impl<NtkDest, CutSize, CutData, Ntk, RewritingFn, NInputs> p( ntk, library, ps, st );
diff --git a/include/mockturtle/utils/tech_library.hpp b/include/mockturtle/utils/tech_library.hpp
index 13e460a56..3c7ddd208 100644
--- a/include/mockturtle/utils/tech_library.hpp
+++ b/include/mockturtle/utils/tech_library.hpp
@@ -109,11 +109,17 @@ struct supergate
   uint8_t polarity{ 0 };
 };
 
-/*! \brief Library of np-enumerated gates
+/*! \brief Library of gates for Boolean matching
  *
  * This class creates a technology library from a set
- * of input gates. Each NP-configuration of each gate
- * is enumerated and inserted in the library.
+ * of input gates. Each NP- or P-configuration of the gates
+ * is enumerated and inserted in the library.
+ * 
+ * The configuration is selected using the template
+ * parameter `Configuration`. P-configuration is suggested
+ * for libraries with more than 20 gates. The template parameter
+ * `NInputs` selects the maximum number of variables
+ * allowed for a gate in the library.
  *
    \verbatim embed:rst
 
@@ -142,6 +148,11 @@ class tech_library
     generate_library();
   }
 
+  /*! \brief Get the gates matching the function.
+   *
+   * Returns a list of gates that match the function represented
+   * by the truth table.
+   */
   const supergates_list_t* get_supergates( kitty::static_truth_table<NInputs> const& tt ) const
   {
     auto match = _super_lib.find( tt );
@@ -150,16 +161,22 @@ class tech_library
     return nullptr;
   }
 
+  /*! \brief Get inverter information.
+   *
+   * Returns area, delay, and ID of the smallest inverter.
+   */
   const std::tuple<float, float, uint32_t> get_inverter_info() const
   {
     return std::make_tuple( _inv_area, _inv_delay, _inv_id );
   }
 
+  /*! \brief Returns the maximum number of variables of the gates. */
   unsigned max_gate_size()
   {
     return _max_size;
   }
 
+  /*! \brief Returns the original gates. */
   const std::vector<gate> get_gates() const
   {
     return _gates;
@@ -433,12 +450,12 @@ struct exact_library_params
   bool verbose{ false };
 };
 
-/*! \brief Library of exact synthesis supergates
+/*! \brief Library of graph structures for Boolean matching
  *
- * This class creates a technology library from an exact
- * synthesis database. Each NPN-entry in the database is
- * stored in its NP class by removing the output inverter
- * if present. The class creates supergates from the
+ * This class creates a technology library from a database
+ * of structures classified in NPN classes. Each NPN-entry in
+ * the database is stored in its NP class by removing the output
+ * inverter if present. The class creates supergates from the
  * database computing area and delay information.
  *
    \verbatim embed:rst
@@ -448,7 +465,7 @@ struct exact_library_params
    .. code-block:: c++
 
       mockturtle::mig_npn_resynthesis mig_resyn{true};
-      mockturtle::exact_library<mockturtle::mig_network, mockturtle::mig_npn_resynthesis, 4> lib( mig_resyn );
+      mockturtle::exact_library<mockturtle::mig_network, mockturtle::mig_npn_resynthesis> lib( mig_resyn );
    \endverbatim
  */
 template<typename Ntk, class RewritingFn, unsigned NInputs = 4u>
@@ -468,6 +485,11 @@ class exact_library
     generate_library();
   }
 
+  /*! \brief Get the structures matching the function.
+   *
+   * Returns a list of graph structures that match the function
+   * represented by the truth table.
+   */
   const supergates_list_t* get_supergates( kitty::static_truth_table<NInputs> const& tt ) const
   {
     auto match = _super_lib.find( tt );
@@ -476,11 +498,16 @@ class exact_library
     return nullptr;
   }
 
+  /*! \brief Returns the NPN database of structures. */
   const Ntk& get_database() const
   {
     return _database;
   }
 
+  /*! \brief Get inverter information.
+   *
+   * Returns the area and delay cost of the inverter.
+   */
   const std::tuple<float, float> get_inverter_info() const
   {
     return std::make_pair( _ps.area_inverter, _ps.delay_inverter );

From 1ddcc1f3dcbdb7bf8f632a7efef279cf586ba21b Mon Sep 17 00:00:00 2001
From: aletempiac <alessandro.tempia@gmail.com>
Date: Mon, 19 Jul 2021 23:13:35 +0200
Subject: [PATCH 13/40] docs fix

---
 docs/algorithms/mapper.rst | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/docs/algorithms/mapper.rst b/docs/algorithms/mapper.rst
index 2295e966b..f0430740f 100644
--- a/docs/algorithms/mapper.rst
+++ b/docs/algorithms/mapper.rst
@@ -1,5 +1,5 @@
 Mapper
------------
+------
 
 **Header:** ``mockturtle/algorithms/mapper.hpp``
 

From d16199f26cca9a83e3d62b809e0de74cacc5861c Mon Sep 17 00:00:00 2001
From: aletempiac <alessandro.tempia@gmail.com>
Date: Mon, 2 Aug 2021 21:48:44 +0200
Subject: [PATCH 14/40] Add binding view for mapped networks, added i/o code to
 write mapped networks in verilog

---
 include/mockturtle/algorithms/mapper.hpp  |  21 ++-
 include/mockturtle/io/write_verilog.hpp   | 204 ++++++++++++++++++++++
 include/mockturtle/views/binding_view.hpp | 127 ++++++++++++++
 lib/lorina/lorina/verilog.hpp             |  36 ++++
 4 files changed, 382 insertions(+), 6 deletions(-)
 create mode 100644 include/mockturtle/views/binding_view.hpp

diff --git a/include/mockturtle/algorithms/mapper.hpp b/include/mockturtle/algorithms/mapper.hpp
index 3f56dba23..3b4f7f9a4 100644
--- a/include/mockturtle/algorithms/mapper.hpp
+++ b/include/mockturtle/algorithms/mapper.hpp
@@ -41,6 +41,7 @@
 #include "../utils/node_map.hpp"
 #include "../utils/stopwatch.hpp"
 #include "../utils/tech_library.hpp"
+#include "../views/binding_view.hpp"
 #include "../views/depth_view.hpp"
 #include "../views/topo_view.hpp"
 #include "cut_enumeration.hpp"
@@ -225,7 +226,7 @@ class tech_map_impl
     std::tie( lib_inv_area, lib_inv_delay, lib_inv_id ) = library.get_inverter_info();
   }
 
-  klut_network run()
+  binding_view<klut_network> run()
   {
     stopwatch t( st.time_mapping );
 
@@ -1288,9 +1289,9 @@ class tech_map_impl
     return count;
   }
 
-  std::pair<klut_network, klut_map> initialize_map_network()
+  std::pair<binding_view<klut_network>, klut_map> initialize_map_network()
   {
-    klut_network dest;
+    binding_view<klut_network> dest( library.get_gates() );
     klut_map old2new;
 
     old2new[ntk.node_to_index( ntk.get_node( ntk.get_constant( false ) ) )][0] = dest.get_constant( false );
@@ -1302,7 +1303,7 @@ class tech_map_impl
     return { dest, old2new };
   }
 
-  void finalize_cover( klut_network& res, klut_map& old2new )
+  void finalize_cover( binding_view<klut_network>& res, klut_map& old2new )
   {
     ntk.foreach_node( [&]( auto const& n ) {
       if ( ntk.is_constant( n ) )
@@ -1314,7 +1315,10 @@ class tech_map_impl
       if ( ntk.is_pi( n ) )
       {
         if ( node_match[index].map_refs[1] > 0 )
+        {
           old2new[index][1] = res.create_not( old2new[n][0] );
+          res.add_binding( res.get_node( old2new[index][1] ), lib_inv_id );
+        }
         return true;
       }
 
@@ -1329,10 +1333,14 @@ class tech_map_impl
       if ( node_data.same_match || node_data.map_refs[phase] > 0 )
       {
         create_lut_for_gate( res, old2new, index, phase );
+        res.add_binding( res.get_node( old2new[index][phase] ), node_match[index].best_supergate[phase]->root->id );
 
         /* add inverted version if used */
         if ( node_data.same_match && node_data.map_refs[phase ^ 1] > 0 )
+        {
           old2new[index][phase ^ 1] = res.create_not( old2new[index][phase] );
+          res.add_binding( res.get_node( old2new[index][phase ^ 1] ), lib_inv_id );
+        }
       }
 
       phase = phase ^ 1;
@@ -1340,6 +1348,7 @@ class tech_map_impl
       if ( !node_data.same_match && node_data.map_refs[phase] > 0 )
       {
         create_lut_for_gate( res, old2new, index, phase );
+        res.add_binding( res.get_node( old2new[index][phase] ), node_match[index].best_supergate[phase]->root->id );
       }
 
       return true;
@@ -1365,7 +1374,7 @@ class tech_map_impl
     compute_gates_usage();
   }
 
-  void create_lut_for_gate( klut_network& res, klut_map& old2new, uint32_t index, unsigned phase )
+  void create_lut_for_gate( binding_view<klut_network>& res, klut_map& old2new, uint32_t index, unsigned phase )
   {
     auto const& node_data = node_match[index];
     auto& best_cut = cuts.cuts( index )[node_data.best_cut[phase]];
@@ -1624,7 +1633,7 @@ class tech_map_impl
  * mapping command ``map`` in ABC.
  */
 template<class Ntk, unsigned CutSize = 5u, typename CutData = cut_enumeration_tech_map_cut, unsigned NInputs, classification_type Configuration>
-klut_network map( Ntk const& ntk, tech_library<NInputs, Configuration> const& library, map_params const& ps = {}, map_stats* pst = nullptr )
+binding_view<klut_network> map( Ntk const& ntk, tech_library<NInputs, Configuration> const& library, map_params const& ps = {}, map_stats* pst = nullptr )
 {
   static_assert( is_network_type_v<Ntk>, "Ntk is not a network type" );
   static_assert( has_size_v<Ntk>, "Ntk does not implement the size method" );
diff --git a/include/mockturtle/io/write_verilog.hpp b/include/mockturtle/io/write_verilog.hpp
index 044f01b6a..ba7f3b416 100644
--- a/include/mockturtle/io/write_verilog.hpp
+++ b/include/mockturtle/io/write_verilog.hpp
@@ -29,6 +29,7 @@
 
   \author Heinz Riener
   \author Mathias Soeken
+  \author Alessandro Tempia Calvino
 */
 
 #pragma once
@@ -44,6 +45,7 @@
 #include "../traits.hpp"
 #include "../utils/node_map.hpp"
 #include "../utils/string_utils.hpp"
+#include "../views/binding_view.hpp"
 #include "../views/topo_view.hpp"
 
 namespace mockturtle
@@ -349,6 +351,208 @@ void write_verilog( Ntk const& ntk, std::ostream& os, write_verilog_params const
   writer.on_module_end();
 }
 
+/*! \brief Writes mapped network in structural Verilog format into output stream
+ *
+ * **Required network functions:**
+ * - `num_pis`
+ * - `num_pos`
+ * - `foreach_pi`
+ * - `foreach_node`
+ * - `foreach_fanin`
+ * - `get_node`
+ * - `get_constant`
+ * - `is_constant`
+ * - `is_pi`
+ * - `node_to_index`
+ * 
+ * \param ntk Mapped network
+ * \param os Output stream
+ * \param ps Verilog parameters
+ */
+template<class Ntk>
+void write_verilog( binding_view<Ntk> const& ntk, std::ostream& os, write_verilog_params const& ps = {} )
+{
+  static_assert( is_network_type_v<Ntk>, "Ntk is not a network type" );
+  static_assert( has_num_pis_v<Ntk>, "Ntk does not implement the num_pis method" );
+  static_assert( has_num_pos_v<Ntk>, "Ntk does not implement the num_pos method" );
+  static_assert( has_foreach_pi_v<Ntk>, "Ntk does not implement the foreach_pi method" );
+  static_assert( has_foreach_node_v<Ntk>, "Ntk does not implement the foreach_node method" );
+  static_assert( has_foreach_fanin_v<Ntk>, "Ntk does not implement the foreach_fanin method" );
+  static_assert( has_get_node_v<Ntk>, "Ntk does not implement the get_node method" );
+  static_assert( has_get_constant_v<Ntk>, "Ntk does not implement the get_constant method" );
+  static_assert( has_is_constant_v<Ntk>, "Ntk does not implement the is_constant method" );
+  static_assert( has_is_pi_v<Ntk>, "Ntk does not implement the is_pi method" );
+  static_assert( has_node_to_index_v<Ntk>, "Ntk does not implement the node_to_index method" );
+
+  assert( ntk.is_combinational() && "Network has to be combinational" );
+
+  lorina::verilog_writer writer( os );
+
+  std::vector<std::string> xs, inputs;
+  if ( ps.input_names.empty() )
+  {
+    for ( auto i = 0u; i < ntk.num_pis(); ++i )
+      xs.emplace_back( fmt::format( "x{}", i ) );
+    inputs = xs;
+  }
+  else
+  {
+    uint32_t ctr{0u};
+    for ( auto const& [name, width] : ps.input_names )
+    {
+      inputs.emplace_back( name );
+      ctr += width;
+      for ( auto i = 0u; i < width; ++i )
+      {
+        xs.emplace_back( fmt::format( "{}[{}]", name, i ) );
+      }
+    }
+    if ( ctr != ntk.num_pis() )
+    {
+      std::cerr << "[e] input names do not partition all inputs\n";
+    }
+  }
+
+  std::vector<std::string> ys, outputs;
+  if ( ps.output_names.empty() )
+  {
+    for ( auto i = 0u; i < ntk.num_pos(); ++i )
+      ys.emplace_back( fmt::format( "y{}", i ) );
+    outputs = ys;
+  }
+  else
+  {
+    uint32_t ctr{0u};
+    for ( auto const& [name, width] : ps.output_names )
+    {
+      outputs.emplace_back( name );
+      ctr += width;
+      for ( auto i = 0u; i < width; ++i )
+      {
+        ys.emplace_back( fmt::format( "{}[{}]", name, i ) );
+      }
+    }
+    if ( ctr != ntk.num_pos() )
+    {
+      std::cerr << "[e] output names do not partition all outputs\n";
+    }
+  }
+
+  /* compute which nodes are POs and register index */
+  node_map<std::vector<uint32_t>, binding_view<Ntk>, std::unordered_map<typename Ntk::node, std::vector<uint32_t>>> po_nodes( ntk );
+  ntk.foreach_po( [&]( auto const& f, auto i ) {
+    po_nodes[f].push_back( i );
+  } );
+
+  std::vector<std::string> ws;
+
+  /* add wires */
+  ntk.foreach_gate( [&]( auto const& n ) {
+    if ( !po_nodes.has( n ) )
+    {
+      ws.emplace_back( fmt::format( "n{}", ntk.node_to_index( n ) ) );
+    }
+  } );
+
+  writer.on_module_begin( ps.module_name, inputs, outputs );
+  if ( ps.input_names.empty() )
+  {
+    writer.on_input( xs );
+  }
+  else
+  {
+    for ( auto const& [name, width] : ps.input_names )
+    {
+      writer.on_input( width, name );
+    }
+  }
+  if ( ps.output_names.empty() )
+  {
+    writer.on_output( ys );
+  }
+  else
+  {
+    for ( auto const& [name, width] : ps.output_names )
+    {
+      writer.on_output( width, name );
+    }
+  }
+  if ( !ws.empty() )
+  {
+    writer.on_wire( ws );
+  }
+
+  node_map<std::string, binding_view<Ntk>> node_names( ntk );
+  node_names[ntk.get_constant( false )] = "1'b0";
+  if ( ntk.get_node( ntk.get_constant( false ) ) != ntk.get_node( ntk.get_constant( true ) ) )
+    node_names[ntk.get_constant( true )] = "1'b1";
+
+  ntk.foreach_pi( [&]( auto const& n, auto i ) {
+    node_names[n] = xs[i];
+  } );
+
+  auto const& gates = ntk.get_library();
+
+  int nDigits = ( int ) std::floor( std::log10( ntk.num_gates() ) );
+  unsigned long length = 0;
+  unsigned counter = 0;
+
+  for ( auto const& gate : gates )
+  {
+    length = std::max( length, gate.name.length() );
+  }
+
+  topo_view ntk_topo{ntk};
+
+  /* if node drives multiple POs, duplicate */
+  ntk_topo.foreach_node( [&]( auto const& n ) {
+    if ( po_nodes.has( n ) )
+    {
+      node_names[n] = ys[po_nodes[n][0]];
+    }
+    else if ( !ntk.is_constant( n ) && !ntk.is_pi( n ) )
+    {
+      node_names[n] = fmt::format( "n{}", ntk.node_to_index( n ) );
+    }
+
+    if ( ntk.has_binding( n ) )
+    {
+      std::string name = gates[ntk.get_binding_index( n )].name;
+
+      int digits = counter == 0 ? 0 : ( int ) std::floor( std::log10( counter ) );
+      writer.on_nodes( name.append( std::string( length - name.length(), ' ' ) ),
+                       std::string( "g" ) + std::string( nDigits - digits, '0' ) + std::to_string( counter ),
+                       detail::format_fanin<binding_view<Ntk>>( ntk, n, node_names ),
+                       { node_names[n] } );
+      ++counter;
+
+      /* if node drives multiple POs, duplicate */
+      if ( po_nodes.has( n ) && po_nodes[n].size() > 1 )
+      {
+        std::cout << "[i] node " << n << " driving multiple POs has been duplicated.\n";
+        auto const& po_list = po_nodes[n];
+        for ( auto i = 1u; i < po_list.size(); ++i )
+        {
+          digits = counter == 0 ? 0 : ( int ) std::floor( std::log10( counter ) );
+          writer.on_nodes( name.append( std::string( length - name.length(), ' ' ) ),
+                           std::string( "g" ) + std::string( nDigits - digits, '0' ) + std::to_string( counter ),
+                           detail::format_fanin<binding_view<Ntk>>( ntk, n, node_names ),
+                           { ys[po_list[i]] } );
+          ++counter;
+        }
+      }
+    }
+    else if ( !ntk.is_constant( n ) && !ntk.is_pi( n ) )
+    {
+      std::cerr << "[e] internal node " << n << " is not mapped.\n";
+    }
+
+    return true;
+  } );
+
+  writer.on_module_end();
+}
+
 /*! \brief Writes network in structural Verilog format into a file
  *
  * **Required network functions:**
diff --git a/include/mockturtle/views/binding_view.hpp b/include/mockturtle/views/binding_view.hpp
new file mode 100644
index 000000000..a7314bc8a
--- /dev/null
+++ b/include/mockturtle/views/binding_view.hpp
@@ -0,0 +1,127 @@
+/* mockturtle: C++ logic network library
+ * Copyright (C) 2018-2021  EPFL
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use,
+ * copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following
+ * conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+ * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+ * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+/*!
+  \file binding_view.hpp
+  \brief Implements methods to bind the network to a standard cells library
+
+  \author Alessandro Tempia Calvino
+*/
+
+#pragma once
+
+#include "../io/genlib_reader.hpp"
+#include "../utils/node_map.hpp"
+
+#include <map>
+
+namespace mockturtle
+{
+
+template<class Ntk>
+class binding_view : public Ntk
+{
+public:
+  using node = typename Ntk::node;
+
+public:
+  explicit binding_view( std::vector<gate> const& library )
+      : Ntk()
+      , _library{ library }
+      , _bindings( *this )
+  {
+    static_assert( is_network_type_v<Ntk>, "Ntk is not a network type" );
+  }
+
+  explicit binding_view( Ntk const& ntk, std::vector<gate> const& library )
+      : Ntk( ntk )
+      , _library{ library }
+      , _bindings( *this )
+  {
+    static_assert( is_network_type_v<Ntk>, "Ntk is not a network type" );
+  }
+
+  binding_view<Ntk>& operator=( binding_view<Ntk> const& binding_ntk )
+  {
+    Ntk::operator=( binding_ntk );
+    _library = binding_ntk._library;
+    _bindings = binding_ntk._bindings;
+    return *this;
+  }
+
+  void add_binding( node const& n, uint32_t gate_id )
+  {
+    assert( gate_id < _library.size() );
+    _bindings[n] = gate_id;
+  }
+
+  bool add_binding_with_check( node const& n, uint32_t gate_id )
+  {
+    assert( gate_id < _library.size() );
+
+    auto const& cell = _library[gate_id];
+
+    if ( Ntk::node_function( n ) == cell.function )
+    {
+      _bindings[n] = gate_id;
+      return true;
+    }
+    return false;
+  }
+
+  void remove_binding( node const& n ) const
+  {
+    _bindings.erase( n );
+  }
+
+  const gate& get_binding( node const& n) const
+  {
+    return _library[_bindings[n]];
+  }
+
+  bool has_binding( node const& n) const
+  {
+    return _bindings.has( n );
+  }
+
+  unsigned int get_binding_index( node const& n) const
+  {
+    return _bindings[n];
+  }
+
+  const std::vector<gate>& get_library() const
+  {
+    return _library;
+  }
+
+private:
+  std::vector<gate> const _library;
+  node_map<uint32_t, Ntk, std::unordered_map<node, uint32_t>> _bindings;
+}; /* binding_view */
+
+template<class T>
+binding_view( T const& ) -> binding_view<T>;
+
+} // namespace mockturtle
diff --git a/lib/lorina/lorina/verilog.hpp b/lib/lorina/lorina/verilog.hpp
index 5212a2d44..c3958c002 100644
--- a/lib/lorina/lorina/verilog.hpp
+++ b/lib/lorina/lorina/verilog.hpp
@@ -30,6 +30,7 @@
   \author Heinz Riener
   \author Mathias Soeken
   \author Siang-Yun (Sonia) Lee
+  \author Alessandro Tempia Calvino
 */
 
 #pragma once
@@ -736,6 +737,41 @@ class verilog_writer
                         in.first ? "~" : "", in.second );
   }
 
+  /*! \brief Callback method for writing a node such as a standard cell.
+   *
+   * \param node_name Node name
+   * \param inst_name Name of the instance
+   * \param ins Input signals
+   * \param outs Output signals
+   */
+  virtual void on_nodes( std::string const& node_name, std::string const& inst_name, std::vector<std::pair<bool,std::string>> const& ins, std::vector<std::string> const& outs )
+  {
+    _os << fmt::format( "  {} {}(", node_name, inst_name );
+
+    /* write inputs */
+    char pin_name = 'a';
+    for ( auto const& in : ins )
+    {
+      _os << fmt::format( ".{}({}), ", pin_name, in.second );
+      ++pin_name;
+    }
+
+    /* write_outputs */
+    if ( outs.size() == 1 )
+    {
+      _os << fmt::format( ".O({}));\n", outs[0] );
+    }
+    else
+    {
+      unsigned i;
+      for ( i = 0u; i < outs.size() - 1; ++i )
+      {
+        _os << fmt::format( ".O{}({}), ", i, outs[i] );
+      }
+      _os << fmt::format( ".O{}({}));\n", i, outs[i] );
+    }
+  }
+
 protected:
   std::ostream& _os; /*!< Output stream */
 }; /* verilog_writer */

From 9cf738396f3e116623a0a9d17ff82cbf9053fd32 Mon Sep 17 00:00:00 2001
From: aletempiac <alessandro.tempia@gmail.com>
Date: Tue, 3 Aug 2021 14:44:11 +0200
Subject: [PATCH 15/40] Added buffering in mapping

---
 include/mockturtle/algorithms/mapper.hpp  | 78 +++++++++++++++++++++--
 include/mockturtle/utils/tech_library.hpp | 31 +++++++++
 2 files changed, 102 insertions(+), 7 deletions(-)

diff --git a/include/mockturtle/algorithms/mapper.hpp b/include/mockturtle/algorithms/mapper.hpp
index 3b4f7f9a4..75b751f7a 100644
--- a/include/mockturtle/algorithms/mapper.hpp
+++ b/include/mockturtle/algorithms/mapper.hpp
@@ -211,6 +211,7 @@ class tech_map_impl
         cuts( fast_cut_enumeration<Ntk, CutSize, true, CutData>( ntk, ps.cut_enumeration_ps, &st.cut_enumeration_st ) )
   {
     std::tie( lib_inv_area, lib_inv_delay, lib_inv_id ) = library.get_inverter_info();
+    std::tie( lib_buf_area, lib_buf_delay, lib_buf_id ) = library.get_buffer_info();
   }
 
   explicit tech_map_impl( Ntk const& ntk, tech_library<NInputs, Configuration> const& library, std::vector<float> const& switch_activity, map_params const& ps, map_stats& st )
@@ -224,6 +225,7 @@ class tech_map_impl
         cuts( fast_cut_enumeration<Ntk, NInputs, true, CutData>( ntk, ps.cut_enumeration_ps, &st.cut_enumeration_st ) )
   {
     std::tie( lib_inv_area, lib_inv_delay, lib_inv_id ) = library.get_inverter_info();
+    std::tie( lib_buf_area, lib_buf_delay, lib_buf_id ) = library.get_buffer_info();
   }
 
   binding_view<klut_network> run()
@@ -283,6 +285,9 @@ class tech_map_impl
       }
     }
 
+    /* insert buffers for POs driven by PIs */
+    insert_buffers();
+
     /* generate the output network */
     finalize_cover( res, old2new );
 
@@ -1289,6 +1294,37 @@ class tech_map_impl
     return count;
   }
 
+  /*! \brief Accounts for a buffer on every PO driven directly by a non-complemented PI.
+   *  Adds buffer area and delay to the totals; does nothing if the library has no buffer. */
+  void insert_buffers()
+  {
+    if ( lib_buf_id != UINT32_MAX )
+    {
+      double area_old = area;
+      bool buffers = false;
+
+      ntk.foreach_po( [&]( auto const& f ) {
+        auto const& n = ntk.get_node( f );
+        if ( !ntk.is_constant( n ) && ntk.is_pi( n ) && !ntk.is_complemented( f ) )
+        {
+          area += lib_buf_area;
+          /* the inserted gate is a buffer, so charge the buffer delay (not the inverter delay) */
+          delay = std::max( delay, node_match[ntk.node_to_index( n )].arrival[0] + lib_buf_delay );
+          buffers = true;
+        }
+      } );
+
+      /* round stats */
+      if ( ps.verbose && buffers )
+      {
+        float const area_gain = area_old > 0.0 ? float( ( area_old - area ) / area_old * 100 ) : 0.0f; /* guard: area_old may be 0 */
+        std::stringstream stats{};
+        stats << fmt::format( "[i] Buffering: Delay = {:>12.2f}  Area = {:>12.2f}  {:>5.2f} %\n", delay, area, area_gain );
+        st.round_stats.push_back( stats.str() );
+      }
+    }
+  }
+
   std::pair<binding_view<klut_network>, klut_map> initialize_map_network()
   {
     binding_view<klut_network> dest( library.get_gates() );
@@ -1306,15 +1342,18 @@ class tech_map_impl
   void finalize_cover( binding_view<klut_network>& res, klut_map& old2new )
   {
     ntk.foreach_node( [&]( auto const& n ) {
-      if ( ntk.is_constant( n ) )
-        return true;
-
       auto index = ntk.node_to_index( n );
+      auto const& node_data = node_match[index];
 
       /* add inverter at PI if needed */
-      if ( ntk.is_pi( n ) )
+      if ( ntk.is_constant( n ) )
+      {
+        if ( node_data.best_supergate[0] == nullptr && node_data.best_supergate[1] == nullptr )
+          return true;
+      }
+      else if ( ntk.is_pi( n ) )
       {
-        if ( node_match[index].map_refs[1] > 0 )
+        if ( node_data.map_refs[1] > 0 )
         {
           old2new[index][1] = res.create_not( old2new[n][0] );
           res.add_binding( res.get_node( old2new[index][1] ), lib_inv_id );
@@ -1323,10 +1362,9 @@ class tech_map_impl
       }
 
       /* continue if cut is not in the cover */
-      if ( node_match[index].map_refs[2] == 0u )
+      if ( node_data.map_refs[2] == 0u )
         return true;
 
-      auto const& node_data = node_match[index];
       unsigned phase = ( node_data.best_supergate[0] != nullptr ) ? 0 : 1;
 
       /* add used cut */
@@ -1360,6 +1398,16 @@ class tech_map_impl
       {
         res.create_po( old2new[ntk.node_to_index( ntk.get_node( f ) )][1] );
       }
+      else if ( !ntk.is_constant( ntk.get_node( f ) ) && ntk.is_pi( ntk.get_node( f ) ) && lib_buf_id != UINT32_MAX )
+      {
+        /* create buffers for POs */
+        static uint64_t _buf = 0x2;
+        kitty::dynamic_truth_table tt_buf( 1 );
+        kitty::create_from_words( tt_buf, &_buf, &_buf + 1 );
+        auto buf = res.create_node( { old2new[ntk.node_to_index( ntk.get_node( f ) )][0] }, tt_buf );
+        res.create_po( buf );
+        res.add_binding( buf, lib_buf_id );
+      }
       else
       {
         res.create_po( old2new[ntk.node_to_index( ntk.get_node( f ) )][0] );
@@ -1493,6 +1541,17 @@ class tech_map_impl
       return true;
     } );
 
+    if ( lib_buf_id != UINT32_MAX )
+    {
+      ntk.foreach_po( [&]( auto const& f ) {
+        auto const& n = ntk.get_node( f );
+        if ( !ntk.is_constant( n ) && ntk.is_pi( n ) && !ntk.is_complemented( f ) )
+        {
+          ++gates_profile[lib_buf_id];
+        }
+      } );
+    }
+
     std::stringstream gates_usage;
     double tot_area = 0.0f;
     uint32_t tot_instances = 0u;
@@ -1581,6 +1640,11 @@ class tech_map_impl
   float lib_inv_delay;
   uint32_t lib_inv_id;
 
+  /* lib buffer info */
+  float lib_buf_area;
+  float lib_buf_delay;
+  uint32_t lib_buf_id;
+
   std::vector<node<Ntk>> top_order;
   std::vector<node_match_tech<NInputs>> node_match;
   match_map matches;
diff --git a/include/mockturtle/utils/tech_library.hpp b/include/mockturtle/utils/tech_library.hpp
index 3c7ddd208..850af92a3 100644
--- a/include/mockturtle/utils/tech_library.hpp
+++ b/include/mockturtle/utils/tech_library.hpp
@@ -170,6 +170,15 @@ class tech_library
     return std::make_tuple( _inv_area, _inv_delay, _inv_id );
   }
 
+  /*! \brief Buffer data recorded for this library.
+   *
+   * Yields (area, delay, gate ID) of the smallest-area buffer; the ID stays `UINT32_MAX` when none was recorded.
+   */
+  const std::tuple<float, float, uint32_t> get_buffer_info() const
+  {
+    return std::tuple<float, float, uint32_t>{ _buf_area, _buf_delay, _buf_id };
+  }
+
   /*! \brief Returns the maximum number of variables of the gates. */
   unsigned max_gate_size()
   {
@@ -186,6 +195,7 @@ class tech_library
   void generate_library()
   {
     bool inv = false;
+    bool buf = false;
 
     for ( auto& gate : _gates )
     {
@@ -211,6 +221,17 @@ class tech_library
             inv = true;
           }
         }
+        else
+        {
+          /* get the smallest area buffer */
+          if ( !buf || gate.area < _buf_area )
+          {
+            _buf_area = gate.area;
+            _buf_delay = worst_delay;
+            _buf_id = gate.id;
+            buf = true;
+          }
+        }
       }
 
       _max_size = std::max( _max_size, gate.num_vars );
@@ -371,6 +392,11 @@ class tech_library
       std::cerr << "[i] WARNING: inverter gate has not been detected in the library" << std::endl;
     }
 
+    if ( !buf )
+    {
+      std::cerr << "[i] WARNING: buffer gate has not been detected in the library" << std::endl;
+    }
+
     if ( _ps.very_verbose )
     {
       for ( auto const& entry : _super_lib )
@@ -405,6 +431,11 @@ class tech_library
   float _inv_delay{ 0.0 };
   uint32_t _inv_id{ UINT32_MAX };
 
+  /* buffer info */
+  float _buf_area{ 0.0 };
+  float _buf_delay{ 0.0 };
+  uint32_t _buf_id{ UINT32_MAX };
+
   unsigned _max_size{ 0 }; /* max #fanins of the gates in the library */
 
   std::vector<gate> const _gates; /* collection of gates */

From fc246e5623a537efff5665b719ac113c4a13f7f3 Mon Sep 17 00:00:00 2001
From: aletempiac <alessandro.tempia@gmail.com>
Date: Tue, 3 Aug 2021 15:59:45 +0200
Subject: [PATCH 16/40] Simplified write_verilog for mapped networks, added and
 updated tests, minor fixes

---
 experiments/mapper.cpp                   |  7 +++-
 include/mockturtle/algorithms/mapper.hpp |  4 +-
 include/mockturtle/io/write_verilog.hpp  | 29 ++++++++++----
 lib/lorina/lorina/verilog.hpp            | 36 -----------------
 test/algorithms/mapper.cpp               | 23 +++++------
 test/io/write_verilog.cpp                | 50 ++++++++++++++++++++++++
 test/utils/tech_library.cpp              |  1 +
 7 files changed, 91 insertions(+), 59 deletions(-)

diff --git a/experiments/mapper.cpp b/experiments/mapper.cpp
index feff2e658..fe25728ef 100644
--- a/experiments/mapper.cpp
+++ b/experiments/mapper.cpp
@@ -34,10 +34,12 @@
 #include <mockturtle/algorithms/node_resynthesis/mig_npn.hpp>
 #include <mockturtle/io/aiger_reader.hpp>
 #include <mockturtle/io/genlib_reader.hpp>
+#include <mockturtle/io/write_verilog.hpp>
 #include <mockturtle/networks/aig.hpp>
 #include <mockturtle/networks/klut.hpp>
 #include <mockturtle/networks/mig.hpp>
 #include <mockturtle/utils/tech_library.hpp>
+#include <mockturtle/views/binding_view.hpp>
 #include <mockturtle/views/depth_view.hpp>
 
 #include <experiments.hpp>
@@ -113,10 +115,11 @@ int main()
     mig_network res1 = map( aig, exact_lib, ps1, &st1 );
 
     map_params ps2;
-    ps2.cut_enumeration_ps.minimize_truth_table = false;
+    ps2.cut_enumeration_ps.minimize_truth_table = true;
+    ps2.cut_enumeration_ps.cut_limit = 24;
     map_stats st2;
 
-    klut_network res2 = map( aig, tech_lib, ps2, &st2 );
+    binding_view<klut_network> res2 = map( aig, tech_lib, ps2, &st2 );
 
     const auto cec1 = benchmark == "hyp" ? true : abc_cec( res1, benchmark );
     const auto cec2 = benchmark == "hyp" ? true : abc_cec( res2, benchmark );
diff --git a/include/mockturtle/algorithms/mapper.hpp b/include/mockturtle/algorithms/mapper.hpp
index 75b751f7a..2e993d58c 100644
--- a/include/mockturtle/algorithms/mapper.hpp
+++ b/include/mockturtle/algorithms/mapper.hpp
@@ -1404,9 +1404,9 @@ class tech_map_impl
         static uint64_t _buf = 0x2;
         kitty::dynamic_truth_table tt_buf( 1 );
         kitty::create_from_words( tt_buf, &_buf, &_buf + 1 );
-        auto buf = res.create_node( { old2new[ntk.node_to_index( ntk.get_node( f ) )][0] }, tt_buf );
+        const auto buf = res.create_node( { old2new[ntk.node_to_index( ntk.get_node( f ) )][0] }, tt_buf );
         res.create_po( buf );
-        res.add_binding( buf, lib_buf_id );
+        res.add_binding( res.get_node( buf ), lib_buf_id );
       }
       else
       {
diff --git a/include/mockturtle/io/write_verilog.hpp b/include/mockturtle/io/write_verilog.hpp
index ba7f3b416..062e5b9a4 100644
--- a/include/mockturtle/io/write_verilog.hpp
+++ b/include/mockturtle/io/write_verilog.hpp
@@ -520,10 +520,21 @@ void write_verilog( binding_view<Ntk> const& ntk, std::ostream& os, write_verilo
       std::string name = gates[ntk.get_binding_index( n )].name;
 
       int digits = counter == 0 ? 0 : ( int ) std::floor( std::log10( counter ) );
-      writer.on_nodes( name.append( std::string( length - name.length(), ' ' ) ),
-                       std::string( "g" ) + std::string( nDigits - digits, '0' ) + std::to_string( counter ),
-                       detail::format_fanin<binding_view<Ntk>>( ntk, n, node_names ),
-                       { node_names[n] } );
+      auto fanin_names = detail::format_fanin<binding_view<Ntk>>( ntk, n, node_names );
+      std::vector<std::pair<std::string,std::string>> args;
+
+      char pin_name = 'a';
+      for ( auto pair : fanin_names )
+      {
+        args.emplace_back( std::make_pair( std::string( 1, pin_name ), pair.second ) );
+        ++pin_name;
+      }
+      args.emplace_back( std::make_pair( "O", node_names[n] ) );
+
+      writer.on_module_instantiation( name.append( std::string( length - name.length(), ' ' ) ),
+                                      {},
+                                      std::string( "g" ) + std::string( nDigits - digits, '0' ) + std::to_string( counter ),
+                                      args );
       ++counter;
 
       /* if node drives multiple POs, duplicate */
@@ -534,10 +545,12 @@ void write_verilog( binding_view<Ntk> const& ntk, std::ostream& os, write_verilo
         for ( auto i = 1u; i < po_list.size(); ++i )
         {
           digits = counter == 0 ? 0 : ( int ) std::floor( std::log10( counter ) );
-          writer.on_nodes( name.append( std::string( length - name.length(), ' ' ) ),
-                           std::string( "g" ) + std::string( nDigits - digits, '0' ) + std::to_string( counter ),
-                           detail::format_fanin<binding_view<Ntk>>( ntk, n, node_names ),
-                           { ys[po_list[i]] } );
+          args[args.size() - 1] = std::make_pair( "O", ys[po_list[i]] );
+
+          writer.on_module_instantiation( name.append( std::string( length - name.length(), ' ' ) ),
+                                          {},
+                                          std::string( "g" ) + std::string( nDigits - digits, '0' ) + std::to_string( counter ),
+                                          args );
           ++counter;
         }
       }
diff --git a/lib/lorina/lorina/verilog.hpp b/lib/lorina/lorina/verilog.hpp
index c3958c002..5212a2d44 100644
--- a/lib/lorina/lorina/verilog.hpp
+++ b/lib/lorina/lorina/verilog.hpp
@@ -30,7 +30,6 @@
   \author Heinz Riener
   \author Mathias Soeken
   \author Siang-Yun (Sonia) Lee
-  \author Alessandro Tempia Calvino
 */
 
 #pragma once
@@ -737,41 +736,6 @@ class verilog_writer
                         in.first ? "~" : "", in.second );
   }
 
-  /*! \brief Callback method for writing a node such as a standard cell.
-   *
-   * \param node_name Node name
-   * \param inst_name Name of the instance
-   * \param ins Input signals
-   * \param outs Output signals
-   */
-  virtual void on_nodes( std::string const& node_name, std::string const& inst_name, std::vector<std::pair<bool,std::string>> const& ins, std::vector<std::string> const& outs )
-  {
-    _os << fmt::format( "  {} {}(", node_name, inst_name );
-
-    /* write inputs */
-    char pin_name = 'a';
-    for ( auto const& in : ins )
-    {
-      _os << fmt::format( ".{}({}), ", pin_name, in.second );
-      ++pin_name;
-    }
-
-    /* write_outputs */
-    if ( outs.size() == 1 )
-    {
-      _os << fmt::format( ".O({}));\n", outs[0] );
-    }
-    else
-    {
-      unsigned i;
-      for ( i = 0u; i < outs.size() - 1; ++i )
-      {
-        _os << fmt::format( ".O{}({}), ", i, outs[i] );
-      }
-      _os << fmt::format( ".O{}({}));\n", i, outs[i] );
-    }
-  }
-
 protected:
   std::ostream& _os; /*!< Output stream */
 }; /* verilog_writer */
diff --git a/test/algorithms/mapper.cpp b/test/algorithms/mapper.cpp
index e79a33f69..324188a99 100644
--- a/test/algorithms/mapper.cpp
+++ b/test/algorithms/mapper.cpp
@@ -16,6 +16,7 @@
 #include <mockturtle/networks/xag.hpp>
 #include <mockturtle/networks/xmg.hpp>
 #include <mockturtle/utils/tech_library.hpp>
+#include <mockturtle/views/binding_view.hpp>
 
 using namespace mockturtle;
 
@@ -48,7 +49,7 @@ TEST_CASE( "Map of MAJ3", "[mapper]" )
 
   map_params ps;
   map_stats st;
-  klut_network luts = map( aig, lib, ps, &st );
+  binding_view<klut_network> luts = map( aig, lib, ps, &st );
 
   CHECK( luts.size() == 6u );
   CHECK( luts.num_pis() == 3u );
@@ -79,7 +80,7 @@ TEST_CASE( "Map of bad MAJ3 and constant output", "[mapper]" )
 
   map_params ps;
   map_stats st;
-  klut_network luts = map( aig, lib, ps, &st );
+  binding_view<klut_network> luts = map( aig, lib, ps, &st );
 
   CHECK( luts.size() == 6u );
   CHECK( luts.num_pis() == 3u );
@@ -110,7 +111,7 @@ TEST_CASE( "Map of full adder 1", "[mapper]" )
 
   map_params ps;
   map_stats st;
-  klut_network luts = map( aig, lib, ps, &st );
+  binding_view<klut_network> luts = map( aig, lib, ps, &st );
 
   const float eps{ 0.005f };
 
@@ -146,7 +147,7 @@ TEST_CASE( "Map of full adder 2", "[mapper]" )
   map_params ps;
   ps.cut_enumeration_ps.minimize_truth_table = false;
   map_stats st;
-  klut_network luts = map( aig, lib, ps, &st );
+  binding_view<klut_network> luts = map( aig, lib, ps, &st );
 
   const float eps{ 0.005f };
 
@@ -182,7 +183,7 @@ TEST_CASE( "Map with inverters", "[mapper]" )
 
   map_params ps;
   map_stats st;
-  klut_network luts = map( aig, lib, ps, &st );
+  binding_view<klut_network> luts = map( aig, lib, ps, &st );
 
   const float eps{ 0.005f };
 
@@ -216,7 +217,7 @@ TEST_CASE( "Map for inverters minimization", "[mapper]" )
 
   map_params ps;
   map_stats st;
-  klut_network luts = map( aig, lib, ps, &st );
+  binding_view<klut_network> luts = map( aig, lib, ps, &st );
 
   const float eps{ 0.005f };
 
@@ -266,16 +267,16 @@ TEST_CASE( "Map of buffer and constant outputs", "[mapper]" )
 
   map_params ps;
   map_stats st;
-  klut_network luts = map( aig, lib, ps, &st );
+  binding_view<klut_network> luts = map( aig, lib, ps, &st );
 
   const float eps{ 0.005f };
 
-  CHECK( luts.size() == 9u );
+  CHECK( luts.size() == 10u );
   CHECK( luts.num_pis() == 4u );
   CHECK( luts.num_pos() == 6u );
-  CHECK( luts.num_gates() == 3u );
-  CHECK( st.area > 5.0f - eps );
-  CHECK( st.area < 5.0f + eps );
+  CHECK( luts.num_gates() == 4u );
+  CHECK( st.area > 7.0f - eps );
+  CHECK( st.area < 7.0f + eps );
   CHECK( st.delay > 1.9f - eps );
   CHECK( st.delay < 1.9f + eps );
 }
diff --git a/test/io/write_verilog.cpp b/test/io/write_verilog.cpp
index 749f448c7..aef9400f2 100644
--- a/test/io/write_verilog.cpp
+++ b/test/io/write_verilog.cpp
@@ -10,6 +10,8 @@
 #include <mockturtle/networks/klut.hpp>
 #include <mockturtle/networks/mig.hpp>
 #include <mockturtle/networks/buffered.hpp>
+#include <mockturtle/io/genlib_reader.hpp>
+#include <mockturtle/views/binding_view.hpp>
 
 using namespace mockturtle;
 
@@ -169,3 +171,51 @@ TEST_CASE( "write buffered AIG into Verilog file", "[write_verilog]" )
                       "  assign y0 = n6 ;\n"
                       "endmodule\n" );
 }
+
+TEST_CASE( "write mapped network into Verilog file", "[write_verilog]" )
+{
+  std::string const simple_test_library = "GATE   inv1    1 O=!a;     PIN * INV 1 999 0.9 0.3 0.9 0.3\n"
+                                          "GATE   inv2    2 O=!a;     PIN * INV 2 999 1.0 0.1 1.0 0.1\n"
+                                          "GATE   buf     2 O=a;      PIN * NONINV 1 999 1.0 0.0 1.0 0.0\n"
+                                          "GATE   nand2   2 O=!(ab);  PIN * INV 1 999 1.0 0.2 1.0 0.2\n";
+
+  std::vector<gate> gates; /* the bindings below index into this vector: 0 inv1, 1 inv2, 2 buf, 3 nand2 */
+  std::istringstream in( simple_test_library );
+  auto result = lorina::read_genlib( in, genlib_reader( gates ) );
+
+  CHECK( result == lorina::return_code::success );
+
+  binding_view<klut_network> klut( gates );
+
+  const auto a = klut.create_pi();
+  const auto b = klut.create_pi();
+  const auto c = klut.create_pi();
+
+  /* create buffer */
+  uint64_t buf_func = 0x2; /* one-variable truth table 0b10: output equals the input */
+  kitty::dynamic_truth_table tt_buf( 1 );
+  kitty::create_from_words( tt_buf, &buf_func, &buf_func + 1 );
+  const auto buf = klut.create_node( { a }, tt_buf );
+  
+  const auto f1 = klut.create_nand( b, c );
+  const auto f2 = klut.create_not( f1 );
+
+  klut.create_po( buf );
+  klut.create_po( f1 );
+  klut.create_po( f2 );
+
+  klut.add_binding( klut.get_node( buf ), 2 ); /* gates[2] is "buf" */
+  klut.add_binding( klut.get_node( f1 ), 3 ); /* gates[3] is "nand2" */
+  klut.add_binding( klut.get_node( f2 ), 1 ); /* gates[1] is "inv2" */
+
+  std::ostringstream out;
+  write_verilog( klut, out );
+
+  CHECK( out.str() == "module top( x0 , x1 , x2 , y0 , y1 , y2 );\n" /* one cell instance per bound node, named after its gate */
+                      "  input x0 , x1 , x2 ;\n"
+                      "  output y0 , y1 , y2 ;\n"
+                      "  buf    g0( .a (x0), .O (y0) );\n"
+                      "  nand2  g1( .a (x1), .b (x2), .O (y1) );\n"
+                      "  inv2   g2( .a (y1), .O (y2) );\n"
+                      "endmodule\n" );
+}
diff --git a/test/utils/tech_library.cpp b/test/utils/tech_library.cpp
index dfb3707d2..284491872 100644
--- a/test/utils/tech_library.cpp
+++ b/test/utils/tech_library.cpp
@@ -16,6 +16,7 @@ using namespace mockturtle;
 
 std::string const simple_test_library = "GATE   inv1    1 O=!a;     PIN * INV 1 999 0.9 0.3 0.9 0.3\n"
                                         "GATE   inv2    2 O=!a;     PIN * INV 2 999 1.0 0.1 1.0 0.1\n"
+                                        "GATE   buf     2 O=a;      PIN * NONINV 1 999 1.0 0.0 1.0 0.0\n"
                                         "GATE   nand2   2 O=!(ab);  PIN * INV 1 999 1.0 0.2 1.0 0.2\n";
 
 std::string const test_library =  "GATE   inv1    3 O=!a;           PIN * INV 3 999 1.1 0.09 1.1 0.09\n"

From dc4cb21f23844fa1d334f4c78086b32c8d6abf55 Mon Sep 17 00:00:00 2001
From: aletempiac <alessandro.tempia@gmail.com>
Date: Tue, 3 Aug 2021 16:40:03 +0200
Subject: [PATCH 17/40] Added compatibility with multiple pins in Genlib
 libraries, changed experiment setup for MIG map reading AIGs into MIGs

---
 experiments/mapper.cpp                    |  8 +++++++-
 include/mockturtle/utils/tech_library.hpp | 19 ++++++++++++++++---
 2 files changed, 23 insertions(+), 4 deletions(-)

diff --git a/experiments/mapper.cpp b/experiments/mapper.cpp
index fe25728ef..6f2d2dac0 100644
--- a/experiments/mapper.cpp
+++ b/experiments/mapper.cpp
@@ -98,6 +98,12 @@ int main()
   for ( auto const& benchmark : epfl_benchmarks() )
   {
     fmt::print( "[i] processing {}\n", benchmark );
+    mig_network mig;
+    if ( lorina::read_aiger( benchmark_path( benchmark ), aiger_reader( mig ) ) != lorina::return_code::success )
+    {
+      continue;
+    }
+
     aig_network aig;
     if ( lorina::read_aiger( benchmark_path( benchmark ), aiger_reader( aig ) ) != lorina::return_code::success )
     {
@@ -112,7 +118,7 @@ int main()
     ps1.required_time = std::numeric_limits<double>::max();
     map_stats st1;
 
-    mig_network res1 = map( aig, exact_lib, ps1, &st1 );
+    mig_network res1 = map( mig, exact_lib, ps1, &st1 );
 
     map_params ps2;
     ps2.cut_enumeration_ps.minimize_truth_table = true;
diff --git a/include/mockturtle/utils/tech_library.hpp b/include/mockturtle/utils/tech_library.hpp
index 850af92a3..f104f2f56 100644
--- a/include/mockturtle/utils/tech_library.hpp
+++ b/include/mockturtle/utils/tech_library.hpp
@@ -204,6 +204,11 @@ class tech_library
         std::cerr << "[i] WARNING: gate " << gate.name << " IGNORED, too many variables for the library settings" << std::endl;
         continue;
       }
+      if ( gate.pins.size() != 1 && gate.pins.size() != gate.num_vars )
+      {
+        std::cerr << "[i] WARNING: gate " << gate.name << " IGNORED, pins mismatch" << std::endl;
+        continue;
+      }
 
       float worst_delay = compute_worst_delay( gate );
 
@@ -248,7 +253,11 @@ class tech_library
 
         for ( auto i = 0u; i < perm.size() && i < NInputs; ++i )
         {
-          sg.tdelay[i] = worst_delay;                     /* if pin-to-pin delay change to: gate.delay[perm[i]] */
+          if ( gate.pins.size() == 1 )
+            sg.tdelay[i] = worst_delay;
+          else
+            sg.tdelay[i] = static_cast<float>( std::max( gate.pins[perm[i]].rise_block_delay, gate.pins[perm[i]].fall_block_delay ) );
+
           sg.polarity |= ( ( neg >> perm[i] ) & 1 ) << i; /* permutate input negation to match the right pin */
         }
         for ( auto i = perm.size(); i < NInputs; ++i )
@@ -315,8 +324,12 @@ class tech_library
 
           for ( auto i = 0u; i < perm.size() && i < NInputs; ++i )
           {
-            sg.tdelay[i] = worst_delay;   /* if pin-to-pin delay change to: gate.delay[perm[i]] */
-            sg.polarity |= phase;         /* permutate input negation to match the right pin */
+            if ( gate.pins.size() == 1 )
+              sg.tdelay[i] = worst_delay;
+            else
+              sg.tdelay[i] = static_cast<float>( std::max( gate.pins[perm[i]].rise_block_delay, gate.pins[perm[i]].fall_block_delay ) );
+
+            sg.polarity |= phase;
           }
           for ( auto i = perm.size(); i < NInputs; ++i )
           {

From 965fc8f29202d052fa938f78e16fa80e2b27ac87 Mon Sep 17 00:00:00 2001
From: aletempiac <alessandro.tempia@gmail.com>
Date: Tue, 3 Aug 2021 18:17:27 +0200
Subject: [PATCH 18/40] Support pin names from genlib, better checks on pins
 parsing

---
 include/mockturtle/io/genlib_reader.hpp | 43 ++++++++++++++-----------
 include/mockturtle/io/write_verilog.hpp |  8 ++---
 lib/lorina/lorina/genlib.hpp            | 33 +++++++++++++++++--
 3 files changed, 59 insertions(+), 25 deletions(-)

diff --git a/include/mockturtle/io/genlib_reader.hpp b/include/mockturtle/io/genlib_reader.hpp
index 9c30153cd..8e64753ed 100644
--- a/include/mockturtle/io/genlib_reader.hpp
+++ b/include/mockturtle/io/genlib_reader.hpp
@@ -28,6 +28,7 @@
   \brief Reader visitor for GENLIB files
 
   \author Heinz Riener
+  \author Alessandro Tempia Calvino
 */
 
 #pragma once
@@ -90,32 +91,36 @@ class genlib_reader : public lorina::genlib_reader
     : gates( gates )
   {}
 
-  virtual void on_gate( std::string const& name, std::string const& expression, double area, std::vector<lorina::pin_spec> const& ps ) const override
+  virtual void on_gate( std::string const& name, std::string const& expression, uint32_t num_vars, double area, std::vector<lorina::pin_spec> const& ps ) const override /* gate callback: builds the truth table and the pin list, then appends the gate to `gates` */
   {
-    uint32_t num_vars{0};
-    for ( const auto& c : expression )
-    {
-      if ( c >= 'a' && c <= 'z' )
-      {
-        uint32_t const var = 1 + ( c - 'a' );
-        if ( var > num_vars )
-        {
-          num_vars = var;
-        }
-      }
-    }
-
     kitty::dynamic_truth_table tt{num_vars};
     create_from_expression( tt, expression );
 
     std::vector<pin> pp;
-    for ( const auto& p : ps )
+
+    if ( ps.size() == 1 && ps[0].name == "*" ) /* single generic "*" pin: replicate its data for every input, named a, b, c, ... */
     {
-      pp.emplace_back( pin{p.name,
-                           phase_type( static_cast<uint8_t>( p.phase ) ),
-                           p.input_load, p.max_load,
-                           p.rise_block_delay, p.rise_fanout_delay, p.fall_block_delay, p.fall_fanout_delay} );
+      char pin_name = 'a';
+      for ( auto i = 0u; i < num_vars; ++i ) /* unsigned index: num_vars is uint32_t, avoids a signed/unsigned comparison */
+      {
+        pp.emplace_back( pin{std::string( 1, pin_name ),
+                             phase_type( static_cast<uint8_t>( ps[0].phase ) ),
+                             ps[0].input_load, ps[0].max_load,
+                             ps[0].rise_block_delay, ps[0].rise_fanout_delay, ps[0].fall_block_delay, ps[0].fall_fanout_delay} );
+        ++pin_name;
+      }
+    }
+    else /* explicitly listed pins: copy each one under its own name */
+    {
+      for ( const auto& p : ps )
+      {
+        pp.emplace_back( pin{p.name,
+                             phase_type( static_cast<uint8_t>( p.phase ) ),
+                             p.input_load, p.max_load,
+                             p.rise_block_delay, p.rise_fanout_delay, p.fall_block_delay, p.fall_fanout_delay} );
+      }
     }
+
     gates.emplace_back( gate{static_cast<unsigned int>( gates.size() ), name,
                              expression, num_vars, tt, area, pp} );
   }
diff --git a/include/mockturtle/io/write_verilog.hpp b/include/mockturtle/io/write_verilog.hpp
index 062e5b9a4..968f5b71b 100644
--- a/include/mockturtle/io/write_verilog.hpp
+++ b/include/mockturtle/io/write_verilog.hpp
@@ -517,17 +517,17 @@ void write_verilog( binding_view<Ntk> const& ntk, std::ostream& os, write_verilo
 
     if ( ntk.has_binding( n ) )
     {
-      std::string name = gates[ntk.get_binding_index( n )].name;
+      auto const& gate = gates[ntk.get_binding_index( n )];
+      std::string name = gate.name;
 
       int digits = counter == 0 ? 0 : ( int ) std::floor( std::log10( counter ) );
       auto fanin_names = detail::format_fanin<binding_view<Ntk>>( ntk, n, node_names );
       std::vector<std::pair<std::string,std::string>> args;
 
-      char pin_name = 'a';
+      auto i = 0;
       for ( auto pair : fanin_names )
       {
-        args.emplace_back( std::make_pair( std::string( 1, pin_name ), pair.second ) );
-        ++pin_name;
+        args.emplace_back( std::make_pair( gate.pins[i++].name, pair.second ) );
       }
       args.emplace_back( std::make_pair( "O", node_names[n] ) );
 
diff --git a/lib/lorina/lorina/genlib.hpp b/lib/lorina/lorina/genlib.hpp
index 08fa8e91e..44dde9125 100644
--- a/lib/lorina/lorina/genlib.hpp
+++ b/lib/lorina/lorina/genlib.hpp
@@ -29,6 +29,7 @@
 
   \author Heinz Riener
   \author Shubham Rai
+  \author Alessandro Tempia Calvino
 */
 
 #pragma once
@@ -72,10 +73,11 @@ struct pin_spec
 class genlib_reader
 {
 public:
-  virtual void on_gate( std::string const& name, std::string const& expression, double area, std::vector<pin_spec> const& pins ) const
+  virtual void on_gate( std::string const& name, std::string const& expression, uint32_t num_vars, double area, std::vector<pin_spec> const& pins ) const /* default gate callback: discards every argument */
   {
     (void)name;
     (void)expression;
+    (void)num_vars; /* cast to void like the other parameters, which this default body does not use */
     (void)area;
     (void)pins;
   }
@@ -176,8 +178,22 @@ class genlib_parser
     std::string const& expression = tokens[3].substr( beg + 1, end - beg - 1 );
     double const area = std::stod( tokens[2] );
 
+    uint32_t num_vars{0};
+    for ( const auto& c : expression )
+    {
+      if ( c >= 'a' && c <= 'z' )
+      {
+        uint32_t const var = 1 + ( c - 'a' );
+        if ( var > num_vars )
+        {
+          num_vars = var;
+        }
+      }
+    }
+
     std::vector<pin_spec> pins;
 
+    bool generic_pin{false};
     uint64_t i{4};
     for ( ; i+8 < tokens.size(); i += 9 )
     {
@@ -192,6 +208,10 @@ class genlib_parser
       }
 
       std::string const& name = tokens[i+1];
+      if ( tokens[i+1] == "*" )
+      {
+        generic_pin = true;
+      }
       phase_type phase{phase_type::UNKNOWN};
       if ( tokens[i+2] == "INV" )
       {
@@ -222,6 +242,15 @@ class genlib_parser
       pins.emplace_back( pin_spec{name,phase,input_load,max_load,rise_block_delay,rise_fanout_delay,fall_block_delay,fall_fanout_delay} );
     }
 
+    if ( pins.size() != num_vars && !( pins.size() == 1 && generic_pin ) )
+    {
+      if ( diag )
+      {
+        diag->report( diag_id::ERR_GENLIB_PIN ).add_argument( tokens[i] );
+      }
+      return false;
+    }
+
     if ( i != tokens.size() )
     {
       if ( diag )
@@ -231,7 +260,7 @@ class genlib_parser
       return false;
     }
 
-    reader.on_gate( name, expression, area, pins );
+    reader.on_gate( name, expression, num_vars, area, pins );
     return true;
   }
 

From 873c8b0071873d1776f8583ea0799af472eddbb4 Mon Sep 17 00:00:00 2001
From: aletempiac <alessandro.tempia@gmail.com>
Date: Tue, 3 Aug 2021 18:43:39 +0200
Subject: [PATCH 19/40] Genlib test corrections

---
 test/io/genlib_reader.cpp | 69 ++++++++++++++++++++++++++++-----------
 1 file changed, 50 insertions(+), 19 deletions(-)

diff --git a/test/io/genlib_reader.cpp b/test/io/genlib_reader.cpp
index 9fe02e23c..3f338ad1e 100644
--- a/test/io/genlib_reader.cpp
+++ b/test/io/genlib_reader.cpp
@@ -18,13 +18,14 @@ TEST_CASE( "read genlib file", "[genlib_reader]" )
     "GATE inverter 1 O=!a; PIN * INV 1 999 1.0 1.0 1.0 1.0\n"
     "GATE buffer 2 O=a; PIN * NONINV 1 999 1.0 1.0 1.0 1.0\n"
     "GATE and 5 O=(ab); PIN * NONINV 1 999 1.0 1.0 1.0 1.0\n"
+    "GATE or 5 O={ab}; PIN n1 NONINV 1 999 1.0 1.0 1.0 1.0; PIN n2 NONINV 1 999 0.98 1.0 0.98 1.0\n"
   };
 
   std::istringstream in( file );
   auto const result = lorina::read_genlib( in, mockturtle::genlib_reader( gates ) );
   CHECK( result == lorina::return_code::success );
 
-  CHECK( gates.size() == 5u );
+  CHECK( gates.size() == 6u );
   CHECK( gates[0u].id == 0u );
   CHECK( gates[0u].name == "zero" );
   CHECK( gates[0u].expression == "0" );
@@ -48,7 +49,7 @@ TEST_CASE( "read genlib file", "[genlib_reader]" )
   CHECK( gates[2u].num_vars == 1 );
   CHECK( gates[2u].area == 1.0 );
   CHECK( gates[2u].pins.size() == 1 );
-  CHECK( gates[2u].pins[0u].name == "*" );
+  CHECK( gates[2u].pins[0u].name == "a" );
   CHECK( gates[2u].pins[0u].phase == phase_type::INV );
   CHECK( gates[2u].pins[0u].input_load == 1.0 );
   CHECK( gates[2u].pins[0u].max_load == 999.0 );
@@ -64,15 +65,14 @@ TEST_CASE( "read genlib file", "[genlib_reader]" )
   CHECK( gates[3u].num_vars == 1 );
   CHECK( gates[3u].area == 2.0 );
   CHECK( gates[3u].pins.size() == 1 );
-  CHECK( gates[3u].pins[0u].name == "*" );
+  CHECK( gates[3u].pins[0u].name == "a" );
   CHECK( gates[3u].pins[0u].phase == phase_type::NONINV );
-  CHECK( gates[2u].pins[0u].phase == phase_type::INV );
-  CHECK( gates[2u].pins[0u].input_load == 1.0 );
-  CHECK( gates[2u].pins[0u].max_load == 999.0 );
-  CHECK( gates[2u].pins[0u].rise_block_delay == 1.0 );
-  CHECK( gates[2u].pins[0u].rise_fanout_delay == 1.0 );
-  CHECK( gates[2u].pins[0u].rise_block_delay == 1.0 );
-  CHECK( gates[2u].pins[0u].rise_fanout_delay == 1.0 );
+  CHECK( gates[3u].pins[0u].input_load == 1.0 );
+  CHECK( gates[3u].pins[0u].max_load == 999.0 );
+  CHECK( gates[3u].pins[0u].rise_block_delay == 1.0 );
+  CHECK( gates[3u].pins[0u].rise_fanout_delay == 1.0 );
+  CHECK( gates[3u].pins[0u].rise_block_delay == 1.0 );
+  CHECK( gates[3u].pins[0u].rise_fanout_delay == 1.0 );
 
   CHECK( gates[4u].id == 4u );
   CHECK( gates[4u].name == "and" );
@@ -80,14 +80,45 @@ TEST_CASE( "read genlib file", "[genlib_reader]" )
   CHECK( gates[4u].function._bits[0] == 8 );
   CHECK( gates[4u].num_vars == 2 );
   CHECK( gates[4u].area == 5.0 );
-  CHECK( gates[4u].pins.size() == 1 );
-  CHECK( gates[4u].pins[0u].name == "*" );
+  CHECK( gates[4u].pins.size() == 2 );
+  CHECK( gates[4u].pins[0u].name == "a" );
   CHECK( gates[4u].pins[0u].phase == phase_type::NONINV );
-  CHECK( gates[2u].pins[0u].phase == phase_type::INV );
-  CHECK( gates[2u].pins[0u].input_load == 1.0 );
-  CHECK( gates[2u].pins[0u].max_load == 999.0 );
-  CHECK( gates[2u].pins[0u].rise_block_delay == 1.0 );
-  CHECK( gates[2u].pins[0u].rise_fanout_delay == 1.0 );
-  CHECK( gates[2u].pins[0u].rise_block_delay == 1.0 );
-  CHECK( gates[2u].pins[0u].rise_fanout_delay == 1.0 );
+  CHECK( gates[4u].pins[0u].input_load == 1.0 );
+  CHECK( gates[4u].pins[0u].max_load == 999.0 );
+  CHECK( gates[4u].pins[0u].rise_block_delay == 1.0 );
+  CHECK( gates[4u].pins[0u].rise_fanout_delay == 1.0 );
+  CHECK( gates[4u].pins[0u].rise_block_delay == 1.0 );
+  CHECK( gates[4u].pins[0u].rise_fanout_delay == 1.0 );
+  CHECK( gates[4u].pins[1u].name == "b" );
+  CHECK( gates[4u].pins[1u].phase == phase_type::NONINV );
+  CHECK( gates[4u].pins[1u].input_load == 1.0 );
+  CHECK( gates[4u].pins[1u].max_load == 999.0 );
+  CHECK( gates[4u].pins[1u].rise_block_delay == 1.0 );
+  CHECK( gates[4u].pins[1u].rise_fanout_delay == 1.0 );
+  CHECK( gates[4u].pins[1u].rise_block_delay == 1.0 );
+  CHECK( gates[4u].pins[1u].rise_fanout_delay == 1.0 );
+
+  CHECK( gates[5u].id == 5u );
+  CHECK( gates[5u].name == "or" );
+  CHECK( gates[5u].expression == "{ab}" );
+  CHECK( gates[5u].function._bits[0] == 0xe );
+  CHECK( gates[5u].num_vars == 2 );
+  CHECK( gates[5u].area == 5.0 );
+  CHECK( gates[5u].pins.size() == 2 );
+  CHECK( gates[5u].pins[0u].name == "n1" );
+  CHECK( gates[5u].pins[0u].phase == phase_type::NONINV );
+  CHECK( gates[5u].pins[0u].input_load == 1.0 );
+  CHECK( gates[5u].pins[0u].max_load == 999.0 );
+  CHECK( gates[5u].pins[0u].rise_block_delay == 1.0 );
+  CHECK( gates[5u].pins[0u].rise_fanout_delay == 1.0 );
+  CHECK( gates[5u].pins[0u].rise_block_delay == 1.0 );
+  CHECK( gates[5u].pins[0u].rise_fanout_delay == 1.0 );
+  CHECK( gates[5u].pins[1u].name == "n2" );
+  CHECK( gates[5u].pins[1u].phase == phase_type::NONINV );
+  CHECK( gates[5u].pins[1u].input_load == 1.0 );
+  CHECK( gates[5u].pins[1u].max_load == 999.0 );
+  CHECK( gates[5u].pins[1u].rise_block_delay == 0.98 );
+  CHECK( gates[5u].pins[1u].rise_fanout_delay == 1.0 );
+  CHECK( gates[5u].pins[1u].rise_block_delay == 0.98 );
+  CHECK( gates[5u].pins[1u].rise_fanout_delay == 1.0 );
 }

From c3badbf2f2f05267f18daba7124d39cd5dad9ef0 Mon Sep 17 00:00:00 2001
From: aletempiac <alessandro.tempia@gmail.com>
Date: Fri, 6 Aug 2021 17:13:09 +0200
Subject: [PATCH 20/40] Supergates importing and representation utilities

---
 include/mockturtle/io/super_reader.hpp   | 103 ++++++++
 include/mockturtle/utils/super_utils.hpp | 278 +++++++++++++++++++++
 lib/lorina/lorina/diagnostics.inc        |   5 +
 lib/lorina/lorina/super.hpp              | 293 +++++++++++++++++++++++
 4 files changed, 679 insertions(+)
 create mode 100644 include/mockturtle/io/super_reader.hpp
 create mode 100644 include/mockturtle/utils/super_utils.hpp
 create mode 100644 lib/lorina/lorina/super.hpp

diff --git a/include/mockturtle/io/super_reader.hpp b/include/mockturtle/io/super_reader.hpp
new file mode 100644
index 000000000..a90e9e47f
--- /dev/null
+++ b/include/mockturtle/io/super_reader.hpp
@@ -0,0 +1,103 @@
+/* mockturtle: C++ logic network library
+ * Copyright (C) 2018-2021  EPFL
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use,
+ * copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following
+ * conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+ * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+ * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+/*!
+  \file super_reader.hpp
+  \brief Reader visitor for SUPER files
+
+  \author Alessandro Tempia Calvino
+  \author Shubham Rai
+*/
+
+#pragma once
+
+#include "../traits.hpp"
+
+#include <fmt/format.h>
+#include <kitty/constructors.hpp>
+#include <lorina/super.hpp>
+
+namespace mockturtle
+{
+
+struct supergate_spec /* one gate line of a SUPER file, as delivered to super_reader::on_supergate */
+{
+  unsigned int id{ 0u };           /* super_reader stores the entry's position in super_lib::supergates here */
+  std::string name{};              /* gate name token of the line */
+  bool is_super{ false };          /* flag forwarded from the parser callback */
+  std::vector<uint32_t> fanins_id; /* fanin ids listed after the name */
+};
+
+struct super_lib /* header fields and gate list collected by super_reader */
+{
+  std::string genlib_name;                /* set by on_super_info */
+  uint32_t max_num_vars = 0u;             /* set by on_super_info */
+  uint32_t num_superGates = 0u;           /* receives on_super_info's max_superGates */
+  uint32_t num_lines = 0u;                /* set by on_super_info */
+  std::vector<supergate_spec> supergates; /* one entry appended per on_supergate call */
+};
+
+/*! \brief lorina callbacks for SUPER files.
+ *
+   \verbatim embed:rst
+
+   Example
+
+   .. code-block:: c++
+
+      mockturtle::super_lib lib;
+      auto const result = lorina::read_super( "file.super", mockturtle::super_reader( lib ) );
+   \endverbatim
+ */
+class super_reader : public lorina::super_reader
+{
+public:
+  explicit super_reader( super_lib& lib )
+      : lib( lib )
+  {
+  }
+  /*! \brief Copies the four SUPER header fields into the library. */
+  virtual void on_super_info( std::string const& genlib_name, uint32_t max_num_vars, uint32_t max_superGates, uint32_t num_lines ) const override
+  {
+    lib.genlib_name = genlib_name;
+    lib.max_num_vars = max_num_vars;
+    lib.num_superGates = max_superGates;
+    lib.num_lines = num_lines;
+  }
+  /*! \brief Appends one gate; its id is its position in `lib.supergates`. */
+  virtual void on_supergate( std::string const& name, bool const& is_super, std::vector<uint32_t> const& fanins_id ) const override
+  {
+    /* cast explicitly: size_t -> unsigned int is a narrowing conversion inside braces */
+    lib.supergates.emplace_back( supergate_spec{ static_cast<unsigned int>( lib.supergates.size() ),
+                                                 name,
+                                                 is_super,
+                                                 fanins_id } );
+  }
+
+protected:
+  super_lib& lib; /* destination library, held by reference */
+}; /* super_reader */
+
+} /* namespace mockturtle */
diff --git a/include/mockturtle/utils/super_utils.hpp b/include/mockturtle/utils/super_utils.hpp
new file mode 100644
index 000000000..c21fc0bd1
--- /dev/null
+++ b/include/mockturtle/utils/super_utils.hpp
@@ -0,0 +1,278 @@
+/* mockturtle: C++ logic network library
+ * Copyright (C) 2018-2021  EPFL
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use,
+ * copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following
+ * conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+ * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+ * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+/*!
+  \file super_utils.hpp
+  \brief Implements utilities to create supergates for technology mapping
+
+  \author Alessandro Tempia Calvino
+*/
+
+#pragma once
+
+#include <cassert>
+#include <unordered_map>
+#include <vector>
+
+#include <kitty/constructors.hpp>
+#include <kitty/dynamic_truth_table.hpp>
+#include <kitty/npn.hpp>
+#include <kitty/print.hpp>
+#include <kitty/static_truth_table.hpp>
+#include <lorina/lorina.hpp>
+
+#include "../io/genlib_reader.hpp"
+#include "../io/super_reader.hpp"
+#include "../traits.hpp"
+#include "../utils/truth_table_cache.hpp"
+
+namespace mockturtle
+{
+
+template<unsigned NInputs> /* NInputs: number of entries in `tdelay` */
+struct composed_gate
+{
+  uint32_t id;
+  int32_t root_id{ -1 };
+  kitty::dynamic_truth_table function;
+  double area{ 0.0f };
+  std::array<float, NInputs> tdelay{}; /* per-input delay; supergate_utils fills it with the max of rise/fall block delay */
+  std::vector<uint32_t> fanin{}; /* ids of the composed gates feeding the root gate; left empty for plain gates and input variables */
+}; /* composed_gate: one library entry; root_id == -1 marks an input variable, otherwise it is the id of the genlib gate at the root */
+
+template<unsigned NInputs = 5u> /* NInputs: number of delay slots per composed gate, i.e. the largest supported input count */
+class supergate_utils
+{
+public:
+  explicit supergate_utils( std::vector<gate> const& gates, super_lib const& supergates_spec = {} )
+      : _gates( gates ),
+        _supergates_spec( supergates_spec ),
+        _supergates()
+  {
+    /* Builds the composed-gate list once: without SUPER declarations every
+     * genlib gate becomes one entry, otherwise the supergates are assembled.
+     * NOTE: both arguments are kept by reference; a defaulted `supergates_spec`
+     * is a temporary that is only valid while the constructor runs. */
+    if ( _supergates_spec.supergates.empty() )
+      compute_library_with_genlib();
+    else
+      generate_library_with_super();
+  }
+  /*! \brief Returns the composed gates built by the constructor. */
+  const std::vector<composed_gate<NInputs>>& get_super_library() const
+  {
+    return _supergates;
+  }
+
+  /*! \brief Appends one composed gate per genlib gate (no supergates). */
+  void compute_library_with_genlib()
+  {
+    for ( const auto& g : _gates )
+    {
+      std::array<float, NInputs> pin_to_pin_delays{};
+
+      /* use worst pin delay; one slot per pin, never more than NInputs */
+      for ( auto i = 0u; i < g.pins.size() && i < NInputs; ++i )
+      {
+        auto const& pin = g.pins[i];
+        pin_to_pin_delays[i] = static_cast<float>( std::max( pin.rise_block_delay, pin.fall_block_delay ) );
+      }
+
+      composed_gate<NInputs> s = {_supergates.size(),
+                                  g.id,
+                                  g.function,
+                                  g.area,
+                                  pin_to_pin_delays,
+                                  {}};
+
+      _supergates.emplace_back( s );
+    }
+  }
+  /*! \brief Builds input variables and supergates from the SUPER specification; falls back to the genlib-only library if NInputs is too small. */
+  void generate_library_with_super()
+  {
+    if ( _supergates_spec.max_num_vars > NInputs )
+    {
+      std::cerr << fmt::format(
+        "ERROR: NInputs ({}) should be greater or equal than the max number of variables ({}) in the super file.\n", NInputs, _supergates_spec.max_num_vars
+        );
+      std::cerr << "WARNING: ignoring supergates, proceeding with standard library." << std::endl;
+      compute_library_with_genlib();
+      return;
+    }
+
+    /* create a map for the gates IDs */
+    std::unordered_map<std::string, uint32_t> gates_map;
+
+    for ( auto const& g : _gates )
+    {
+      if ( gates_map.find( g.name ) != gates_map.end() )
+      {
+        std::cerr << fmt::format( "WARNING: ignoring genlib gate {}, duplicated name entry.", g.name ) << std::endl;
+      }
+      else
+      {
+        gates_map[g.name] = g.id;
+      }
+    }
+
+    /* creating input variables */
+    for ( uint8_t i = 0; i < _supergates_spec.max_num_vars; ++i )
+    {
+      kitty::dynamic_truth_table tt{ NInputs };
+      kitty::create_nth_var( tt, i );
+
+      composed_gate<NInputs> s = {i,
+                                  -1,
+                                  tt,
+                                  0.0f,
+                                  {},
+                                  {}};
+
+      _supergates.emplace_back( s );
+    }
+
+    for ( auto const g : _supergates_spec.supergates )
+    {
+      uint32_t root_match_id;
+      if ( auto it = gates_map.find( g.name ); it != gates_map.end() )
+      {
+        root_match_id = it->second;
+      }
+      else
+      {
+        std::cerr << fmt::format( "WARNING: ignoring supergate {}, no reference in genlib.", g.id ) << std::endl;
+        continue;
+      }
+      /* NOTE(review): the gate id is used as an index into _gates; this presumes ids equal positions */
+      uint32_t num_vars = _gates[root_match_id].num_vars;
+
+      if ( num_vars != g.fanins_id.size() )
+      {
+        std::cerr << fmt::format( "WARNING: ignoring supergate {}, wrong number of fanins.", g.id ) << std::endl;
+        continue;
+      }
+      if ( num_vars > _supergates_spec.max_num_vars )
+      {
+        std::cerr << fmt::format( "WARNING: ignoring supergate {}, too many variables for the library settings.", g.id ) << std::endl;
+        continue;
+      }
+
+      std::vector<uint32_t> sub_gates;
+      /* NOTE(review): fanin ids index _supergates directly; they presumably shift if an earlier supergate was skipped */
+      bool error = false;
+      for ( uint32_t f : g.fanins_id )
+      {
+        if ( f >= g.id + _supergates_spec.max_num_vars || f >= _supergates.size() )
+        {
+          error = true;
+          std::cerr << fmt::format( "WARNING: ignoring supergate {}, wrong fanins.", g.id ) << std::endl;
+          break; /* never index _supergates with a rejected fanin */
+        }
+        sub_gates.emplace_back( _supergates[f].id );
+      }
+      if ( error )
+      {
+        continue;
+      }
+
+      double const area = compute_area( root_match_id, sub_gates );
+      const auto tt = compute_truth_table( root_match_id, sub_gates );
+
+      composed_gate<NInputs> s = {_supergates.size(),
+                                  root_match_id,
+                                  tt,
+                                  area,
+                                  {},
+                                  sub_gates};
+
+      compute_delay_parameters( s );
+
+      _supergates.emplace_back( s );
+    }
+  }
+
+private:
+  inline double compute_area( uint32_t root_id, std::vector<uint32_t> const& sub_gates )
+  {
+    /* root gate area plus the area of every sub-gate, summed in double like composed_gate::area */
+    double area = _gates[root_id].area;
+    for ( auto const& id : sub_gates )
+    {
+      area += _supergates[id].area;
+    }
+    return area;
+  }
+  /* composes the root gate's function with the functions of its sub-gates */
+  inline kitty::dynamic_truth_table compute_truth_table( uint32_t root_id, std::vector<uint32_t> const& sub_gates )
+  {
+    std::vector<kitty::dynamic_truth_table> ttv;
+    ttv.reserve( sub_gates.size() );
+    for ( auto const& id : sub_gates )
+    {
+      ttv.emplace_back( _supergates[id].function );
+    }
+
+    return kitty::compose_truth_table( _gates[root_id].function, ttv );
+  }
+  /* for every pin of the root gate, pushes its worst block delay down through the matching fanin */
+  inline void compute_delay_parameters( composed_gate<NInputs>& s )
+  {
+    const auto& root = _gates[s.root_id];
+
+    auto i = 0u;
+    for ( auto const& pin : root.pins )
+    {
+      float worst_delay = std::max( pin.rise_block_delay, pin.fall_block_delay );
+
+      compute_delay_pin_rec( s, _supergates[s.fanin[i++]], worst_delay );
+    }
+  }
+  /* accumulates block delays along a path; at an input variable keeps the largest delay seen in root.tdelay */
+  void compute_delay_pin_rec( composed_gate<NInputs>& root, composed_gate<NInputs>& s, float delay )
+  {
+    /* termination: input variable */
+    if ( s.root_id == -1 )
+    {
+      root.tdelay[s.id] = std::max( root.tdelay[s.id], delay );
+      return;
+    }
+
+    auto i = 0u;
+    for ( auto const& pin : _gates[s.root_id].pins )
+    {
+      float worst_delay = delay + std::max( pin.rise_block_delay, pin.fall_block_delay );
+
+      compute_delay_pin_rec( root, _supergates[s.fanin[i++]], worst_delay );
+    }
+  }
+
+protected:
+  std::vector<gate> const& _gates; /* genlib gates, held by reference */
+  super_lib const& _supergates_spec; /* SUPER declarations, held by reference */
+  std::vector<composed_gate<NInputs>> _supergates; /* generated library */
+}; /* Class supergate_utils */
+
+} /* namespace mockturtle */
diff --git a/lib/lorina/lorina/diagnostics.inc b/lib/lorina/lorina/diagnostics.inc
index 9fbd213c5..6436ffa12 100644
--- a/lib/lorina/lorina/diagnostics.inc
+++ b/lib/lorina/lorina/diagnostics.inc
@@ -37,6 +37,11 @@ enum class diag_id
   ERR_GENLIB_PIN_PHASE,
   ERR_GENLIB_FAILED,
 
+  /* SUPER */
+  ERR_SUPER_INFO,
+  ERR_SUPER_UNEXPECTED_STRUCTURE,
+  ERR_SUPER_GATE,
+
   /* sentinel element to mark the end */
   NUM_STATIC_ERROR_IDS,
 };
diff --git a/lib/lorina/lorina/super.hpp b/lib/lorina/lorina/super.hpp
new file mode 100644
index 000000000..b38d577a4
--- /dev/null
+++ b/lib/lorina/lorina/super.hpp
@@ -0,0 +1,293 @@
+/* lorina: C++ parsing library
+ * Copyright (C) 2018-2021  EPFL
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use,
+ * copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following
+ * conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+ * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+ * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+/*!
+  \file super.hpp
+  \brief Implements SUPER parser for files generated by ABC
+
+  \author Alessandro Tempia Calvino
+  \author Shubham Rai
+*/
+
+#pragma once
+
+#include "common.hpp"
+#include "detail/utils.hpp"
+#include "diagnostics.hpp"
+#include <fstream>
+#include <istream>
+#include <optional>
+#include <sstream>
+#include <string>
+
+namespace lorina
+{
+
+/*! \brief A reader visitor for a super format.
+ *
+ * Callbacks for the super format; the default implementations ignore their arguments.
+ */
+class super_reader
+{
+public:
+  virtual ~super_reader() = default; /* the class is meant to be subclassed, so allow destruction through the base */
+
+  virtual void on_super_info( std::string const& genlib_name, uint32_t max_num_vars, uint32_t max_supergates, uint32_t num_lines ) const /* header callback */
+  {
+    (void)genlib_name; (void)max_num_vars;
+    (void)max_supergates; (void)num_lines;
+  }
+
+  virtual void on_supergate( std::string const& name, bool const& is_super, std::vector<uint32_t> const& fanins_id ) const /* gate-line callback */
+  {
+    (void)name;
+    (void)is_super;
+    (void)fanins_id;
+  }
+}; /* super_reader */
+
+/*! \brief Parser for the SUPER format: four header lines followed by one gate per line.
+ *
+ */
+class super_parser
+{
+public:
+  explicit super_parser( std::istream& in, super_reader const& reader, diagnostic_engine* diag )
+      : in( in )
+      , reader( reader )
+      , diag( diag )
+  {
+  }
+
+  /*! \brief Parses the whole stream; returns false as soon as a header or gate line is rejected. */
+  bool run()
+  {
+    std::string line;
+    uint32_t info_lines = 0u;
+    uint32_t max_num_vars = 0u;
+
+    std::vector<std::string> info_vec;
+
+    while ( std::getline( in, line ) )
+    {
+      /* remove whitespaces */
+      detail::trim( line );
+
+      /* skip comments and empty lines */
+      if ( line.empty() || line[0] == '#' )
+      {
+        continue;
+      }
+
+      /* end of file char */
+      if ( line[0] == '\0' )
+      {
+        return true;
+      }
+
+      if ( info_lines < 4 )
+      {
+        if ( !parse_file_info( line, info_vec ) )
+        {
+          return false;
+        }
+        if ( info_lines == 1 ) /* second header line: bounds the token count of gate lines */
+        {
+          max_num_vars = static_cast<uint32_t>( std::stoul( info_vec[1] ) );
+        }
+        ++info_lines;
+      }
+      else
+      {
+        if ( !parse_gate_definition( line, max_num_vars ) )
+        {
+          return false;
+        }
+      }
+    }
+    return true;
+  }
+
+private:
+  bool parse_file_info( std::string const& line, std::vector<std::string>& info_vec )
+  {
+    /* Header layout, one field per line: genlib file name, max number of
+     * variables, max number of supergates, number of lines. `line` arrives
+     * trimmed from run() and is stored as a whole. */
+    bool const numeric_field = !info_vec.empty();
+
+    /* every field after the name must be an unsigned integer; rejecting it here
+     * keeps the conversions below (and in run()) from throwing std::invalid_argument */
+    if ( numeric_field && line.find_first_not_of( "0123456789" ) != std::string::npos )
+    {
+      if ( diag )
+      {
+        diag->report( diag_id::ERR_SUPER_INFO ).add_argument( line );
+      }
+      return false;
+    }
+
+    info_vec.emplace_back( line );
+
+    if ( info_vec.size() == 4 )
+    {
+      reader.on_super_info( info_vec[0],
+                            static_cast<uint32_t>( std::stoul( info_vec[1] ) ),
+                            static_cast<uint32_t>( std::stoul( info_vec[2] ) ),
+                            static_cast<uint32_t>( std::stoul( info_vec[3] ) ) );
+    }
+
+    return true;
+  }
+
+  /*! \brief Parses one gate line and forwards it to `on_supergate`.
+   *
+   * Layout: ["*"] <gate name> <fanin id>..., separated by whitespace; a leading
+   * "*" marks the entry as a supergate.
+   *
+   * \param line Gate line as passed by run() (trimmed, not empty)
+   * \param max_num_vars Maximum number of variables read from the header
+   * \return false, after reporting to `diag` if set, when the line is malformed
+   */
+  bool parse_gate_definition( std::string const& line, uint32_t const& max_num_vars )
+  {
+    std::string const deliminators{ " \t\r\n" };
+    std::vector<std::string> tokens;
+
+    /* split `line` on the delimiters, dropping empty tokens */
+    std::size_t prev = 0, pos;
+    while ( ( pos = line.find_first_of( deliminators, prev ) ) != std::string::npos )
+    {
+      if ( pos > prev )
+      {
+        tokens.emplace_back( line.substr( prev, pos - prev ) );
+      }
+      prev = pos + 1;
+    }
+
+    if ( prev < line.length() )
+    {
+      tokens.emplace_back( line.substr( prev, std::string::npos ) );
+    }
+
+    if ( tokens.size() < 2 || tokens.size() > max_num_vars + 2 )
+    {
+      if ( diag )
+      {
+        diag->report( diag_id::ERR_SUPER_UNEXPECTED_STRUCTURE ).add_argument( line );
+      }
+      return false;
+    }
+
+    bool const is_super = ( tokens[0] == "*" );
+    std::size_t const first_fanin = is_super ? 2u : 1u;
+    std::string const& name = tokens[first_fanin - 1u];
+
+    std::vector<uint32_t> fanins_id;
+    bool malformed = ( tokens.size() == first_fanin ); /* a gate without fanins is rejected */
+    for ( auto j = first_fanin; j < tokens.size() && !malformed; ++j )
+    {
+      /* fanin ids are unsigned integers; anything else is reported below
+       * instead of letting the numeric conversion throw */
+      malformed = tokens[j].find_first_not_of( "0123456789" ) != std::string::npos;
+      if ( !malformed )
+      {
+        fanins_id.emplace_back( static_cast<uint32_t>( std::stoul( tokens[j] ) ) );
+      }
+    }
+
+    if ( malformed )
+    {
+      if ( diag )
+      {
+        diag->report( diag_id::ERR_SUPER_GATE ).add_argument( line );
+      }
+      return false;
+    }
+
+    reader.on_supergate( name, is_super, fanins_id );
+    return true;
+  }
+
+protected:
+  std::istream& in; /* source stream */
+  super_reader const& reader; /* receiver of the callbacks */
+  diagnostic_engine* diag; /* optional; may be null */
+}; /* super_parser */
+
+/*! \brief Reader function for the SUPER format.
+ *
+ * Runs a `super_parser` over the given stream. The parser invokes the reader
+ * callbacks for the header and for every gate line, and reports rejected
+ * lines to `diag` when it is set.
+ *
+ * \param in Input stream
+ * \param reader SUPER reader with callback methods invoked for parsed primitives
+ * \param diag An optional diagnostic engine with callback methods for parse errors
+ * \return Success if parsing has been successful, or parse error if parsing has failed
+ */
+[[nodiscard]] inline return_code read_super( std::istream& in, const super_reader& reader, diagnostic_engine* diag = nullptr )
+{
+  super_parser parser( in, reader, diag );
+  bool const parsed_ok = parser.run();
+
+  /* translate the parser's boolean outcome into lorina's return code */
+  if ( parsed_ok )
+  {
+    return return_code::success;
+  }
+  return return_code::parse_error;
+}
+
+/*! \brief Reader function for the SUPER format.
+ *
+ * Opens the named file (e.g. a .super file generated by ABC) and parses it through
+ * the stream overload; a file that cannot be opened is reported as `ERR_FILE_OPEN`.
+ *
+ * \param filename Name of the file
+ * \param reader SUPER reader with callback methods invoked for parsed primitives
+ * \param diag An optional diagnostic engine with callback methods for parse errors
+ * \return Success if parsing has been successful, or parse error if parsing has failed
+ */
+[[nodiscard]] inline return_code read_super( const std::string& filename, const super_reader& reader, diagnostic_engine* diag = nullptr )
+{
+  std::ifstream in( detail::word_exp_filename( filename ), std::ifstream::in );
+  /* happy path first: parse the opened file through the stream overload */
+  if ( in.is_open() )
+  {
+    auto const ret = read_super( in, reader, diag );
+    in.close();
+    return ret;
+  }
+
+  /* the file could not be opened */
+  if ( diag )
+  {
+    diag->report( diag_id::ERR_FILE_OPEN ).add_argument( filename );
+  }
+  return return_code::parse_error;
+}
+
+} // namespace lorina

From df39afd8ea22b348351df78699e85ee0c87d0deb Mon Sep 17 00:00:00 2001
From: aletempiac <alessandro.tempia@gmail.com>
Date: Fri, 6 Aug 2021 19:58:31 +0200
Subject: [PATCH 21/40] Updates for supergates support

---
 include/mockturtle/utils/super_utils.hpp  | 35 +++++++++++++
 include/mockturtle/utils/tech_library.hpp | 60 ++++++++++++++++++-----
 2 files changed, 83 insertions(+), 12 deletions(-)

diff --git a/include/mockturtle/utils/super_utils.hpp b/include/mockturtle/utils/super_utils.hpp
index c21fc0bd1..d631c5682 100644
--- a/include/mockturtle/utils/super_utils.hpp
+++ b/include/mockturtle/utils/super_utils.hpp
@@ -55,6 +55,7 @@ template<unsigned NInputs>
 struct composed_gate
 {
   uint32_t id;
+  bool is_super{false};
   int32_t root_id{ -1 };
   kitty::dynamic_truth_table function;
   double area{ 0.0f };
@@ -101,6 +102,7 @@ class supergate_utils
       }
 
       composed_gate<NInputs> s = {_supergates.size(),
+                                  false,
                                   g.id,
                                   g.function,
                                   g.area,
@@ -145,6 +147,7 @@ class supergate_utils
       kitty::create_nth_var( tt, i );
 
       composed_gate<NInputs> s = {i,
+                                  false,
                                   -1,
                                   tt,
                                   0.0f,
@@ -154,6 +157,7 @@ class supergate_utils
       _supergates.emplace_back( s );
     }
 
+    /* add supergates */
     for ( auto const g : _supergates_spec.supergates )
     {
       uint32_t root_match_id;
@@ -202,6 +206,7 @@ class supergate_utils
       const auto tt = compute_truth_table( root_match_id, sub_gates );
 
       composed_gate<NInputs> s = {_supergates.size(),
+                                  g.is_super,
                                   root_match_id,
                                   tt,
                                   area,
@@ -212,6 +217,36 @@ class supergate_utils
 
       _supergates.emplace_back( s );
     }
+
+    /* add constants and single input gates which are not represented in SUPER */
+    for ( auto& gate : _gates )
+    {
+      if ( gate.function.num_vars() == 0 )
+      {
+        /* constants */
+        composed_gate<NInputs> s = {_supergates.size(),
+                                    false,
+                                    gate.id,
+                                    gate.function,
+                                    gate.area,
+                                    {},
+                                    {}};
+        _supergates.emplace_back( s );
+      }
+      else if ( gate.function.num_vars() == 1 )
+      {
+        /* inverter or buffer */
+        composed_gate<NInputs> s = {_supergates.size(),
+                                    false,
+                                    gate.id,
+                                    gate.function,
+                                    gate.area,
+                                    {},
+                                    {}};
+        s.tdelay[0] = std::max( gate.pins[0].rise_block_delay, gate.pins[0].fall_block_delay );
+        _supergates.emplace_back( s );
+      }
+    }
   }
 
 private:
diff --git a/include/mockturtle/utils/tech_library.hpp b/include/mockturtle/utils/tech_library.hpp
index f104f2f56..95c86baab 100644
--- a/include/mockturtle/utils/tech_library.hpp
+++ b/include/mockturtle/utils/tech_library.hpp
@@ -42,7 +42,9 @@
 #include <kitty/print.hpp>
 #include <kitty/static_truth_table.hpp>
 
+#include "super_utils.hpp"
 #include "../io/genlib_reader.hpp"
+#include "../io/super_reader.hpp"
 
 namespace mockturtle
 {
@@ -83,6 +85,9 @@ enum class classification_type : uint32_t
 
 struct tech_library_params
 {
+  /*! \brief use configurations from SUPER library */
+  bool use_supergates_configurations = false;
+
   /*! \brief reports np enumerations */
   bool verbose{ false };
 
@@ -140,9 +145,23 @@ class tech_library
   using lib_t = std::unordered_map<kitty::static_truth_table<NInputs>, supergates_list_t, tt_hash>;
 
 public:
-  explicit tech_library( std::vector<gate> const& gates, tech_library_params const ps = {} )
+  explicit tech_library( std::vector<gate> const& gates, tech_library_params const ps = {}, super_lib const& supergates_spec = {} )
       : _gates( gates ),
+        _supergates_spec( supergates_spec ),
         _ps( ps ),
+        _use_supergates( false ),
+        _supergates(),
+        _super_lib()
+  {
+    generate_library();
+  }
+
+  explicit tech_library( std::vector<gate> const& gates, super_lib const& supergates_spec, tech_library_params const ps = {} )
+      : _gates( gates ),
+        _supergates_spec( supergates_spec ),
+        _ps( ps ),
+        _use_supergates( true ),
+        _supergates(),
         _super_lib()
   {
     generate_library();
@@ -197,6 +216,9 @@ class tech_library
     bool inv = false;
     bool buf = false;
 
+    supergate_utils<NInputs> super( _gates, _supergates_spec );
+    _supergates = super.get_super_library();
+
     for ( auto& gate : _gates )
     {
       if ( gate.function.num_vars() > NInputs )
@@ -209,7 +231,7 @@ class tech_library
         std::cerr << "[i] WARNING: gate " << gate.name << " IGNORED, pins mismatch" << std::endl;
         continue;
       }
-
+      
       float worst_delay = compute_worst_delay( gate );
 
       if ( gate.function.num_vars() == 1 )
@@ -238,7 +260,10 @@ class tech_library
           }
         }
       }
+    // }
 
+    // for ( auto const& gate : _supergates )
+    // {
       _max_size = std::max( _max_size, gate.num_vars );
 
       uint32_t np_count = 0;
@@ -253,11 +278,7 @@ class tech_library
 
         for ( auto i = 0u; i < perm.size() && i < NInputs; ++i )
         {
-          if ( gate.pins.size() == 1 )
-            sg.tdelay[i] = worst_delay;
-          else
-            sg.tdelay[i] = static_cast<float>( std::max( gate.pins[perm[i]].rise_block_delay, gate.pins[perm[i]].fall_block_delay ) );
-
+          sg.tdelay[i] = static_cast<float>( std::max( gate.pins[perm[i]].rise_block_delay, gate.pins[perm[i]].fall_block_delay ) );
           sg.polarity |= ( ( neg >> perm[i] ) & 1 ) << i; /* permutate input negation to match the right pin */
         }
         for ( auto i = perm.size(); i < NInputs; ++i )
@@ -324,11 +345,7 @@ class tech_library
 
           for ( auto i = 0u; i < perm.size() && i < NInputs; ++i )
           {
-            if ( gate.pins.size() == 1 )
-              sg.tdelay[i] = worst_delay;
-            else
-              sg.tdelay[i] = static_cast<float>( std::max( gate.pins[perm[i]].rise_block_delay, gate.pins[perm[i]].fall_block_delay ) );
-
+            sg.tdelay[i] = static_cast<float>( std::max( gate.pins[perm[i]].rise_block_delay, gate.pins[perm[i]].fall_block_delay ) );
             sg.polarity |= phase;
           }
           for ( auto i = perm.size(); i < NInputs; ++i )
@@ -394,6 +411,21 @@ class tech_library
         kitty::exact_p_enumeration( tt, on_p );
       }
 
+      /* supergates */
+      if ( _use_supergates )
+      {
+        for ( auto const& g : _supergates )
+        {
+          /* ignore simple gates */
+          if ( !g.is_super )
+          {
+            continue;
+          }
+
+          /* build supergate */
+        }
+      }
+
       if ( _ps.verbose )
       {
         std::cout << "Gate " << gate.name << ", num_vars = " << gate.num_vars << ", np entries = " << np_count << std::endl;
@@ -451,8 +483,12 @@ class tech_library
 
   unsigned _max_size{ 0 }; /* max #fanins of the gates in the library */
 
+  bool _use_supergates; 
+
   std::vector<gate> const _gates; /* collection of gates */
+  super_lib const& _supergates_spec; /* collection of supergates declarations */
   tech_library_params const _ps;
+  std::vector<composed_gate<NInputs>> _supergates; /* collection of supergates */
   lib_t _super_lib; /* library of enumerated gates */
 };
 

From b92dc7ad00a00e52f7597ba6f17cb34f0185aea0 Mon Sep 17 00:00:00 2001
From: aletempiac <alessandro.tempia@gmail.com>
Date: Mon, 9 Aug 2021 11:30:58 +0200
Subject: [PATCH 22/40] Added supergates support

---
 include/mockturtle/algorithms/mapper.hpp  |  54 +++-
 include/mockturtle/io/write_verilog.hpp   |   1 -
 include/mockturtle/utils/super_utils.hpp  | 242 ++++++++++++------
 include/mockturtle/utils/tech_library.hpp | 294 +++++++++++++---------
 4 files changed, 377 insertions(+), 214 deletions(-)

diff --git a/include/mockturtle/algorithms/mapper.hpp b/include/mockturtle/algorithms/mapper.hpp
index 2e993d58c..5623f65f8 100644
--- a/include/mockturtle/algorithms/mapper.hpp
+++ b/include/mockturtle/algorithms/mapper.hpp
@@ -1114,14 +1114,14 @@ class tech_map_impl
     if ( supergates_zero != nullptr )
     {
       node_data.best_supergate[0] = &( ( *supergates_zero )[0] );
-      node_data.arrival[0] = node_data.best_supergate[0]->worstDelay;
+      node_data.arrival[0] = node_data.best_supergate[0]->tdelay[0];
       node_data.area[0] = node_data.best_supergate[0]->area;
       node_data.phase[0] = 0;
     }
     if ( supergates_one != nullptr )
     {
       node_data.best_supergate[1] = &( ( *supergates_one )[0] );
-      node_data.arrival[1] = node_data.best_supergate[1]->worstDelay;
+      node_data.arrival[1] = node_data.best_supergate[1]->tdelay[0];
       node_data.area[1] = node_data.best_supergate[1]->area;
       node_data.phase[1] = 0;
     }
@@ -1371,7 +1371,6 @@ class tech_map_impl
       if ( node_data.same_match || node_data.map_refs[phase] > 0 )
       {
         create_lut_for_gate( res, old2new, index, phase );
-        res.add_binding( res.get_node( old2new[index][phase] ), node_match[index].best_supergate[phase]->root->id );
 
         /* add inverted version if used */
         if ( node_data.same_match && node_data.map_refs[phase ^ 1] > 0 )
@@ -1386,7 +1385,6 @@ class tech_map_impl
       if ( !node_data.same_match && node_data.map_refs[phase] > 0 )
       {
         create_lut_for_gate( res, old2new, index, phase );
-        res.add_binding( res.get_node( old2new[index][phase] ), node_match[index].best_supergate[phase]->root->id );
       }
 
       return true;
@@ -1419,14 +1417,14 @@ class tech_map_impl
     st.delay = delay;
     if ( ps.eswp_rounds )
       st.power = compute_switching_power();
-    compute_gates_usage();
+    // compute_gates_usage();
   }
 
   void create_lut_for_gate( binding_view<klut_network>& res, klut_map& old2new, uint32_t index, unsigned phase )
   {
     auto const& node_data = node_match[index];
     auto& best_cut = cuts.cuts( index )[node_data.best_cut[phase]];
-    auto const gate = node_data.best_supergate[phase]->root;
+    auto const& gate = node_data.best_supergate[phase]->root;
 
     /* permutate and negate to obtain the matched gate truth table */
     std::vector<signal<klut_network>> children( gate->num_vars );
@@ -1440,11 +1438,47 @@ class tech_map_impl
       ++ctr;
     }
 
-    /* create the node */
-    auto f = res.create_node( children, gate->function );
+    if ( !gate->is_super )
+    {
+      /* create the node */
+      auto f = res.create_node( children, gate->function );
+      res.add_binding( res.get_node( f ), gate->root->id );
+
+      /* add the node in the data structure */
+      old2new[index][phase] = f;
+    }
+    else
+    {
+      /* supergate, create sub-gates */
+      auto f = create_lut_for_gate_rec( res, *gate, children );
+
+      /* add the node in the data structure */
+      old2new[index][phase] = f;
+    }
+  }
+
+  signal<klut_network> create_lut_for_gate_rec( binding_view<klut_network>& res, composed_gate<NInputs> const& gate, std::vector<signal<klut_network>> const& children )
+  {
+    std::vector<signal<klut_network>> children_local( gate.fanin.size() );
+
+    auto i = 0u;
+    for ( auto const fanin : gate.fanin )
+    {
+      if ( fanin->root == nullptr )
+      {
+        /* terminal condition */
+        children_local[i] = children[fanin->id];
+      }
+      else
+      {
+        children_local[i] = create_lut_for_gate_rec( res, *fanin, children );
+      }
+      ++i;
+    }
 
-    /* add the node in the data structure */
-    old2new[index][phase] = f;
+    auto f = res.create_node( children_local, gate.root->function );
+    res.add_binding( res.get_node( f ), gate.root->id );
+    return f;
   }
 
   template<bool DO_AREA>
diff --git a/include/mockturtle/io/write_verilog.hpp b/include/mockturtle/io/write_verilog.hpp
index 968f5b71b..4082e81d0 100644
--- a/include/mockturtle/io/write_verilog.hpp
+++ b/include/mockturtle/io/write_verilog.hpp
@@ -504,7 +504,6 @@ void write_verilog( binding_view<Ntk> const& ntk, std::ostream& os, write_verilo
 
   topo_view ntk_topo{ntk};
 
-  /* if node drives multiple POs, dupplicate */
   ntk_topo.foreach_node( [&]( auto const& n ) {
     if ( po_nodes.has( n ) )
     {
diff --git a/include/mockturtle/utils/super_utils.hpp b/include/mockturtle/utils/super_utils.hpp
index d631c5682..633243606 100644
--- a/include/mockturtle/utils/super_utils.hpp
+++ b/include/mockturtle/utils/super_utils.hpp
@@ -35,6 +35,7 @@
 #include <cassert>
 #include <unordered_map>
 #include <vector>
+#include <deque>
 
 #include <kitty/constructors.hpp>
 #include <kitty/dynamic_truth_table.hpp>
@@ -51,30 +52,53 @@
 namespace mockturtle
 {
 
+struct super_utils_params
+{
+  /*! \brief reports loaded supergates */
+  bool verbose{ false };
+};
+
 template<unsigned NInputs>
 struct composed_gate
 {
+  /* unique ID */
   uint32_t id;
+
+  /* gate is a supergate */
   bool is_super{false};
-  int32_t root_id{ -1 };
+
+  /* pointer to the root library gate */
+  gate const* root{ nullptr };
+
+  /* support of the composed gate */
+  uint32_t num_vars{0};
+
+  /* function */
   kitty::dynamic_truth_table function;
+
+  /* area */
   double area{ 0.0f };
+
+  /* pin-to-pin delays */
   std::array<float, NInputs> tdelay{};
-  std::vector<uint32_t> fanin{};
+
+  /* fanin gates */
+  std::vector<composed_gate<NInputs>*> fanin{};
 };
 
 template<unsigned NInputs = 5u>
-class supergate_utils
+class super_utils
 {
 public:
-  explicit supergate_utils( std::vector<gate> const& gates, super_lib const& supergates_spec = {} )
+  explicit super_utils( std::vector<gate> const& gates, super_lib const& supergates_spec = {}, super_utils_params const ps = {} )
       : _gates( gates ),
         _supergates_spec( supergates_spec ),
+        _ps( ps ),
         _supergates()
   {
     if ( _supergates_spec.supergates.size() == 0 )
     {
-      compute_library_with_genlib();
+      generate_library_with_genlib();
     }
     else
     {
@@ -82,34 +106,53 @@ class supergate_utils
     }
   }
 
-  const std::vector<composed_gate<NInputs>>& get_super_library() const
+  const std::deque<composed_gate<NInputs>>& get_super_library() const
   {
     return _supergates;
   }
 
+  const uint32_t get_standard_library_size() const
+  {
+    return simple_gates_size;
+  }
+
 public:
-  void compute_library_with_genlib()
+  void generate_library_with_genlib()
   {
+    uint32_t initial_size = _supergates.size();
+
     for ( const auto& g : _gates )
     {
       std::array<float, NInputs> pin_to_pin_delays{};
 
+      if ( g.function.num_vars() > NInputs )
+      {
+        std::cerr << "[i] WARNING: gate " << g.name << " IGNORED, too many variables for the library settings" << std::endl;
+        continue;
+      }
+
       auto i = 0u;
       for ( auto const& pin : g.pins )
       {
         /* use worst pin delay */
-        pin_to_pin_delays[i] = std::max( pin.rise_block_delay, pin.fall_block_delay );
+        pin_to_pin_delays[i++] = std::max( pin.rise_block_delay, pin.fall_block_delay );
       }
 
-      composed_gate<NInputs> s = {_supergates.size(),
-                                  false,
-                                  g.id,
-                                  g.function,
-                                  g.area,
-                                  pin_to_pin_delays,
-                                  {}};
+      _supergates.emplace_back( composed_gate<NInputs>{static_cast<uint32_t>( _supergates.size() ), /* id: explicit cast, size_t -> uint32_t narrows inside braces */
+                                                       false,
+                                                       &g,
+                                                       g.num_vars,
+                                                       g.function,
+                                                       g.area,
+                                                       pin_to_pin_delays,
+                                                       {}} );
+    }
+
+    simple_gates_size = _supergates.size() - initial_size;
 
-      _supergates.emplace_back( s );
+    if ( _ps.verbose )
+    {
+      std::cout << fmt::format( "[i] Loaded {} simple gates in the library\n", simple_gates_size );
     }
   }
 
@@ -121,7 +164,7 @@ class supergate_utils
         "ERROR: NInputs ({}) should be greater or equal than the max number of variables ({}) in the super file.\n", NInputs, _supergates_spec.max_num_vars
         );
       std::cerr << "WARNING: ignoring supergates, proceeding with standard library." << std::endl;
-      compute_library_with_genlib();
+      generate_library_with_genlib();
       return;
     }
 
@@ -146,19 +189,22 @@ class supergate_utils
       kitty::dynamic_truth_table tt{ NInputs };
       kitty::create_nth_var( tt, i );
 
-      composed_gate<NInputs> s = {i,
-                                  false,
-                                  -1,
-                                  tt,
-                                  0.0f,
-                                  {},
-                                  {}};
-
-      _supergates.emplace_back( s );
+      _supergates.emplace_back( composed_gate<NInputs>{i,
+                                                       false,
+                                                       nullptr,
+                                                       0,
+                                                       tt,
+                                                       0.0f,
+                                                       {},
+                                                       {}} );
     }
 
+    generate_library_with_genlib();
+
+    uint32_t super_count = 0;
+
     /* add supergates */
-    for ( auto const g : _supergates_spec.supergates )
+    for ( auto const& g : _supergates_spec.supergates )
     {
       uint32_t root_match_id;
       if ( auto it = gates_map.find( g.name ); it != gates_map.end() )
@@ -184,7 +230,7 @@ class supergate_utils
         continue;
       }
 
-      std::vector<uint32_t> sub_gates;
+      std::vector<composed_gate<NInputs>*> sub_gates;
 
       bool error = false;
       for ( uint32_t f : g.fanins_id )
@@ -194,7 +240,14 @@ class supergate_utils
           error = true;
           std::cerr << fmt::format( "WARNING: ignoring supergate {}, wrong fanins.", g.id ) << std::endl;
         }
-        sub_gates.emplace_back( _supergates[f].id );
+        if ( f < _supergates_spec.max_num_vars )
+        {
+          sub_gates.emplace_back( &_supergates[f] );
+        }
+        else
+        {
+          sub_gates.emplace_back( &_supergates[f + simple_gates_size] );
+        }
       }
 
       if ( error )
@@ -203,71 +256,101 @@ class supergate_utils
       }
 
       float area = compute_area( root_match_id, sub_gates );
-      const auto tt = compute_truth_table( root_match_id, sub_gates );
-
-      composed_gate<NInputs> s = {_supergates.size(),
-                                  g.is_super,
-                                  root_match_id,
-                                  tt,
-                                  area,
-                                  {},
-                                  sub_gates};
+      const kitty::dynamic_truth_table tt = compute_truth_table( root_match_id, sub_gates );
+
+      /* try truth table minimization */
+      auto tt_test = tt;
+      std::vector<uint8_t> const& support = kitty::min_base_inplace( tt_test );
+
+      // if ( g.is_super && tt_test != tt )
+      // {
+      //   /* truth table has don't cares: it shouldn't. Gate is sub-optimal */
+      //   std::cerr << fmt::format( "WARNING: ignoring supergate {}, has internal don't cares.", g.id ) << std::endl;
+      //   std::cout << "Not minimized line: " << _supergates.size() - simple_gates_size - 5 + 23 << std::endl;
+      //   is_super_verified = false;
+      // }
+
+      _supergates.emplace_back( composed_gate<NInputs>{static_cast<uint32_t>( _supergates.size() ), /* id: explicit cast, size_t -> uint32_t narrows inside braces */
+                                                       g.is_super,
+                                                       &_gates[root_match_id],
+                                                       0,
+                                                       tt,
+                                                       area,
+                                                       {},
+                                                       sub_gates} );
+
+      if ( g.is_super )
+      {
+        ++super_count;
+      }
 
+      auto& s = _supergates[_supergates.size() - 1];
+      s.num_vars = compute_support( s );
       compute_delay_parameters( s );
-
-      _supergates.emplace_back( s );
     }
 
-    /* add constants and single input gates which are not represented in SUPER */
-    for ( auto& gate : _gates )
+    /* minimize supergates */
+    for ( auto& g : _supergates )
     {
-      if ( gate.function.num_vars() == 0 )
-      {
-        /* constants */
-        composed_gate<NInputs> s = {_supergates.size(),
-                                    false,
-                                    gate.id,
-                                    gate.function,
-                                    gate.area,
-                                    {},
-                                    {}};
-        _supergates.emplace_back( s );
-      }
-      else if ( gate.function.num_vars() == 1 )
+      if ( g.is_super )
       {
-        /* inverter or buffer */
-        composed_gate<NInputs> s = {_supergates.size(),
-                                    false,
-                                    gate.id,
-                                    gate.function,
-                                    gate.area,
-                                    {},
-                                    {}};
-        s.tdelay[0] = std::max( gate.pins[0].rise_block_delay, gate.pins[0].fall_block_delay );
-        _supergates.emplace_back( s );
+        g.function = shrink_to( g.function, static_cast<unsigned>( g.num_vars ) );
       }
     }
+
+    if ( _ps.verbose )
+    {
+      std::cout << fmt::format( "[i] Loaded {} supergates in the library\n", super_count );
+    }
   }
 
 private:
-  inline float compute_area( uint32_t root_id, std::vector<uint32_t> const& sub_gates )
+  inline float compute_area( uint32_t root_id, std::vector<composed_gate<NInputs>*> const& sub_gates )
   {
     float area = _gates[root_id].area;
-    for ( auto const& id : sub_gates )
+    for ( auto const f : sub_gates )
     {
-      area += _supergates[id].area;
+      area += f->area;
     }
 
     return area;
   }
 
-  inline kitty::dynamic_truth_table compute_truth_table( uint32_t root_id, std::vector<uint32_t> const& sub_gates )
+  /* number of distinct input variables reachable from s through its fanin tree */
+  inline uint32_t compute_support( composed_gate<NInputs>& s )
+  {
+    /* per-variable visit counters, value-initialized to zero */
+    std::array<uint8_t, NInputs> used_pins{};
+    return compute_support_rec( s, used_pins );
+  }
+
+  uint32_t compute_support_rec( composed_gate<NInputs>& s, std::array<uint8_t, NInputs>& used_pins )
+  {
+    /* termination: input variable */
+    if ( s.root == nullptr )
+    {
+      if ( used_pins[s.id]++ == 0u )
+      {
+        return 1;
+      }
+      return 0;
+    }
+
+    uint32_t support = 0;
+    for ( auto const pin : s.fanin )
+    {
+      support += compute_support_rec( *pin, used_pins );
+    }
+    return support;
+  }
+
+  inline kitty::dynamic_truth_table compute_truth_table( uint32_t root_id, std::vector<composed_gate<NInputs>*> const& sub_gates )
   {
     std::vector<kitty::dynamic_truth_table> ttv;
 
-    for ( auto const& id : sub_gates )
+    for ( auto const f : sub_gates )
     {
-      ttv.emplace_back( _supergates[id].function );
+      ttv.emplace_back( f->function );
     }
 
     return kitty::compose_truth_table( _gates[root_id].function, ttv );
@@ -275,39 +358,42 @@ class supergate_utils
 
   inline void compute_delay_parameters( composed_gate<NInputs>& s )
   {
-    const auto& root = _gates[s.root_id];
+    const auto& root = *( s.root );
 
     auto i = 0u;
     for ( auto const& pin : root.pins )
     {
       float worst_delay = std::max( pin.rise_block_delay, pin.fall_block_delay );
 
-      compute_delay_pin_rec( s, _supergates[s.fanin[i++]], worst_delay );
+      compute_delay_pin_rec( s, *( s.fanin[i++] ), worst_delay );
     }
   }
 
   void compute_delay_pin_rec( composed_gate<NInputs>& root, composed_gate<NInputs>& s, float delay )
   {
     /* termination: input variable */
-    if ( s.root_id == -1 )
+    if ( s.root == nullptr )
     {
       root.tdelay[s.id] = std::max( root.tdelay[s.id], delay );
       return;
     }
 
     auto i = 0u;
-    for ( auto const& pin : _gates[s.root_id].pins )
+    for ( auto const& pin : s.root->pins )
     {
       float worst_delay = delay + std::max( pin.rise_block_delay, pin.fall_block_delay );
 
-      compute_delay_pin_rec( root, _supergates[s.fanin[i++]], worst_delay );
+      compute_delay_pin_rec( root, *( s.fanin[i++] ), worst_delay );
     }
   }
 
 protected:
+  uint32_t simple_gates_size{ 0 };
+
   std::vector<gate> const& _gates;
   super_lib const& _supergates_spec;
-  std::vector<composed_gate<NInputs>> _supergates;
-}; /* Class supergate_utils */
+  super_utils_params const _ps;
+  std::deque<composed_gate<NInputs>> _supergates;
+}; /* class super_utils */
 
 } /* namespace mockturtle */
diff --git a/include/mockturtle/utils/tech_library.hpp b/include/mockturtle/utils/tech_library.hpp
index 95c86baab..914f2f70a 100644
--- a/include/mockturtle/utils/tech_library.hpp
+++ b/include/mockturtle/utils/tech_library.hpp
@@ -85,9 +85,6 @@ enum class classification_type : uint32_t
 
 struct tech_library_params
 {
-  /*! \brief use configurations from SUPER library */
-  bool use_supergates_configurations = false;
-
   /*! \brief reports np enumerations */
   bool verbose{ false };
 
@@ -98,12 +95,12 @@ struct tech_library_params
 template<unsigned NInputs>
 struct supergate
 {
-  struct gate const* root{};
+  /* pointer to the root gate */
+  composed_gate<NInputs> const* root{};
 
   /* area */
   float area{ 0 };
-  /* worst delay */
-  float worstDelay{ 0 };
+
   /* pin-to-pin delay */
   std::array<float, NInputs> tdelay{};
 
@@ -149,8 +146,8 @@ class tech_library
       : _gates( gates ),
         _supergates_spec( supergates_spec ),
         _ps( ps ),
+        _super( _gates, _supergates_spec ),
         _use_supergates( false ),
-        _supergates(),
         _super_lib()
   {
     generate_library();
@@ -160,8 +157,8 @@ class tech_library
       : _gates( gates ),
         _supergates_spec( supergates_spec ),
         _ps( ps ),
+        _super( _gates, _supergates_spec, super_utils_params{ps.verbose} ),
         _use_supergates( true ),
-        _supergates(),
         _super_lib()
   {
     generate_library();
@@ -216,24 +213,9 @@ class tech_library
     bool inv = false;
     bool buf = false;
 
-    supergate_utils<NInputs> super( _gates, _supergates_spec );
-    _supergates = super.get_super_library();
-
+    /* extract the smallest inverter and buffer info */
     for ( auto& gate : _gates )
     {
-      if ( gate.function.num_vars() > NInputs )
-      {
-        std::cerr << "[i] WARNING: gate " << gate.name << " IGNORED, too many variables for the library settings" << std::endl;
-        continue;
-      }
-      if ( gate.pins.size() != 1 && gate.pins.size() != gate.num_vars )
-      {
-        std::cerr << "[i] WARNING: gate " << gate.name << " IGNORED, pins mismatch" << std::endl;
-        continue;
-      }
-      
-      float worst_delay = compute_worst_delay( gate );
-
       if ( gate.function.num_vars() == 1 )
       {
         /* extract inverter delay and area */
@@ -243,7 +225,7 @@ class tech_library
           if ( !inv || gate.area < _inv_area )
           {
             _inv_area = gate.area;
-            _inv_delay = worst_delay;
+            _inv_delay = compute_worst_delay( gate );
             _inv_id = gate.id;
             inv = true;
           }
@@ -254,104 +236,194 @@ class tech_library
           if ( !buf || gate.area < _buf_area )
           {
             _buf_area = gate.area;
-            _buf_delay = worst_delay;
+            _buf_delay = compute_worst_delay( gate );
             _buf_id = gate.id;
             buf = true;
           }
         }
       }
-    // }
+    }
 
-    // for ( auto const& gate : _supergates )
-    // {
-      _max_size = std::max( _max_size, gate.num_vars );
+    auto const& supergates = _super.get_super_library();
+    uint32_t const standard_gate_size = _super.get_standard_library_size();
 
+    /* generate the configurations for the standard gates */
+    uint32_t i = 0u;
+    for ( auto const& gate : supergates )
+    {
       uint32_t np_count = 0;
 
-      const auto on_np = [&]( auto const& tt, auto neg, auto const& perm ) {
-        supergate<NInputs> sg;
-        sg.root = &gate;
-        sg.area = gate.area;
-        sg.worstDelay = worst_delay;
-        sg.polarity = 0;
-        sg.permutation = perm;
+      if ( gate.root == nullptr )
+      {
+        /* exclude PIs */
+        continue;
+      }
 
-        for ( auto i = 0u; i < perm.size() && i < NInputs; ++i )
-        {
-          sg.tdelay[i] = static_cast<float>( std::max( gate.pins[perm[i]].rise_block_delay, gate.pins[perm[i]].fall_block_delay ) );
-          sg.polarity |= ( ( neg >> perm[i] ) & 1 ) << i; /* permutate input negation to match the right pin */
-        }
-        for ( auto i = perm.size(); i < NInputs; ++i )
-        {
-          sg.tdelay[i] = 0; /* added for completeness but not necessary */
-        }
+      if ( i++ < standard_gate_size )
+      {
+        _max_size = std::max( _max_size, gate.root->num_vars );
 
-        const auto static_tt = kitty::extend_to<NInputs>( tt );
-
-        auto& v = _super_lib[static_tt];
-
-        /* ordered insert by ascending area and number of input pins */
-        auto it = std::lower_bound( v.begin(), v.end(), sg, [&]( auto const& s1, auto const& s2 ) {
-          if ( s1.area < s2.area )
-            return true;
-          if ( s1.area > s2.area )
-            return false;
-          if ( s1.root->num_vars < s2.root->num_vars )
-            return true;
-          if ( s1.root->num_vars > s2.root->num_vars )
-            return true;
-          return s1.root->id < s2.root->id;
-        } );
-
-        bool to_add = true;
-        /* search for duplicated element due to symmetries */
-        while ( it != v.end() )
-        {
-          if ( sg.root->id == it->root->id )
+        const auto on_np = [&]( auto const& tt, auto neg, auto const& perm ) {
+          supergate<NInputs> sg = {&gate,
+                                  static_cast<float>( gate.area ), /* composed_gate::area is double; double -> float narrows inside braces */
+                                  {},
+                                  perm,
+                                  0};
+
+          for ( auto i = 0u; i < perm.size() && i < NInputs; ++i )
           {
-            /* if already in the library exit, else ignore permutations if with equal delay cost */
-            if ( sg.polarity == it->polarity && sg.tdelay == it->tdelay )
+            sg.tdelay[i] = gate.tdelay[perm[i]];
+            sg.polarity |= ( ( neg >> perm[i] ) & 1 ) << i; /* permutate input negation to match the right pin */
+          }
+
+          const auto static_tt = kitty::extend_to<NInputs>( tt );
+
+          auto& v = _super_lib[static_tt];
+
+          /* ordered insert by ascending area and number of input pins */
+          auto it = std::lower_bound( v.begin(), v.end(), sg, [&]( auto const& s1, auto const& s2 ) {
+            if ( s1.area < s2.area )
+              return true;
+            if ( s1.area > s2.area )
+              return false;
+            if ( s1.root->num_vars < s2.root->num_vars )
+              return true;
+            if ( s1.root->num_vars > s2.root->num_vars )
+              return false; /* more pins sorts later; returning true here made comp(a,b) and comp(b,a) both true */
+            return s1.root->id < s2.root->id;
+          } );
+
+          bool to_add = true;
+          /* search for duplicated element due to symmetries */
+          while ( it != v.end() )
+          {
+            if ( sg.root->id == it->root->id )
+            {
+              /* if already in the library exit, else ignore permutations if with equal delay cost */
+              if ( sg.polarity == it->polarity && sg.tdelay == it->tdelay )
+              {
+                to_add = false;
+                break;
+              }
+            }
+            else
             {
-              to_add = false;
               break;
             }
+            ++it;
           }
-          else
+
+          if ( to_add )
           {
-            break;
+            v.insert( it, sg );
+            ++np_count;
           }
-          ++it;
+        };
+
+        const auto on_p = [&]( auto const& tt, auto const& perm ) {
+          /* get all the configurations that lead to the N-class representative */
+          auto [tt_canon, phases] = kitty::exact_n_canonization_complete( tt );
+
+          for( auto phase : phases )
+          {
+            supergate<NInputs> sg = {&gate,
+                                    static_cast<float>( gate.area ), /* composed_gate::area is double; double -> float narrows inside braces */
+                                    {},
+                                    perm,
+                                    0};
+
+            for ( auto i = 0u; i < perm.size() && i < NInputs; ++i )
+            {
+              sg.tdelay[i] = gate.tdelay[perm[i]];
+              sg.polarity |= phase;
+            }
+
+            const auto static_tt = kitty::extend_to<NInputs>( tt_canon );
+
+            auto& v = _super_lib[static_tt];
+
+            /* ordered insert by ascending area and number of input pins */
+            auto it = std::lower_bound( v.begin(), v.end(), sg, [&]( auto const& s1, auto const& s2 ) {
+              if ( s1.area < s2.area )
+                return true;
+              if ( s1.area > s2.area )
+                return false;
+              if ( s1.root->num_vars < s2.root->num_vars )
+                return true;
+              if ( s1.root->num_vars > s2.root->num_vars )
+                return false; /* more pins sorts later; returning true here made comp(a,b) and comp(b,a) both true */
+              return s1.root->id < s2.root->id;
+            } );
+
+            bool to_add = true;
+            /* search for duplicated element due to symmetries */
+            while ( it != v.end() )
+            {
+              if ( sg.root->id == it->root->id )
+              {
+                /* if already in the library exit, else ignore permutations if with equal delay cost */
+                if ( sg.polarity == it->polarity && sg.tdelay == it->tdelay )
+                {
+                  to_add = false;
+                  break;
+                }
+              }
+              else
+              {
+                break;
+              }
+              ++it;
+            }
+
+            if ( to_add )
+            {
+              v.insert( it, sg );
+              ++np_count;
+            }
+          }
+        };
+
+        if constexpr ( Configuration == classification_type::np_configurations )
+        {
+          /* NP enumeration of the function */
+          const auto tt = gate.function;
+          kitty::exact_np_enumeration( tt, on_np );
         }
+        else
+        {
+          /* P enumeration followed by N canonization of the function */
+          const auto tt = gate.function;
+          kitty::exact_p_enumeration( tt, on_p );
+        }
+      }
+      else
+      {
+        /* process the supergates */
 
-        if ( to_add )
+        if ( !gate.is_super )
         {
-          v.insert( it, sg );
-          ++np_count;
+          /* ignore simple gates */
+          continue;
         }
-      };
 
-      const auto on_p = [&]( auto const& tt, auto const& perm ) {
-        /* get all the configurations that lead to the N-class representative */
-        auto [tt_canon, phases] = kitty::exact_n_canonization_complete( tt );
+        /* canonize supergates */
+        auto [tt_canon, phases] = kitty::exact_n_canonization_complete( gate.function );
+        std::vector<uint8_t> perm( gate.num_vars );
+        std::iota( perm.begin(), perm.end(), 0u );
 
         for( auto phase : phases )
         {
-          supergate<NInputs> sg;
-          sg.root = &gate;
-          sg.area = gate.area;
-          sg.worstDelay = worst_delay;
-          sg.polarity = 0;
-          sg.permutation = perm;
+          supergate<NInputs> sg = {&gate,
+                                  static_cast<float>( gate.area ), /* composed_gate::area is double; double -> float narrows inside braces */
+                                  {},
+                                  perm,
+                                  0};
 
           for ( auto i = 0u; i < perm.size() && i < NInputs; ++i )
           {
-            sg.tdelay[i] = static_cast<float>( std::max( gate.pins[perm[i]].rise_block_delay, gate.pins[perm[i]].fall_block_delay ) );
+            sg.tdelay[i] = gate.tdelay[perm[i]];
             sg.polarity |= phase;
           }
-          for ( auto i = perm.size(); i < NInputs; ++i )
-          {
-            sg.tdelay[i] = 0; /* added for completeness but not necessary */
-          }
 
           const auto static_tt = kitty::extend_to<NInputs>( tt_canon );
 
@@ -396,39 +468,11 @@ class tech_library
             ++np_count;
           }
         }
-      };
-
-      if constexpr ( Configuration == classification_type::np_configurations )
-      {
-        /* NP enumeration of the function */
-        const auto tt = gate.function;
-        kitty::exact_np_enumeration( tt, on_np );
-      }
-      else
-      {
-        /* P enumeration followed by N canonization of the function */
-        const auto tt = gate.function;
-        kitty::exact_p_enumeration( tt, on_p );
-      }
-
-      /* supergates */
-      if ( _use_supergates )
-      {
-        for ( auto const& g : _supergates )
-        {
-          /* ignore simple gates */
-          if ( !g.is_super )
-          {
-            continue;
-          }
-
-          /* build supergate */
-        }
       }
 
       if ( _ps.verbose )
       {
-        std::cout << "Gate " << gate.name << ", num_vars = " << gate.num_vars << ", np entries = " << np_count << std::endl;
+        std::cout << "Gate " << gate.root->name << ", num_vars = " << gate.num_vars << ", np entries = " << np_count << std::endl;
       }
     }
 
@@ -450,7 +494,7 @@ class tech_library
         std::cout << ": ";
         for ( auto const& gate : entry.second )
         {
-          printf( "%s(d:%.2f, a:%.2f, p:%d) ", gate.root->name.c_str(), gate.worstDelay, gate.area, gate.polarity );
+          printf( "%d(a:%.2f, p:%d) ", gate.root->id, gate.area, gate.polarity );
         }
         std::cout << std::endl;
       }
@@ -488,9 +532,9 @@ class tech_library
   std::vector<gate> const _gates; /* collection of gates */
   super_lib const& _supergates_spec; /* collection of supergates declarations */
   tech_library_params const _ps;
-  std::vector<composed_gate<NInputs>> _supergates; /* collection of supergates */
+  super_utils<NInputs> _super; /* supergates generation */
   lib_t _super_lib; /* library of enumerated gates */
-};
+}; /* class tech_library */
 
 template<typename Ntk, unsigned NInputs>
 struct exact_supergate
@@ -740,6 +784,6 @@ class exact_library
   RewritingFn const& _rewriting_fn;
   exact_library_params const _ps;
   lib_t _super_lib;
-};
+}; /* class exact_library */
 
 } // namespace mockturtle

From f4f89eb2bd3cedd30647dc05c1d076d2e7b1ae7c Mon Sep 17 00:00:00 2001
From: aletempiac <alessandro.tempia@gmail.com>
Date: Mon, 9 Aug 2021 11:59:13 +0200
Subject: [PATCH 23/40] Moved gates usage report to binding_view

---
 include/mockturtle/algorithms/mapper.hpp  | 90 -----------------------
 include/mockturtle/utils/super_utils.hpp  |  8 --
 include/mockturtle/views/binding_view.hpp | 38 ++++++++++
 3 files changed, 38 insertions(+), 98 deletions(-)

diff --git a/include/mockturtle/algorithms/mapper.hpp b/include/mockturtle/algorithms/mapper.hpp
index 5623f65f8..07d6b0a07 100644
--- a/include/mockturtle/algorithms/mapper.hpp
+++ b/include/mockturtle/algorithms/mapper.hpp
@@ -126,9 +126,6 @@ struct map_stats
   /*! \brief Delay and area stats for each round. */
   std::vector<std::string> round_stats{};
 
-  /*! \brief Gates usage stats. */
-  std::string gates_usage{};
-
   /*! \brief Mapping error. */
   bool mapping_error{ false };
 
@@ -145,9 +142,6 @@ struct map_stats
       std::cout << "\n";
     std::cout << fmt::format( "[i] Mapping runtime = {:>5.2f} secs\n", to_seconds( time_mapping ) );
     std::cout << fmt::format( "[i] Total runtime   = {:>5.2f} secs\n", to_seconds( time_total ) );
-    if ( !gates_usage.empty() )
-      std::cout << "[i] Gates usage report:\n"
-                << gates_usage;
   }
 };
 
@@ -1417,7 +1411,6 @@ class tech_map_impl
     st.delay = delay;
     if ( ps.eswp_rounds )
       st.power = compute_switching_power();
-    // compute_gates_usage();
   }
 
   void create_lut_for_gate( binding_view<klut_network>& res, klut_map& old2new, uint32_t index, unsigned phase )
@@ -1529,89 +1522,6 @@ class tech_map_impl
     return false;
   }
 
-  void compute_gates_usage()
-  {
-    auto const& gates = library.get_gates();
-    std::vector<uint32_t> gates_profile( gates.size(), 0u );
-
-    bool ignore_inv = lib_inv_id == UINT32_MAX;
-
-    ntk.foreach_node( [&]( auto const& n, auto ) {
-      const auto index = ntk.node_to_index( n );
-      auto& node_data = node_match[index];
-
-      if ( ntk.is_constant( n ) )
-      {
-        if ( node_data.best_supergate[0] == nullptr && node_data.best_supergate[1] == nullptr )
-          return true;
-      }
-      else if ( ntk.is_pi( n ) )
-      {
-        if ( !ignore_inv && node_data.map_refs[1] > 0 )
-          ++gates_profile[lib_inv_id];
-        return true;
-      }
-
-      /* continue if cut is not in the cover */
-      if ( node_match[index].map_refs[2] == 0u )
-        return true;
-
-      unsigned phase = ( node_data.best_supergate[0] != nullptr ) ? 0 : 1;
-
-      if ( node_data.same_match || node_data.map_refs[phase] > 0 )
-      {
-        ++gates_profile[node_data.best_supergate[phase]->root->id];
-
-        if ( !ignore_inv && node_data.same_match && node_data.map_refs[phase ^ 1] > 0 )
-          ++gates_profile[lib_inv_id];
-      }
-
-      phase = phase ^ 1;
-      if ( !node_data.same_match && node_data.map_refs[phase] > 0 )
-      {
-        ++gates_profile[node_data.best_supergate[phase]->root->id];
-      }
-
-      return true;
-    } );
-
-    if ( lib_buf_id != UINT32_MAX )
-    {
-      ntk.foreach_po( [&]( auto const& f ) {
-        auto const& n = ntk.get_node( f );
-        if ( !ntk.is_constant( n ) && ntk.is_pi( n ) && !ntk.is_complemented( f ) )
-        {
-          ++gates_profile[lib_buf_id];
-        }
-      } );
-    }
-
-    std::stringstream gates_usage;
-    double tot_area = 0.0f;
-    uint32_t tot_instances = 0u;
-    for ( auto i = 0u; i < gates_profile.size(); ++i )
-    {
-      if ( gates_profile[i] > 0u )
-      {
-        auto tot_gate_area = gates_profile[i] * gates[i].area;
-
-        gates_usage << fmt::format( "[i] {:<15}", gates[i].name )
-                    << fmt::format( "\t Instance = {:>10d}", gates_profile[i] )
-                    << fmt::format( "\t Area = {:>12.2f}", tot_gate_area )
-                    << fmt::format( " {:>8.2f} %\n", tot_gate_area / area * 100 );
-
-        tot_instances += gates_profile[i];
-        tot_area += tot_gate_area;
-      }
-    }
-
-    gates_usage << fmt::format( "[i] {:<15}", "TOTAL" )
-                << fmt::format( "\t Instance = {:>10d}", tot_instances )
-                << fmt::format( "\t Area = {:>12.2f}   100.00 %\n", tot_area );
-
-    st.gates_usage = gates_usage.str();
-  }
-
   double compute_switching_power()
   {
     double power = 0.0f;
diff --git a/include/mockturtle/utils/super_utils.hpp b/include/mockturtle/utils/super_utils.hpp
index 633243606..2da173074 100644
--- a/include/mockturtle/utils/super_utils.hpp
+++ b/include/mockturtle/utils/super_utils.hpp
@@ -262,14 +262,6 @@ class super_utils
       auto tt_test = tt;
       std::vector<uint8_t> const& support = kitty::min_base_inplace( tt_test );
 
-      // if ( g.is_super && tt_test != tt )
-      // {
-      //   /* truth table has don't cares: it shouldn't. Gate is sub-optimal */
-      //   std::cerr << fmt::format( "WARNING: ignoring supergate {}, has internal don't cares.", g.id ) << std::endl;
-      //   std::cout << "Not minimized line: " << _supergates.size() - simple_gates_size - 5 + 23 << std::endl;
-      //   is_super_verified = false;
-      // }
-
       _supergates.emplace_back( composed_gate<NInputs>{_supergates.size(),
                                                        g.is_super,
                                                        &_gates[root_match_id],
diff --git a/include/mockturtle/views/binding_view.hpp b/include/mockturtle/views/binding_view.hpp
index a7314bc8a..4e0296753 100644
--- a/include/mockturtle/views/binding_view.hpp
+++ b/include/mockturtle/views/binding_view.hpp
@@ -36,6 +36,7 @@
 #include "../utils/node_map.hpp"
 
 #include <map>
+#include <iostream>
 
 namespace mockturtle
 {
@@ -116,6 +117,43 @@ class binding_view : public Ntk
     return _library;
   }
 
+  void compute_gates_usage( std::ostream& os )
+  {
+    std::vector<uint32_t> gates_profile( _library.size(), 0u );
+
+    double area = 0;
+    Ntk::foreach_node( [&]( auto const& n, auto ) {
+      if ( has_binding( n ) )
+      {
+        auto const& g = get_binding( n );
+        ++gates_profile[g.id];
+        area += g.area;
+      }
+    } );
+
+    os << "[i] Report gates usage\n";
+
+    uint32_t tot_instances = 0u;
+    for ( auto i = 0u; i < gates_profile.size(); ++i )
+    {
+      if ( gates_profile[i] > 0u )
+      {
+        float tot_gate_area = gates_profile[i] * _library[i].area;
+
+        os << fmt::format( "[i] {:<15}", _library[i].name )
+           << fmt::format( "\t Instance = {:>10d}", gates_profile[i] )
+           << fmt::format( "\t Area = {:>12.2f}", tot_gate_area )
+           << fmt::format( " {:>8.2f} %\n", area > 0 ? tot_gate_area / area * 100 : 0.0 ); /* avoid NaN when total area is 0 */
+
+        tot_instances += gates_profile[i];
+      }
+    }
+
+    os << fmt::format( "[i] {:<15}", "TOTAL" )
+       << fmt::format( "\t Instance = {:>10d}", tot_instances )
+       << fmt::format( "\t Area = {:>12.2f}   100.00 %\n", area );
+  }
+
 private:
   std::vector<gate> const _library;
   node_map<uint32_t, Ntk, std::unordered_map<node, uint32_t>> _bindings;

From 183284c8f0dce1da73212d41e6eff51ccf332534 Mon Sep 17 00:00:00 2001
From: aletempiac <alessandro.tempia@gmail.com>
Date: Mon, 9 Aug 2021 12:22:50 +0200
Subject: [PATCH 24/40] Test fixes for new library structure

---
 test/utils/tech_library.cpp | 27 +++++++++------------------
 1 file changed, 9 insertions(+), 18 deletions(-)

diff --git a/test/utils/tech_library.cpp b/test/utils/tech_library.cpp
index 284491872..f0bfc2c46 100644
--- a/test/utils/tech_library.cpp
+++ b/test/utils/tech_library.cpp
@@ -63,14 +63,12 @@ TEST_CASE( "Simple library generation 1", "[tech_library]" )
   auto const inv = lib.get_supergates( tt );
   CHECK( inv != nullptr );
   CHECK( inv->size() == 2 );
-  CHECK( ( *inv )[0].root->name == "inv1" );
+  CHECK( ( *inv )[0].root->root->name == "inv1" );
   CHECK( ( *inv )[0].area == 1.0f );
-  CHECK( ( *inv )[0].worstDelay == 0.9f );
   CHECK( ( *inv )[0].tdelay[0] == 0.9f );
   CHECK( ( *inv )[0].polarity == 0u );
-  CHECK( ( *inv )[1].root->name == "inv2" );
+  CHECK( ( *inv )[1].root->root->name == "inv2" );
   CHECK( ( *inv )[1].area == 2.0f );
-  CHECK( ( *inv )[1].worstDelay == 1.0f );
   CHECK( ( *inv )[1].tdelay[0] == 1.0f );
   CHECK( ( *inv )[1].polarity == 0u );
 
@@ -78,9 +76,8 @@ TEST_CASE( "Simple library generation 1", "[tech_library]" )
   auto const nand_7 = lib.get_supergates( tt );
   CHECK( nand_7 != nullptr );
   CHECK( nand_7->size() == 1 );
-  CHECK( ( *nand_7 )[0].root->name == "nand2" );
+  CHECK( ( *nand_7 )[0].root->root->name == "nand2" );
   CHECK( ( *nand_7 )[0].area == 2.0f );
-  CHECK( ( *nand_7 )[0].worstDelay == 1.0f );
   CHECK( ( *nand_7 )[0].tdelay[0] == 1.0f );
   CHECK( ( *nand_7 )[0].tdelay[1] == 1.0f );
   CHECK( ( *nand_7 )[0].polarity == 0u );
@@ -89,9 +86,8 @@ TEST_CASE( "Simple library generation 1", "[tech_library]" )
   auto const nand_b = lib.get_supergates( tt );
   CHECK( nand_b != nullptr );
   CHECK( nand_b->size() == 1 );
-  CHECK( ( *nand_b )[0].root->name == "nand2" );
+  CHECK( ( *nand_b )[0].root->root->name == "nand2" );
   CHECK( ( *nand_b )[0].area == 2.0f );
-  CHECK( ( *nand_b )[0].worstDelay == 1.0f );
   CHECK( ( *nand_b )[0].tdelay[0] == 1.0f );
   CHECK( ( *nand_b )[0].tdelay[1] == 1.0f );
   CHECK( ( *nand_b )[0].polarity == 1u );
@@ -100,9 +96,8 @@ TEST_CASE( "Simple library generation 1", "[tech_library]" )
   auto const nand_d = lib.get_supergates( tt );
   CHECK( nand_d != nullptr );
   CHECK( nand_d->size() == 1 );
-  CHECK( ( *nand_d )[0].root->name == "nand2" );
+  CHECK( ( *nand_d )[0].root->root->name == "nand2" );
   CHECK( ( *nand_d )[0].area == 2.0f );
-  CHECK( ( *nand_d )[0].worstDelay == 1.0f );
   CHECK( ( *nand_d )[0].tdelay[0] == 1.0f );
   CHECK( ( *nand_d )[0].tdelay[1] == 1.0f );
   CHECK( ( *nand_d )[0].polarity == 2u );
@@ -111,9 +106,8 @@ TEST_CASE( "Simple library generation 1", "[tech_library]" )
   auto const nand_e = lib.get_supergates( tt );
   CHECK( nand_e != nullptr );
   CHECK( nand_e->size() == 1 );
-  CHECK( ( *nand_e )[0].root->name == "nand2" );
+  CHECK( ( *nand_e )[0].root->root->name == "nand2" );
   CHECK( ( *nand_e )[0].area == 2.0f );
-  CHECK( ( *nand_e )[0].worstDelay == 1.0f );
   CHECK( ( *nand_e )[0].tdelay[0] == 1.0f );
   CHECK( ( *nand_e )[0].tdelay[1] == 1.0f );
   CHECK( ( *nand_e )[0].polarity == 3u );
@@ -139,14 +133,12 @@ TEST_CASE( "Simple library generation 2", "[tech_library]" )
   auto const inv = lib.get_supergates( tt );
   CHECK( inv != nullptr );
   CHECK( inv->size() == 2 );
-  CHECK( ( *inv )[0].root->name == "inv1" );
+  CHECK( ( *inv )[0].root->root->name == "inv1" );
   CHECK( ( *inv )[0].area == 1.0f );
-  CHECK( ( *inv )[0].worstDelay == 0.9f );
   CHECK( ( *inv )[0].tdelay[0] == 0.9f );
   CHECK( ( *inv )[0].polarity == 0u );
-  CHECK( ( *inv )[1].root->name == "inv2" );
+  CHECK( ( *inv )[1].root->root->name == "inv2" );
   CHECK( ( *inv )[1].area == 2.0f );
-  CHECK( ( *inv )[1].worstDelay == 1.0f );
   CHECK( ( *inv )[1].tdelay[0] == 1.0f );
   CHECK( ( *inv )[1].polarity == 0u );
 
@@ -154,9 +146,8 @@ TEST_CASE( "Simple library generation 2", "[tech_library]" )
   auto const nand_7 = lib.get_supergates( tt );
   CHECK( nand_7 != nullptr );
   CHECK( nand_7->size() == 1 );
-  CHECK( ( *nand_7 )[0].root->name == "nand2" );
+  CHECK( ( *nand_7 )[0].root->root->name == "nand2" );
   CHECK( ( *nand_7 )[0].area == 2.0f );
-  CHECK( ( *nand_7 )[0].worstDelay == 1.0f );
   CHECK( ( *nand_7 )[0].tdelay[0] == 1.0f );
   CHECK( ( *nand_7 )[0].tdelay[1] == 1.0f );
   CHECK( ( *nand_7 )[0].polarity == 0u );

From 08cfffaa0c5e1155d54eaf7824eecfddca1f01cb Mon Sep 17 00:00:00 2001
From: aletempiac <alessandro.tempia@gmail.com>
Date: Mon, 9 Aug 2021 14:29:10 +0200
Subject: [PATCH 25/40] Added report commands in binding_view

---
 include/mockturtle/views/binding_view.hpp | 52 ++++++++++++++++++++++-
 1 file changed, 50 insertions(+), 2 deletions(-)

diff --git a/include/mockturtle/views/binding_view.hpp b/include/mockturtle/views/binding_view.hpp
index 4e0296753..93a60db5f 100644
--- a/include/mockturtle/views/binding_view.hpp
+++ b/include/mockturtle/views/binding_view.hpp
@@ -33,6 +33,7 @@
 #pragma once
 
 #include "../io/genlib_reader.hpp"
+#include "../views/topo_view.hpp"
 #include "../utils/node_map.hpp"
 
 #include <map>
@@ -117,7 +118,54 @@ class binding_view : public Ntk
     return _library;
   }
 
-  void compute_gates_usage( std::ostream& os )
+  double compute_area() const
+  {
+    double area = 0;
+    Ntk::foreach_node( [&]( auto const& n, auto ) {
+      if ( has_binding( n ) )
+      {
+        area += get_binding( n ).area;
+      }
+    } );
+
+    return area;
+  }
+
+  double compute_worst_delay() const
+  {
+    topo_view ntk_topo{*this};
+    node_map<double, Ntk> delays( *this );
+    double worst_delay = 0;
+
+    ntk_topo.foreach_node( [&]( auto const& n, auto ) {
+      if ( Ntk::is_constant( n ) || Ntk::is_pi( n ) )
+      {
+        delays[n] = 0;
+        return true;
+      }
+
+      if ( has_binding( n ) )
+      {
+        auto const& g = get_binding( n );
+        double gate_delay = 0;
+        Ntk::foreach_fanin( n, [&]( auto const& f, auto i ) {
+          gate_delay = std::max( gate_delay, (double) ( delays[f] + std::max( g.pins[i].rise_block_delay, g.pins[i].fall_block_delay ) ) );
+        } );
+        delays[n] = gate_delay;
+        worst_delay = std::max( worst_delay, gate_delay );
+      }
+      return true;
+    } );
+
+    return worst_delay;
+  }
+
+  void report_stats( std::ostream& os = std::cout ) const
+  {
+    os << fmt::format( "[i] Report stats: area = {:>5.2f}; delay = {:>5.2f};\n", compute_area(), compute_worst_delay() );
+  }
+
+  void report_gates_usage( std::ostream& os = std::cout ) const
   {
     std::vector<uint32_t> gates_profile( _library.size(), 0u );
 
@@ -131,7 +179,7 @@ class binding_view : public Ntk
       }
     } );
 
-    os << "[i] Report gates usage\n";
+    os << "[i] Report gates usage:\n";
 
     uint32_t tot_instances = 0u;
     for ( auto i = 0u; i < gates_profile.size(); ++i )

From fc41d175e440bc28eff55b03e418ba3200b251e8 Mon Sep 17 00:00:00 2001
From: aletempiac <alessandro.tempia@gmail.com>
Date: Mon, 9 Aug 2021 19:31:48 +0200
Subject: [PATCH 26/40] Added new tests for supergates, modified supergates
 insertion procedure to avoid duplicated gates

---
 include/mockturtle/io/super_reader.hpp    |   6 +-
 include/mockturtle/utils/super_utils.hpp  |  20 +-
 include/mockturtle/utils/tech_library.hpp |   4 +-
 test/io/super_reader.cpp                  |  76 +++++++
 test/utils/super_utils.cpp                | 231 ++++++++++++++++++++++
 test/utils/tech_library.cpp               | 119 ++++++++++-
 6 files changed, 446 insertions(+), 10 deletions(-)
 create mode 100644 test/io/super_reader.cpp
 create mode 100644 test/utils/super_utils.cpp

diff --git a/include/mockturtle/io/super_reader.hpp b/include/mockturtle/io/super_reader.hpp
index a90e9e47f..1c7c7d9ac 100644
--- a/include/mockturtle/io/super_reader.hpp
+++ b/include/mockturtle/io/super_reader.hpp
@@ -47,14 +47,14 @@ struct supergate_spec
   unsigned int id;
   std::string name{};
   bool is_super{ false };
-  std::vector<uint32_t> fanins_id;
+  std::vector<uint32_t> fanin_id;
 };
 
 struct super_lib
 {
   std::string genlib_name{};
   uint32_t max_num_vars{ 0u };
-  uint32_t num_superGates{ 0u };
+  uint32_t num_supergates{ 0u };
   uint32_t num_lines{ 0 };
   std::vector<supergate_spec> supergates{};
 };
@@ -83,7 +83,7 @@ class super_reader : public lorina::super_reader
   {
     lib.genlib_name = genlib_name;
     lib.max_num_vars = max_num_vars;
-    lib.num_superGates = max_superGates; 
+    lib.num_supergates = max_superGates; 
     lib.num_lines = num_lines;
   }
 
diff --git a/include/mockturtle/utils/super_utils.hpp b/include/mockturtle/utils/super_utils.hpp
index 2da173074..877493723 100644
--- a/include/mockturtle/utils/super_utils.hpp
+++ b/include/mockturtle/utils/super_utils.hpp
@@ -183,6 +183,7 @@ class super_utils
       }
     }
 
+
     /* creating input variables */
     for ( uint8_t i = 0; i < _supergates_spec.max_num_vars; ++i )
     {
@@ -219,7 +220,7 @@ class super_utils
 
       uint32_t num_vars = _gates[root_match_id].num_vars;
 
-      if ( num_vars != g.fanins_id.size() )
+      if ( num_vars != g.fanin_id.size() )
       {
         std::cerr << fmt::format( "WARNING: ignoring supergate {}, wrong number of fanins.", g.id ) << std::endl;
         continue;
@@ -233,7 +234,8 @@ class super_utils
       std::vector<composed_gate<NInputs>*> sub_gates;
 
       bool error = false;
-      for ( uint32_t f : g.fanins_id )
+      bool simple_gate = true;
+      for ( uint32_t f : g.fanin_id )
       {
         if ( f >= g.id + _supergates_spec.max_num_vars )
         {
@@ -247,6 +249,7 @@ class super_utils
         else
         {
           sub_gates.emplace_back( &_supergates[f + simple_gates_size] );
+          simple_gate = false;
         }
       }
 
@@ -255,6 +258,17 @@ class super_utils
         continue;
       }
 
+      /* force `is_super = false` for simple gates declared as supergates.
+       * This is necessary to avoid duplicates, since tech_library
+       * independently computes the permutations of simple gates.
+       * Moreover, simple gate permutations could be incomplete in SUPER
+       * libraries constrained by the number of gates. */
+      bool is_super_verified = g.is_super;
+      if ( simple_gate )
+      {
+        is_super_verified = false;
+      }
+
       float area = compute_area( root_match_id, sub_gates );
       const kitty::dynamic_truth_table tt = compute_truth_table( root_match_id, sub_gates );
 
@@ -263,7 +277,7 @@ class super_utils
       std::vector<uint8_t> const& support = kitty::min_base_inplace( tt_test );
 
       _supergates.emplace_back( composed_gate<NInputs>{_supergates.size(),
-                                                       g.is_super,
+                                                       is_super_verified,
                                                        &_gates[root_match_id],
                                                        0,
                                                        tt,
diff --git a/include/mockturtle/utils/tech_library.hpp b/include/mockturtle/utils/tech_library.hpp
index 914f2f70a..67e9eeb6a 100644
--- a/include/mockturtle/utils/tech_library.hpp
+++ b/include/mockturtle/utils/tech_library.hpp
@@ -259,10 +259,10 @@ class tech_library
         continue;
       }
 
+      _max_size = std::max( _max_size, gate.num_vars );
+
       if ( i++ < standard_gate_size )
       {
-        _max_size = std::max( _max_size, gate.root->num_vars );
-
         const auto on_np = [&]( auto const& tt, auto neg, auto const& perm ) {
           supergate<NInputs> sg = {&gate,
                                   gate.area,
diff --git a/test/io/super_reader.cpp b/test/io/super_reader.cpp
new file mode 100644
index 000000000..30ea7fa08
--- /dev/null
+++ b/test/io/super_reader.cpp
@@ -0,0 +1,76 @@
+#include <catch.hpp>
+
+#include <mockturtle/io/super_reader.hpp>
+#include <lorina/super.hpp>
+
+#include <sstream>
+#include <string>
+
+TEST_CASE( "read super file", "[super_reader]" )
+{
+  std::string const file{
+    "test.genlib\n"
+    "3\n"
+    "8\n"
+    "14\n"       
+    "* and 1 0\n"
+    "* and 2 3\n"
+    "and 2 0\n"
+    "* and 1 5\n"
+    "or 2 1\n"
+    "* and 0 7\n"
+    "* or 1 0\n"
+    "* and 0 9\n"
+    "or 2 0\n"
+    "* and 1 11\n"
+    "* and 2 9\n"
+    "\0"
+  };
+
+  mockturtle::super_lib super_data;
+
+  std::istringstream in( file );
+  auto const result = lorina::read_super( in, mockturtle::super_reader( super_data ) );
+  CHECK( result == lorina::return_code::success );
+
+  CHECK( super_data.genlib_name == "test.genlib" );
+  CHECK( super_data.max_num_vars == 3 );
+  CHECK( super_data.num_supergates == 8 );
+  CHECK( super_data.num_lines == 14 );
+  CHECK( super_data.supergates.size() == 11 );
+
+  CHECK( super_data.supergates[0].id == 0 );
+  CHECK( super_data.supergates[0].name == "and" );
+  CHECK( super_data.supergates[0].is_super == true );
+  CHECK( super_data.supergates[0].fanin_id.size() == 2 );
+  CHECK( super_data.supergates[0].fanin_id[0] == 1 );
+  CHECK( super_data.supergates[0].fanin_id[1] == 0 );
+
+  CHECK( super_data.supergates[1].id == 1 );
+  CHECK( super_data.supergates[1].name == "and" );
+  CHECK( super_data.supergates[1].is_super == true );
+  CHECK( super_data.supergates[1].fanin_id.size() == 2 );
+  CHECK( super_data.supergates[1].fanin_id[0] == 2 );
+  CHECK( super_data.supergates[1].fanin_id[1] == 3 );
+
+  CHECK( super_data.supergates[2].id == 2 );
+  CHECK( super_data.supergates[2].name == "and" );
+  CHECK( super_data.supergates[2].is_super == false );
+  CHECK( super_data.supergates[2].fanin_id.size() == 2 );
+  CHECK( super_data.supergates[2].fanin_id[0] == 2 );
+  CHECK( super_data.supergates[2].fanin_id[1] == 0 );
+
+  CHECK( super_data.supergates[3].id == 3 );
+  CHECK( super_data.supergates[3].name == "and" );
+  CHECK( super_data.supergates[3].is_super == true );
+  CHECK( super_data.supergates[3].fanin_id.size() == 2 );
+  CHECK( super_data.supergates[3].fanin_id[0] == 1 );
+  CHECK( super_data.supergates[3].fanin_id[1] == 5 );
+
+  CHECK( super_data.supergates[4].id == 4 );
+  CHECK( super_data.supergates[4].name == "or" );
+  CHECK( super_data.supergates[4].is_super == false );
+  CHECK( super_data.supergates[4].fanin_id.size() == 2 );
+  CHECK( super_data.supergates[4].fanin_id[0] == 2 );
+  CHECK( super_data.supergates[4].fanin_id[1] == 1 );
+}
diff --git a/test/utils/super_utils.cpp b/test/utils/super_utils.cpp
new file mode 100644
index 000000000..e2744222b
--- /dev/null
+++ b/test/utils/super_utils.cpp
@@ -0,0 +1,231 @@
+#include <catch.hpp>
+
+#include <cstdint>
+#include <vector>
+
+#include <lorina/genlib.hpp>
+#include <mockturtle/io/genlib_reader.hpp>
+#include <mockturtle/utils/super_utils.hpp>
+
+#include <kitty/constructors.hpp>
+#include <kitty/dynamic_truth_table.hpp>
+#include <kitty/static_truth_table.hpp>
+#include <kitty/npn.hpp>
+
+using namespace mockturtle;
+
+std::string const genlib_library = "GATE zero 0 O=0;\n"
+                                   "GATE one 0 O=1;\n"
+                                   "GATE inverter 1 O=!a; PIN * INV 1 999 1.0 1.0 1.0 1.0\n"
+                                   "GATE buffer 2 O=a; PIN * NONINV 1 999 1.0 1.0 1.0 1.0\n"
+                                   "GATE and 5 O=(ab); PIN * NONINV 1 999 1.0 1.0 1.0 1.0\n"
+                                   "GATE or 5 O={ab}; PIN * NONINV 1 999 1.0 1.0 1.0 1.0\n";
+
+std::string const super_library = "test.genlib\n"
+                                  "3\n"
+                                  "8\n"
+                                  "14\n"       
+                                  "* and 1 0\n"
+                                  "* and 2 3\n"
+                                  "and 2 0\n"
+                                  "* and 1 5\n"
+                                  "or 2 1\n"
+                                  "* and 0 7\n"
+                                  "* or 1 0\n"
+                                  "* and 0 9\n"
+                                  "or 2 0\n"
+                                  "* and 1 11\n"
+                                  "* and 2 9\n"
+                                  "\0";
+
+TEST_CASE( "Standard gates super library", "[super_utils]" )
+{
+  std::vector<gate> gates;
+
+  std::istringstream in_genlib( genlib_library );
+  auto result = lorina::read_genlib( in_genlib, genlib_reader( gates ) );
+  
+  CHECK( result == lorina::return_code::success );
+
+  super_utils<3> super( gates );
+
+  auto const& lib = super.get_super_library();
+  CHECK( lib.size() == 6 );
+
+  CHECK( lib[0].id == 0 );
+  CHECK( lib[0].is_super == false );
+  CHECK( lib[0].root == &gates[0] );
+  CHECK( lib[0].num_vars == 0 );
+  CHECK( lib[0].function == gates[0].function );
+  CHECK( lib[0].tdelay[0] == 0 );
+  CHECK( lib[0].fanin.size() == 0 );
+
+  CHECK( lib[1].id == 1 );
+  CHECK( lib[1].is_super == false );
+  CHECK( lib[1].root == &gates[1] );
+  CHECK( lib[1].num_vars == 0 );
+  CHECK( lib[1].function == gates[1].function );
+  CHECK( lib[1].tdelay[0] == 0 );
+  CHECK( lib[1].fanin.size() == 0 );
+
+  CHECK( lib[2].id == 2 );
+  CHECK( lib[2].is_super == false );
+  CHECK( lib[2].root == &gates[2] );
+  CHECK( lib[2].num_vars == 1 );
+  CHECK( lib[2].function == gates[2].function );
+  CHECK( lib[2].tdelay[0] == 1 );
+  CHECK( lib[2].tdelay[1] == 0 );
+  CHECK( lib[2].fanin.size() == 0 );
+
+  CHECK( lib[3].id == 3 );
+  CHECK( lib[3].is_super == false );
+  CHECK( lib[3].root == &gates[3] );
+  CHECK( lib[3].num_vars == 1 );
+  CHECK( lib[3].function == gates[3].function );
+  CHECK( lib[3].tdelay[0] == 1 );
+  CHECK( lib[3].tdelay[1] == 0 );
+  CHECK( lib[3].fanin.size() == 0 );
+
+  CHECK( lib[4].id == 4 );
+  CHECK( lib[4].is_super == false );
+  CHECK( lib[4].root == &gates[4] );
+  CHECK( lib[4].num_vars == 2 );
+  CHECK( lib[4].function == gates[4].function );
+  CHECK( lib[4].tdelay[0] == 1 );
+  CHECK( lib[4].tdelay[1] == 1 );
+  CHECK( lib[4].fanin.size() == 0 );
+
+  CHECK( lib[5].id == 5 );
+  CHECK( lib[5].is_super == false );
+  CHECK( lib[5].root == &gates[5] );
+  CHECK( lib[5].num_vars == 2 );
+  CHECK( lib[5].function == gates[5].function );
+  CHECK( lib[5].tdelay[0] == 1 );
+  CHECK( lib[5].tdelay[1] == 1 );
+  CHECK( lib[5].fanin.size() == 0 );
+}
+
+TEST_CASE( "Supergates super library", "[super_utils]" )
+{
+  std::vector<gate> gates;
+  super_lib super_data;
+
+  std::istringstream in_genlib( genlib_library );
+  auto result = lorina::read_genlib( in_genlib, genlib_reader( gates ) );
+  
+  CHECK( result == lorina::return_code::success );
+
+  std::istringstream in_super( super_library );
+  result = lorina::read_super( in_super, mockturtle::super_reader( super_data ) );
+
+  CHECK( result == lorina::return_code::success );
+
+  super_utils<3> super( gates, super_data );
+
+  auto const& lib = super.get_super_library();
+  CHECK( lib.size() == 20 );
+
+  CHECK( lib[0].id == 0 );
+  CHECK( lib[0].is_super == false );
+  CHECK( lib[0].root == nullptr );
+  CHECK( lib[0].num_vars == 0 );
+  CHECK( lib[0].function._bits[0] == 0xAA );
+  CHECK( lib[0].tdelay[0] == 0 );
+  CHECK( lib[0].fanin.size() == 0 );
+
+  CHECK( lib[1].id == 1 );
+  CHECK( lib[1].is_super == false );
+  CHECK( lib[1].root == nullptr );
+  CHECK( lib[1].num_vars == 0 );
+  CHECK( lib[1].function._bits[0] == 0xCC );
+  CHECK( lib[1].tdelay[0] == 0 );
+  CHECK( lib[1].fanin.size() == 0 );
+
+  CHECK( lib[2].id == 2 );
+  CHECK( lib[2].is_super == false );
+  CHECK( lib[2].root == nullptr );
+  CHECK( lib[2].num_vars == 0 );
+  CHECK( lib[2].function._bits[0] == 0xF0 );
+  CHECK( lib[2].tdelay[0] == 0 );
+  CHECK( lib[2].fanin.size() == 0 );
+
+  CHECK( lib[3].id == 3 );
+  CHECK( lib[3].is_super == false );
+  CHECK( lib[3].root == &gates[0] );
+  CHECK( lib[3].num_vars == 0 );
+  CHECK( lib[3].function == gates[0].function );
+  CHECK( lib[3].tdelay[0] == 0 );
+  CHECK( lib[3].fanin.size() == 0 );
+
+  CHECK( lib[4].id == 4 );
+  CHECK( lib[4].is_super == false );
+  CHECK( lib[4].root == &gates[1] );
+  CHECK( lib[4].num_vars == 0 );
+  CHECK( lib[4].function == gates[1].function );
+  CHECK( lib[4].tdelay[0] == 0 );
+  CHECK( lib[4].fanin.size() == 0 );
+
+  CHECK( lib[6].id == 6 );
+  CHECK( lib[6].is_super == false );
+  CHECK( lib[6].root == &gates[3] );
+  CHECK( lib[6].num_vars == 1 );
+  CHECK( lib[6].function == gates[3].function );
+  CHECK( lib[6].tdelay[0] == 1 );
+  CHECK( lib[6].fanin.size() == 0 );
+
+  CHECK( lib[7].id == 7 );
+  CHECK( lib[7].is_super == false );
+  CHECK( lib[7].root == &gates[4] );
+  CHECK( lib[7].num_vars == 2 );
+  CHECK( lib[7].function == gates[4].function );
+  CHECK( lib[7].tdelay[0] == 1 );
+  CHECK( lib[7].tdelay[1] == 1 );
+  CHECK( lib[7].fanin.size() == 0 );
+
+  CHECK( lib[9].id == 9 );
+  CHECK( lib[9].is_super == false );
+  CHECK( lib[9].root == &gates[4] );
+  CHECK( lib[9].num_vars == 2 );
+  CHECK( lib[9].function._bits[0] == 0x88 );
+  CHECK( lib[9].tdelay[0] == 1 );
+  CHECK( lib[9].tdelay[1] == 1 );
+  CHECK( lib[9].fanin.size() == 2 );
+  CHECK( lib[9].fanin[0] == &lib[1] );
+  CHECK( lib[9].fanin[1] == &lib[0] );
+
+  CHECK( lib[10].id == 10 );
+  CHECK( lib[10].is_super == true );
+  CHECK( lib[10].root == &gates[4] );
+  CHECK( lib[10].num_vars == 3 );
+  CHECK( lib[10].function._bits[0] == 0x80 );
+  CHECK( lib[10].tdelay[0] == 2 );
+  CHECK( lib[10].tdelay[1] == 2 );
+  CHECK( lib[10].tdelay[2] == 1 );
+  CHECK( lib[10].fanin.size() == 2 );
+  CHECK( lib[10].fanin[0] == &lib[2] );
+  CHECK( lib[10].fanin[1] == &lib[9] );
+
+  CHECK( lib[13].id == 13 );
+  CHECK( lib[13].is_super == false );
+  CHECK( lib[13].root == &gates[5] );
+  CHECK( lib[13].num_vars == 2 );
+  CHECK( lib[13].function._bits[0] == 0xFC );
+  CHECK( lib[13].tdelay[0] == 0 );
+  CHECK( lib[13].tdelay[1] == 1 );
+  CHECK( lib[13].tdelay[2] == 1 );
+  CHECK( lib[13].fanin.size() == 2 );
+  CHECK( lib[13].fanin[0] == &lib[2] );
+  CHECK( lib[13].fanin[1] == &lib[1] );
+
+  CHECK( lib[14].id == 14 );
+  CHECK( lib[14].is_super == true );
+  CHECK( lib[14].root == &gates[4] );
+  CHECK( lib[14].num_vars == 3 );
+  CHECK( lib[14].function._bits[0] == 0xA8 );
+  CHECK( lib[14].tdelay[0] == 1 );
+  CHECK( lib[14].tdelay[1] == 2 );
+  CHECK( lib[14].tdelay[2] == 2 );
+  CHECK( lib[14].fanin.size() == 2 );
+  CHECK( lib[14].fanin[0] == &lib[0] );
+  CHECK( lib[14].fanin[1] == &lib[13] );
+}
diff --git a/test/utils/tech_library.cpp b/test/utils/tech_library.cpp
index f0bfc2c46..e8bd92ed7 100644
--- a/test/utils/tech_library.cpp
+++ b/test/utils/tech_library.cpp
@@ -5,6 +5,8 @@
 
 #include <lorina/genlib.hpp>
 #include <mockturtle/io/genlib_reader.hpp>
+#include <mockturtle/io/super_reader.hpp>
+#include <mockturtle/utils/super_utils.hpp>
 #include <mockturtle/utils/tech_library.hpp>
 
 #include <kitty/constructors.hpp>
@@ -19,6 +21,29 @@ std::string const simple_test_library = "GATE   inv1    1 O=!a;     PIN * INV 1
                                         "GATE   buf     2 O=a;      PIN * NONINV 1 999 1.0 0.0 1.0 0.0\n"
                                         "GATE   nand2   2 O=!(ab);  PIN * INV 1 999 1.0 0.2 1.0 0.2\n";
 
+std::string const simple_library = "GATE zero 0 O=0;\n" /* genlib text used by the supergate library test */
+                                   "GATE one 0 O=1;\n"
+                                   "GATE inverter 1 O=!a; PIN * INV 1 999 1.0 1.0 1.0 1.0\n" /* the test expects area 1.0 and pin delay 1.0 */
+                                   "GATE buffer 2 O=a; PIN * NONINV 1 999 1.0 1.0 1.0 1.0\n"
+                                   "GATE and 5 O=(ab); PIN * NONINV 1 999 1.0 1.0 1.0 1.0\n" /* the test expects area 5.0 */
+                                   "GATE or 5 O={ab}; PIN * NONINV 1 999 1.0 1.0 1.0 1.0\n";
+
+std::string const super_library = "simple.genlib\n" /* header: presumably the name of the genlib file — TODO confirm */
+                                  "3\n" /* presumably the maximum number of inputs — TODO confirm */
+                                  "7\n" /* matches the number of `*`-prefixed lines below */
+                                  "13\n" /* matches 3 + the 10 gate lines below — TODO confirm meaning */
+                                  "* and 1 0\n" /* gate name followed by fanin ids; presumably `*` marks a supergate root */
+                                  "* and 2 3\n"
+                                  "and 2 0\n"
+                                  "* and 1 5\n"
+                                  "or 2 1\n"
+                                  "* and 0 7\n"
+                                  "* or 1 0\n"
+                                  "or 2 0\n"
+                                  "* and 1 10\n"
+                                  "* and 2 9\n"
+                                  "\0"; /* NOTE(review): redundant, construction from a C string stops at the first NUL */
+
 std::string const test_library =  "GATE   inv1    3 O=!a;           PIN * INV 3 999 1.1 0.09 1.1 0.09\n"
                                   "GATE   inv2    2 O=!a;           PIN * INV 2 999 1.0 0.1 1.0 0.1\n"
                                   "GATE   inv3    1 O=!a;           PIN * INV 1 999 0.9 0.3 0.9 0.3\n"
@@ -43,7 +68,7 @@ std::string const test_library =  "GATE   inv1    3 O=!a;           PIN * INV 3
                                   "GATE   zero    0 O=0;\n"
                                   "GATE   one     0 O=1;";
 
-TEST_CASE( "Simple library generation 1", "[tech_library]" )
+TEST_CASE( "Simple test library generation 1", "[tech_library]" )
 {
   std::vector<gate> gates;
 
@@ -56,6 +81,7 @@ TEST_CASE( "Simple library generation 1", "[tech_library]" )
 
   CHECK( lib.max_gate_size() == 2 );
   CHECK( lib.get_inverter_info() == std::make_tuple( 1.0f, 0.9f, 0u ) );
+  CHECK( lib.get_buffer_info() == std::make_tuple( 2.0f, 1.0f, 2u ) );
 
   kitty::static_truth_table<2> tt;
 
@@ -113,7 +139,7 @@ TEST_CASE( "Simple library generation 1", "[tech_library]" )
   CHECK( ( *nand_e )[0].polarity == 3u );
 }
 
-TEST_CASE( "Simple library generation 2", "[tech_library]" )
+TEST_CASE( "Simple test library generation 2", "[tech_library]" )
 {
   std::vector<gate> gates;
 
@@ -126,6 +152,7 @@ TEST_CASE( "Simple library generation 2", "[tech_library]" )
 
   CHECK( lib.max_gate_size() == 2 );
   CHECK( lib.get_inverter_info() == std::make_tuple( 1.0f, 0.9f, 0u ) );
+  CHECK( lib.get_buffer_info() == std::make_tuple( 2.0f, 1.0f, 2u ) );
 
   kitty::static_truth_table<2> tt;
 
@@ -165,6 +192,94 @@ TEST_CASE( "Simple library generation 2", "[tech_library]" )
   CHECK( nand_e == nullptr );
 }
 
+TEST_CASE( "Supergate library generation", "[tech_library]" )
+{
+  std::vector<gate> gates;
+  super_lib super_data;
+  /* read the genlib gates first, then the supergate specification built from those gate names */
+  std::istringstream in_genlib( simple_library );
+  auto result = lorina::read_genlib( in_genlib, genlib_reader( gates ) );
+  
+  CHECK( result == lorina::return_code::success );
+
+  std::istringstream in_super( super_library );
+  result = lorina::read_super( in_super, mockturtle::super_reader( super_data ) );
+
+  CHECK( result == lorina::return_code::success );
+
+  tech_library_params ps; /* NOTE(review): configured below but never passed to `lib` */
+  ps.verbose = true;
+  ps.very_verbose = true;
+  tech_library<3, classification_type::p_configurations> lib( gates, super_data );
+  fflush( stdout );
+
+  CHECK( lib.max_gate_size() == 3 );
+  CHECK( lib.get_inverter_info() == std::make_tuple( 1.0f, 1.0f, 2u ) );
+  CHECK( lib.get_buffer_info() == std::make_tuple( 2.0f, 1.0f, 3u ) );
+
+  kitty::static_truth_table<3> tt;
+
+  kitty::create_from_hex_string( tt, "55" ); /* !a */
+  auto const inv = lib.get_supergates( tt );
+  CHECK( inv != nullptr );
+  CHECK( inv->size() == 1 );
+  CHECK( ( *inv )[0].root->root->name == "inverter" );
+  CHECK( ( *inv )[0].area == 1.0f );
+  CHECK( ( *inv )[0].tdelay[0] == 1.0f );
+  CHECK( ( *inv )[0].polarity == 0u );
+  
+
+  kitty::create_from_hex_string( tt, "11" ); /* !a & !b: expected as AND with both inputs negated */
+  auto const and_1 = lib.get_supergates( tt );
+  CHECK( and_1 != nullptr );
+  CHECK( and_1->size() == 1 );
+  CHECK( ( *and_1 )[0].root->root->name == "and" );
+  CHECK( ( *and_1 )[0].area == 5.0f );
+  CHECK( ( *and_1 )[0].tdelay[0] == 1.0f );
+  CHECK( ( *and_1 )[0].tdelay[1] == 1.0f );
+  CHECK( ( *and_1 )[0].polarity == 3u );
+
+  kitty::create_from_hex_string( tt, "22" ); /* a & !b: no entry expected under this function */
+  auto const and_8 = lib.get_supergates( tt );
+  CHECK( and_8 == nullptr );
+
+  kitty::create_from_hex_string( tt, "44" ); /* !a & b: no entry expected under this function */
+  auto const nand_d = lib.get_supergates( tt );
+  CHECK( nand_d == nullptr );
+
+  kitty::create_from_hex_string( tt, "88" ); /* a & b: no entry expected under this function */
+  auto const nand_e = lib.get_supergates( tt );
+  CHECK( nand_e == nullptr );
+
+  kitty::create_from_hex_string( tt, "07" ); /* !c & !( a & b ) */
+  auto const andor_07 = lib.get_supergates( tt );
+  CHECK( andor_07 != nullptr );
+  CHECK( andor_07->size() == 1 );
+  CHECK( ( *andor_07 )[0].root->root->name == "and" );
+  CHECK( ( *andor_07 )[0].area == 10.0f );
+  CHECK( ( *andor_07 )[0].tdelay[0] == 2.0f );
+  CHECK( ( *andor_07 )[0].tdelay[1] == 2.0f );
+  CHECK( ( *andor_07 )[0].tdelay[2] == 1.0f );
+  CHECK( ( *andor_07 )[0].polarity == 7u );
+
+  kitty::create_from_hex_string( tt, "01" ); /* !a & !b & !c: two entries that differ in their pin delays */
+  auto const and_01 = lib.get_supergates( tt );
+  CHECK( and_01 != nullptr );
+  CHECK( and_01->size() == 2 );
+  CHECK( ( *and_01 )[0].root->root->name == "and" );
+  CHECK( ( *and_01 )[0].area == 10.0f );
+  CHECK( ( *and_01 )[0].tdelay[0] == 2.0f );
+  CHECK( ( *and_01 )[0].tdelay[1] == 2.0f );
+  CHECK( ( *and_01 )[0].tdelay[2] == 1.0f );
+  CHECK( ( *and_01 )[0].polarity == 7u );
+  CHECK( ( *and_01 )[1].root->root->name == "and" );
+  CHECK( ( *and_01 )[1].area == 10.0f );
+  CHECK( ( *and_01 )[1].tdelay[0] == 2.0f );
+  CHECK( ( *and_01 )[1].tdelay[1] == 1.0f );
+  CHECK( ( *and_01 )[1].tdelay[2] == 2.0f );
+  CHECK( ( *and_01 )[1].polarity == 7u );
+}
+
 TEST_CASE( "Complete library generation", "[tech_library]" )
 {
   std::vector<gate> gates;

From 2870bac4a958f04702ec3d6409292cc5a1cdf3c7 Mon Sep 17 00:00:00 2001
From: aletempiac <alessandro.tempia@gmail.com>
Date: Mon, 9 Aug 2021 23:36:20 +0200
Subject: [PATCH 27/40] Added NP-configurations of supergates, use of phmap for
 better performance, added tests for mapping with supergates

---
 include/mockturtle/utils/tech_library.hpp | 98 ++++++++++++++++++++---
 lib/kitty/kitty/npn.hpp                   | 59 +++++++++++++-
 test/algorithms/mapper.cpp                | 51 ++++++++++++
 test/utils/super_utils.cpp                |  1 +
 test/utils/tech_library.cpp               |  1 +
 5 files changed, 195 insertions(+), 15 deletions(-)

diff --git a/include/mockturtle/utils/tech_library.hpp b/include/mockturtle/utils/tech_library.hpp
index 67e9eeb6a..245a21e16 100644
--- a/include/mockturtle/utils/tech_library.hpp
+++ b/include/mockturtle/utils/tech_library.hpp
@@ -42,6 +42,8 @@
 #include <kitty/print.hpp>
 #include <kitty/static_truth_table.hpp>
 
+#include <parallel_hashmap/phmap.h>
+
 #include "super_utils.hpp"
 #include "../io/genlib_reader.hpp"
 #include "../io/super_reader.hpp"
@@ -139,7 +141,7 @@ class tech_library
 {
   using supergates_list_t = std::vector<supergate<NInputs>>;
   using tt_hash = kitty::hash<kitty::static_truth_table<NInputs>>;
-  using lib_t = std::unordered_map<kitty::static_truth_table<NInputs>, supergates_list_t, tt_hash>;
+  using lib_t = phmap::flat_hash_map<kitty::static_truth_table<NInputs>, supergates_list_t, tt_hash>;
 
 public:
   explicit tech_library( std::vector<gate> const& gates, tech_library_params const ps = {}, super_lib const& supergates_spec = {} )
@@ -330,12 +332,11 @@ class tech_library
                                     gate.area,
                                     {},
                                     perm,
-                                    0};
+                                    phase};
 
             for ( auto i = 0u; i < perm.size() && i < NInputs; ++i )
             {
               sg.tdelay[i] = gate.tdelay[perm[i]];
-              sg.polarity |= phase;
             }
 
             const auto static_tt = kitty::extend_to<NInputs>( tt_canon );
@@ -399,33 +400,28 @@ class tech_library
       else
       {
         /* process the supergates */
-
         if ( !gate.is_super )
         {
           /* ignore simple gates */
           continue;
         }
 
-        /* canonize supergates */
-        auto [tt_canon, phases] = kitty::exact_n_canonization_complete( gate.function );
-        std::vector<uint8_t> perm( gate.num_vars );
-        std::iota( perm.begin(), perm.end(), 0u );
+        const auto on_np = [&]( auto const& tt, auto neg ) {
+          std::vector<uint8_t> perm( gate.num_vars );
+          std::iota( perm.begin(), perm.end(), 0u );
 
-        for( auto phase : phases )
-        {
           supergate<NInputs> sg = {&gate,
                                   gate.area,
                                   {},
                                   perm,
-                                  0};
+                                  neg};
 
           for ( auto i = 0u; i < perm.size() && i < NInputs; ++i )
           {
             sg.tdelay[i] = gate.tdelay[perm[i]];
-            sg.polarity |= phase;
           }
 
-          const auto static_tt = kitty::extend_to<NInputs>( tt_canon );
+          const auto static_tt = kitty::extend_to<NInputs>( tt );
 
           auto& v = _super_lib[static_tt];
 
@@ -467,6 +463,82 @@ class tech_library
             v.insert( it, sg );
             ++np_count;
           }
+        };
+
+        /* P-configuration handler: registers the supergate in `_super_lib` under the
+         * function returned by the exact N-canonization, adding one entry for each
+         * phase reported with it; `np_count` is incremented for every inserted entry. */
+        const auto on_p = [&]() {
+          /* canonical function and the phases (input negations) returned alongside it */
+          auto [tt_canon, phases] = kitty::exact_n_canonization_complete( gate.function );
+          std::vector<uint8_t> perm( gate.num_vars );
+          std::iota( perm.begin(), perm.end(), 0u );
+
+          for( auto phase : phases )
+          {
+            supergate<NInputs> sg = {&gate,
+                                    gate.area,
+                                    {},
+                                    perm,
+                                    phase};
+
+            /* perm is the identity, so the pin delays are copied in their original order */
+            for ( auto i = 0u; i < perm.size() && i < NInputs; ++i )
+            {
+              sg.tdelay[i] = gate.tdelay[perm[i]];
+            }
+
+            const auto static_tt = kitty::extend_to<NInputs>( tt_canon );
+
+            auto& v = _super_lib[static_tt];
+
+            /* ordered insert by ascending area, number of input pins, and gate id; the
+             * comparator must define a consistent ordering for std::lower_bound to work */
+            auto it = std::lower_bound( v.begin(), v.end(), sg, [&]( auto const& s1, auto const& s2 ) {
+              if ( s1.area < s2.area )
+                return true;
+              if ( s1.area > s2.area )
+                return false;
+              if ( s1.root->num_vars < s2.root->num_vars )
+                return true;
+              if ( s1.root->num_vars > s2.root->num_vars )
+                return false;
+              return s1.root->id < s2.root->id;
+            } );
+
+            bool to_add = true;
+            /* search for duplicated element due to symmetries: scan the entries that
+             * follow the insertion point for as long as they belong to the same gate */
+            while ( it != v.end() && sg.root->id == it->root->id )
+            {
+              /* if already in the library exit, else ignore permutations if with equal delay cost */
+              if ( sg.polarity == it->polarity && sg.tdelay == it->tdelay )
+              {
+                to_add = false;
+                break;
+              }
+              ++it;
+            }
+
+            if ( to_add )
+            {
+              v.insert( it, sg );
+              ++np_count;
+            }
+          }
+        };
+
+        if constexpr ( Configuration == classification_type::np_configurations )
+        {
+          /* N enumeration of the function */
+          const auto tt = gate.function;
+          kitty::exact_n_enumeration( tt, on_np );
+        }
+        else
+        {
+          /* N canonization of the function */
+          const auto tt = gate.function;
+          on_p();
         }
       }
 
diff --git a/lib/kitty/kitty/npn.hpp b/lib/kitty/kitty/npn.hpp
index 83cf2e0f7..b8dc7013e 100755
--- a/lib/kitty/kitty/npn.hpp
+++ b/lib/kitty/kitty/npn.hpp
@@ -28,6 +28,7 @@
   \brief Implements NPN canonization algorithms
 
   \author Mathias Soeken
+  \author Alessandro Tempia Calvino
 */
 
 #pragma once
@@ -700,8 +701,8 @@ void exact_np_enumeration( const TT& tt, Callback&& fn )
 /*! \brief Exact P enumeration
 
   Given a truth table, this function enumerates all the functions in its
-  P class. Two functions are in the same NP class, if one can be obtained
-  from the other by input negation and input permutation.
+  P class. Two functions are in the same P class, if one can be obtained
+  from the other by input permutation.
 
   The function takes a callback as second parameter which is called for
   every enumerated function. The callback should take as parameters:
@@ -754,6 +755,60 @@ void exact_p_enumeration( const TT& tt, Callback&& fn )
   }
 }
 
+/*! \brief Exact N enumeration
+
+  Enumerates the functions in the N class of a truth table. Two functions
+  belong to the same N class if one is obtained from the other by negating
+  inputs.
+
+  The callback passed as second argument is invoked once per enumerated
+  function and receives:
+  - the N-enumerated truth table
+  - the input negation applied to obtain it (bit `i` is set when variable
+    `i` is negated)
+
+  The first call always reports `tt` itself with negation 0. For truth tables
+  with fewer than two variables this is the only call.
+
+  \param tt Truth table
+  \param fn Callback for each enumerated truth table in the N class
+*/
+template<typename TT, typename Callback>
+void exact_n_enumeration( const TT& tt, Callback&& fn )
+{
+  static_assert( is_complete_truth_table<TT>::value, "Can only be applied on complete truth tables." );
+
+  const auto num_vars = tt.num_vars();
+
+  /* Special cases for n = 0 and n = 1: only the input function is reported.
+     NOTE(review): for n = 1 the variant with the negated input is not
+     enumerated; confirm that callers do not rely on it. */
+  if ( num_vars == 0 || num_vars == 1 )
+  {
+    fn( tt, 0 );
+    return;
+  }
+
+  assert( num_vars >= 2 && num_vars <= 6 );
+
+  /* start from the unmodified function */
+  auto current = tt;
+  fn( current, 0 );
+
+  /* every step of the flip sequence negates a single input; the mask keeps
+     track of which inputs are negated in `current` */
+  const auto& flip_sequence = detail::flips[num_vars - 2u];
+  uint32_t negation = 0;
+  for ( const auto pos : flip_sequence )
+  {
+    flip_inplace( current, pos );
+
+    negation ^= 1 << pos;
+
+    fn( current, negation );
+  }
+}
+
 /*! \brief Exact N canonization complete
 
   Given a truth table, this function finds the lexicographically smallest truth
diff --git a/test/algorithms/mapper.cpp b/test/algorithms/mapper.cpp
index 324188a99..1f606219f 100644
--- a/test/algorithms/mapper.cpp
+++ b/test/algorithms/mapper.cpp
@@ -4,12 +4,14 @@
 #include <vector>
 
 #include <lorina/genlib.hpp>
+#include <lorina/super.hpp>
 #include <mockturtle/algorithms/mapper.hpp>
 #include <mockturtle/algorithms/node_resynthesis/mig_npn.hpp>
 #include <mockturtle/algorithms/node_resynthesis/xag_npn.hpp>
 #include <mockturtle/algorithms/node_resynthesis/xmg_npn.hpp>
 #include <mockturtle/generators/arithmetic.hpp>
 #include <mockturtle/io/genlib_reader.hpp>
+#include <mockturtle/io/super_reader.hpp>
 #include <mockturtle/networks/aig.hpp>
 #include <mockturtle/networks/klut.hpp>
 #include <mockturtle/networks/mig.hpp>
@@ -29,6 +31,15 @@ std::string const test_library = "GATE   inv1    1 O=!a;     PIN * INV 1 999 0.9
                                  "GATE   zero    0 O=0;\n"
                                  "GATE   one     0 O=1;";
 
+std::string const super_library = "test.genlib\n" /* header: presumably the name of the genlib file — TODO confirm */
+                                  "3\n" /* presumably the maximum number of inputs — TODO confirm */
+                                  "2\n" /* matches the number of `*`-prefixed lines below */
+                                  "6\n" /* matches 3 + the 3 gate lines below — TODO confirm meaning */
+                                  "* nand2 1 0\n" /* gate name followed by fanin ids */
+                                  "inv1 3\n"
+                                  "* nand2 2 4\n"
+                                  "\0"; /* NOTE(review): redundant, construction from a C string stops at the first NUL */
+
 TEST_CASE( "Map of MAJ3", "[mapper]" )
 {
   std::vector<gate> gates;
@@ -281,6 +292,46 @@ TEST_CASE( "Map of buffer and constant outputs", "[mapper]" )
   CHECK( st.delay < 1.9f + eps );
 }
 
+TEST_CASE( "Map with supergates", "[mapper]" )
+{
+  std::vector<gate> gates;
+  super_lib super_data;
+  /* Maps a small AIG with a library built from `test_library` plus the supergates in `super_library`. */
+  std::istringstream in_lib( test_library );
+  auto result = lorina::read_genlib( in_lib, genlib_reader( gates ) );
+  CHECK( result == lorina::return_code::success );
+
+  std::istringstream in_super( super_library );
+  result = lorina::read_super( in_super, super_reader( super_data ) );
+  CHECK( result == lorina::return_code::success );
+
+  tech_library<3, classification_type::p_configurations> lib( gates, super_data );
+
+  aig_network aig;
+  const auto a = aig.create_pi();
+  const auto b = aig.create_pi();
+  const auto c = aig.create_pi();
+
+  const auto n4 = aig.create_and( a, b );
+  const auto n5 = aig.create_and( b, c );
+  const auto f = aig.create_and( n4, n5 ); /* f = a & b & c */
+  aig.create_po( f );
+
+  map_params ps;
+  map_stats st;
+  binding_view<klut_network> luts = map( aig, lib, ps, &st );
+
+  const float eps{ 0.005f }; /* tolerance for the floating-point delay comparison below */
+
+  CHECK( luts.size() == 9u ); /* NOTE(review): presumably 2 constants + 3 PIs + 4 gates */
+  CHECK( luts.num_pis() == 3u );
+  CHECK( luts.num_pos() == 1u );
+  CHECK( luts.num_gates() == 4u );
+  CHECK( st.area == 6.0f );
+  CHECK( st.delay > 3.8f - eps );
+  CHECK( st.delay < 3.8f + eps );
+}
+
 TEST_CASE( "Exact map of bad MAJ3 and constant output", "[mapper]" )
 {
   mig_npn_resynthesis resyn{ true };
diff --git a/test/utils/super_utils.cpp b/test/utils/super_utils.cpp
index e2744222b..e63fb6da3 100644
--- a/test/utils/super_utils.cpp
+++ b/test/utils/super_utils.cpp
@@ -4,6 +4,7 @@
 #include <vector>
 
 #include <lorina/genlib.hpp>
+#include <lorina/super.hpp>
 #include <mockturtle/io/genlib_reader.hpp>
 #include <mockturtle/utils/super_utils.hpp>
 
diff --git a/test/utils/tech_library.cpp b/test/utils/tech_library.cpp
index e8bd92ed7..8f8545693 100644
--- a/test/utils/tech_library.cpp
+++ b/test/utils/tech_library.cpp
@@ -4,6 +4,7 @@
 #include <vector>
 
 #include <lorina/genlib.hpp>
+#include <lorina/super.hpp>
 #include <mockturtle/io/genlib_reader.hpp>
 #include <mockturtle/io/super_reader.hpp>
 #include <mockturtle/utils/super_utils.hpp>

From b4ec2a41d55d1d8eedd8836398b393ed13e0fce3 Mon Sep 17 00:00:00 2001
From: aletempiac <alessandro.tempia@gmail.com>
Date: Tue, 10 Aug 2021 11:42:41 +0200
Subject: [PATCH 28/40] Added tests for binding_library

---
 include/mockturtle/utils/tech_library.hpp |   4 +-
 test/views/binding_view.cpp               | 148 ++++++++++++++++++++++
 2 files changed, 150 insertions(+), 2 deletions(-)
 create mode 100644 test/views/binding_view.cpp

diff --git a/include/mockturtle/utils/tech_library.hpp b/include/mockturtle/utils/tech_library.hpp
index 245a21e16..c16cd8409 100644
--- a/include/mockturtle/utils/tech_library.hpp
+++ b/include/mockturtle/utils/tech_library.hpp
@@ -121,8 +121,8 @@ struct supergate
  * 
  * The configuration is selected using the template
  * parameter `Configuration`. P-configuration is suggested
- * for libraries with more than 20 gates. The template parameter
- * `NInputs` selects the maximum number of variables
+ * for big libraries with few symmetric gates. The template
+ * parameter `NInputs` selects the maximum number of variables
  * allowed for a gate in the library.
  *
    \verbatim embed:rst
diff --git a/test/views/binding_view.cpp b/test/views/binding_view.cpp
new file mode 100644
index 000000000..c01839b9a
--- /dev/null
+++ b/test/views/binding_view.cpp
@@ -0,0 +1,148 @@
+#include <catch.hpp>
+
+#include <sstream>
+
+#include <lorina/genlib.hpp>
+#include <mockturtle/io/genlib_reader.hpp>
+#include <mockturtle/networks/klut.hpp>
+#include <mockturtle/views/binding_view.hpp>
+
+using namespace mockturtle;
+
+std::string const simple_library = "GATE zero 0 O=0;\n" /* bound below with index 0 */
+                                   "GATE one 0 O=1;\n"
+                                   "GATE inverter 1 O=!a; PIN * INV 1 999 1.0 1.0 1.0 1.0\n" /* bound below with index 2 */
+                                   "GATE buffer 2 O=a; PIN * NONINV 1 999 1.0 1.0 1.0 1.0\n"
+                                   "GATE and 5 O=(ab); PIN * NONINV 1 999 1.0 1.0 1.0 1.0\n" /* bound below with index 4 */
+                                   "GATE or 5 O={ab}; PIN * NONINV 1 999 1.0 1.0 1.0 1.0\n"; /* bound below with index 5 */
+
+TEST_CASE( "Create binding view", "[binding_view]" )
+{
+  std::vector<gate> gates;
+  /* Builds a k-LUT network, binds its nodes to gates of `simple_library` and checks the queries and reports. */
+  std::istringstream in( simple_library );
+  auto result = lorina::read_genlib( in, genlib_reader( gates ) );
+  
+  CHECK( result == lorina::return_code::success );
+
+  binding_view<klut_network> ntk( gates );
+
+  auto const a = ntk.create_pi();
+  auto const b = ntk.create_pi();
+  auto const c = ntk.create_pi();
+  auto const d = ntk.create_pi();
+
+  auto const c0 = ntk.get_constant( false );
+  auto const t1 = ntk.create_and( a, b );
+  auto const t2 = ntk.create_or( c, d );
+  auto const f = ntk.create_and( t1, t2 );
+  auto const g = ntk.create_not( a );
+
+  ntk.create_po( f );
+  ntk.create_po( g );
+  ntk.create_po( ntk.get_constant() );
+
+  ntk.add_binding( ntk.get_node( c0 ), 0 ); /* second argument: index of the gate in `gates` */
+  ntk.add_binding( ntk.get_node( t1 ), 4 );
+  ntk.add_binding( ntk.get_node( t2 ), 5 );
+  ntk.add_binding( ntk.get_node( f ), 4 );
+  ntk.add_binding( ntk.get_node( g ), 2 );
+
+  CHECK( ntk.has_binding( ntk.get_node( a ) ) == false ); /* primary inputs were never bound */
+  CHECK( ntk.has_binding( ntk.get_node( b ) ) == false );
+  CHECK( ntk.has_binding( ntk.get_node( c ) ) == false );
+  CHECK( ntk.has_binding( ntk.get_node( d ) ) == false );
+  CHECK( ntk.has_binding( ntk.get_node( c0 ) ) == true );
+  CHECK( ntk.has_binding( ntk.get_node( t1 ) ) == true );
+  CHECK( ntk.has_binding( ntk.get_node( t2 ) ) == true );
+  CHECK( ntk.has_binding( ntk.get_node( f ) ) == true );
+  CHECK( ntk.has_binding( ntk.get_node( g ) ) == true );
+
+  CHECK( ntk.get_binding_index( ntk.get_node( c0 ) ) == 0 );
+  CHECK( ntk.get_binding_index( ntk.get_node( t1 ) ) == 4 );
+  CHECK( ntk.get_binding_index( ntk.get_node( t2 ) ) == 5 );
+  CHECK( ntk.get_binding_index( ntk.get_node( f ) ) == 4 );
+  CHECK( ntk.get_binding_index( ntk.get_node( g ) ) == 2 );
+
+  CHECK( ntk.get_binding( ntk.get_node( c0 ) ).name == "zero" );
+  CHECK( ntk.get_binding( ntk.get_node( t1 ) ).name == "and" );
+  CHECK( ntk.get_binding( ntk.get_node( t2 ) ).name == "or" );
+  CHECK( ntk.get_binding( ntk.get_node( f ) ).name == "and" );
+  CHECK( ntk.get_binding( ntk.get_node( g ) ).name == "inverter" );
+
+  CHECK( ntk.compute_area() == 16 ); /* 0 (zero) + 5 (and) + 5 (or) + 5 (and) + 1 (inverter) */
+  CHECK( ntk.compute_worst_delay() == 2 ); /* two gates with pin delay 1.0 in series on the way to f */
+
+  std::stringstream report_stats;
+  ntk.report_stats( report_stats );
+  CHECK( report_stats.str() == "[i] Report stats: area = 16.00; delay =  2.00;\n" );
+
+  std::stringstream report_gates;
+  ntk.report_gates_usage( report_gates );
+  CHECK( report_gates.str() == "[i] Report gates usage:\n"
+                               "[i] zero           \t Instance =          1\t Area =         0.00     0.00 %\n"
+                               "[i] inverter       \t Instance =          1\t Area =         1.00     6.25 %\n"
+                               "[i] and            \t Instance =          2\t Area =        10.00    62.50 %\n"
+                               "[i] or             \t Instance =          1\t Area =         5.00    31.25 %\n"
+                               "[i] TOTAL          \t Instance =          5\t Area =        16.00   100.00 %\n" );
+}
+
+TEST_CASE( "Binding view on copy", "[binding_view]" )
+{
+  std::vector<gate> gates;
+  /* Builds and binds a network, copies the view and checks that the copy answers the same binding queries. */
+  std::istringstream in( simple_library );
+  auto result = lorina::read_genlib( in, genlib_reader( gates ) );
+  
+  CHECK( result == lorina::return_code::success );
+
+  binding_view<klut_network> ntk( gates );
+
+  auto const a = ntk.create_pi();
+  auto const b = ntk.create_pi();
+  auto const c = ntk.create_pi();
+  auto const d = ntk.create_pi();
+
+  auto const c0 = ntk.get_constant( false );
+  auto const t1 = ntk.create_and( a, b );
+  auto const t2 = ntk.create_or( c, d );
+  auto const f = ntk.create_and( t1, t2 );
+  auto const g = ntk.create_not( a );
+
+  ntk.create_po( f );
+  ntk.create_po( g );
+  ntk.create_po( ntk.get_constant() );
+
+  ntk.add_binding( ntk.get_node( c0 ), 0 ); /* second argument: index of the gate in `gates` */
+  ntk.add_binding( ntk.get_node( t1 ), 4 );
+  ntk.add_binding( ntk.get_node( t2 ), 5 );
+  ntk.add_binding( ntk.get_node( f ), 4 );
+  ntk.add_binding( ntk.get_node( g ), 2 );
+
+  binding_view<klut_network> ntk_copy = ntk; /* every check below queries the copy only */
+
+  CHECK( ntk_copy.has_binding( ntk_copy.get_node( a ) ) == false );
+  CHECK( ntk_copy.has_binding( ntk_copy.get_node( b ) ) == false );
+  CHECK( ntk_copy.has_binding( ntk_copy.get_node( c ) ) == false );
+  CHECK( ntk_copy.has_binding( ntk_copy.get_node( d ) ) == false );
+  CHECK( ntk_copy.has_binding( ntk_copy.get_node( c0 ) ) == true );
+  CHECK( ntk_copy.has_binding( ntk_copy.get_node( t1 ) ) == true );
+  CHECK( ntk_copy.has_binding( ntk_copy.get_node( t2 ) ) == true );
+  CHECK( ntk_copy.has_binding( ntk_copy.get_node( f ) ) == true );
+  CHECK( ntk_copy.has_binding( ntk_copy.get_node( g ) ) == true );
+
+  CHECK( ntk_copy.get_binding_index( ntk_copy.get_node( c0 ) ) == 0 );
+  CHECK( ntk_copy.get_binding_index( ntk_copy.get_node( t1 ) ) == 4 );
+  CHECK( ntk_copy.get_binding_index( ntk_copy.get_node( t2 ) ) == 5 );
+  CHECK( ntk_copy.get_binding_index( ntk_copy.get_node( f ) ) == 4 );
+  CHECK( ntk_copy.get_binding_index( ntk_copy.get_node( g ) ) == 2 );
+
+  CHECK( ntk_copy.get_binding( ntk_copy.get_node( c0 ) ).name == "zero" );
+  CHECK( ntk_copy.get_binding( ntk_copy.get_node( t1 ) ).name == "and" );
+  CHECK( ntk_copy.get_binding( ntk_copy.get_node( t2 ) ).name == "or" );
+  CHECK( ntk_copy.get_binding( ntk_copy.get_node( f ) ).name == "and" );
+  CHECK( ntk_copy.get_binding( ntk_copy.get_node( g ) ).name == "inverter" );
+
+  CHECK( ntk_copy.compute_area() == 16 ); /* 0 (zero) + 5 (and) + 5 (or) + 5 (and) + 1 (inverter) */
+  CHECK( ntk_copy.compute_worst_delay() == 2 ); /* two gates with pin delay 1.0 in series on the way to f */
+}

From 8325603e7a7bf5cfb2918144680279cf04084b1d Mon Sep 17 00:00:00 2001
From: aletempiac <alessandro.tempia@gmail.com>
Date: Tue, 10 Aug 2021 12:26:39 +0200
Subject: [PATCH 29/40] Fixes

---
 include/mockturtle/io/super_reader.hpp  | 2 +-
 include/mockturtle/io/write_verilog.hpp | 4 ++--
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/include/mockturtle/io/super_reader.hpp b/include/mockturtle/io/super_reader.hpp
index 1c7c7d9ac..b0a3ef441 100644
--- a/include/mockturtle/io/super_reader.hpp
+++ b/include/mockturtle/io/super_reader.hpp
@@ -90,7 +90,7 @@ class super_reader : public lorina::super_reader
   virtual void on_supergate( std::string const& name, bool const& is_super, std::vector<uint32_t> const& fanins_id ) const override
   {
 
-    lib.supergates.emplace_back( supergate_spec{ lib.supergates.size(),
+    lib.supergates.emplace_back( supergate_spec{ static_cast<unsigned int>( lib.supergates.size() ),
                                                  name,
                                                  is_super,
                                                  fanins_id } );
diff --git a/include/mockturtle/io/write_verilog.hpp b/include/mockturtle/io/write_verilog.hpp
index 4082e81d0..efcb0d53e 100644
--- a/include/mockturtle/io/write_verilog.hpp
+++ b/include/mockturtle/io/write_verilog.hpp
@@ -494,12 +494,12 @@ void write_verilog( binding_view<Ntk> const& ntk, std::ostream& os, write_verilo
   auto const& gates = ntk.get_library();
 
   int nDigits = ( int ) std::floor( std::log10( ntk.num_gates() ) );
-  unsigned long length = 0;
+  unsigned int length = 0;
   unsigned counter = 0;
 
   for ( auto const& gate : gates )
   {
-    length = std::max( length, gate.name.length() );
+    length = std::max( length, static_cast<unsigned int>( gate.name.length() ) );
   }
 
   topo_view ntk_topo{ntk};

From 7be62c5ee3e75a224dd5c7ae97f30caeeb741b83 Mon Sep 17 00:00:00 2001
From: aletempiac <alessandro.tempia@gmail.com>
Date: Tue, 10 Aug 2021 13:49:36 +0200
Subject: [PATCH 30/40] Fixes cast

---
 include/mockturtle/utils/super_utils.hpp | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/include/mockturtle/utils/super_utils.hpp b/include/mockturtle/utils/super_utils.hpp
index 877493723..3ff5a2de4 100644
--- a/include/mockturtle/utils/super_utils.hpp
+++ b/include/mockturtle/utils/super_utils.hpp
@@ -138,7 +138,7 @@ class super_utils
         pin_to_pin_delays[i++] = std::max( pin.rise_block_delay, pin.fall_block_delay );
       }
 
-      _supergates.emplace_back( composed_gate<NInputs>{_supergates.size(),
+      _supergates.emplace_back( composed_gate<NInputs>{static_cast<unsigned int>( _supergates.size() ),
                                                        false,
                                                        &g,
                                                        g.num_vars,
@@ -190,7 +190,7 @@ class super_utils
       kitty::dynamic_truth_table tt{ NInputs };
       kitty::create_nth_var( tt, i );
 
-      _supergates.emplace_back( composed_gate<NInputs>{i,
+      _supergates.emplace_back( composed_gate<NInputs>{static_cast<unsigned int>( i ),
                                                        false,
                                                        nullptr,
                                                        0,
@@ -276,7 +276,7 @@ class super_utils
       auto tt_test = tt;
       std::vector<uint8_t> const& support = kitty::min_base_inplace( tt_test );
 
-      _supergates.emplace_back( composed_gate<NInputs>{_supergates.size(),
+      _supergates.emplace_back( composed_gate<NInputs>{static_cast<unsigned int>( _supergates.size() ),
                                                        is_super_verified,
                                                        &_gates[root_match_id],
                                                        0,

From de3bc6bc53d78397182e0cd6c5771dae32c79619 Mon Sep 17 00:00:00 2001
From: aletempiac <alessandro.tempia@gmail.com>
Date: Tue, 10 Aug 2021 15:28:11 +0200
Subject: [PATCH 31/40] Added tests to improve coverage

---
 test/io/write_verilog.cpp   | 54 +++++++++++++++++++++
 test/utils/tech_library.cpp | 93 ++++++++++++++++++++++++++++++++++++-
 2 files changed, 145 insertions(+), 2 deletions(-)

diff --git a/test/io/write_verilog.cpp b/test/io/write_verilog.cpp
index aef9400f2..23d65f943 100644
--- a/test/io/write_verilog.cpp
+++ b/test/io/write_verilog.cpp
@@ -219,3 +219,57 @@ TEST_CASE( "write mapped network into Verilog file", "[write_verilog]" )
                       "  inv2   g2( .a (y1), .O (y2) );\n"
                       "endmodule\n" );
 }
+
+TEST_CASE( "write mapped network with multiple driven POs and register names into Verilog file", "[write_verilog]" )
+{
+  std::string const simple_test_library = "GATE   inv1    1 O=!a;     PIN * INV 1 999 0.9 0.3 0.9 0.3\n"
+                                          "GATE   inv2    2 O=!a;     PIN * INV 2 999 1.0 0.1 1.0 0.1\n"
+                                          "GATE   buf     2 O=a;      PIN * NONINV 1 999 1.0 0.0 1.0 0.0\n"
+                                          "GATE   nand2   2 O=!(ab);  PIN * INV 1 999 1.0 0.2 1.0 0.2\n";
+
+  std::vector<gate> gates;
+  std::istringstream in( simple_test_library );
+  auto result = lorina::read_genlib( in, genlib_reader( gates ) );
+
+  CHECK( result == lorina::return_code::success );
+
+  binding_view<klut_network> klut( gates );
+
+  const auto a = klut.create_pi();
+  const auto b = klut.create_pi();
+  const auto c = klut.create_pi();
+
+  /* create buffer */
+  uint64_t buf_func = 0x2;
+  kitty::dynamic_truth_table tt_buf( 1 );
+  kitty::create_from_words( tt_buf, &buf_func, &buf_func + 1 );
+  const auto buf = klut.create_node( { a }, tt_buf );
+  
+  const auto f1 = klut.create_nand( b, c );
+  const auto f2 = klut.create_not( f1 );
+
+  klut.create_po( buf );
+  klut.create_po( f1 );
+  klut.create_po( f1 );
+  klut.create_po( f2 );
+
+  klut.add_binding( klut.get_node( buf ), 2 );
+  klut.add_binding( klut.get_node( f1 ), 3 );
+  klut.add_binding( klut.get_node( f2 ), 1 );
+
+  std::ostringstream out;
+  write_verilog_params ps;
+  ps.input_names = {{"ref", 1u}, {"data", 2u}};
+  ps.output_names = {{"y", 4u}};
+  write_verilog( klut, out, ps );
+
+  CHECK( out.str() == "module top( ref , data , y );\n"
+                      "  input [0:0] ref ;\n"
+                      "  input [1:0] data ;\n"
+                      "  output [3:0] y ;\n"
+                      "  buf    g0( .a (ref[0]), .O (y[0]) );\n"
+                      "  nand2  g1( .a (data[0]), .b (data[1]), .O (y[1]) );\n"
+                      "  nand2  g2( .a (data[0]), .b (data[1]), .O (y[2]) );\n"
+                      "  inv2   g3( .a (y[1]), .O (y[3]) );\n"
+                      "endmodule\n" );
+}
diff --git a/test/utils/tech_library.cpp b/test/utils/tech_library.cpp
index 8f8545693..b59f6c307 100644
--- a/test/utils/tech_library.cpp
+++ b/test/utils/tech_library.cpp
@@ -193,7 +193,7 @@ TEST_CASE( "Simple test library generation 2", "[tech_library]" )
   CHECK( nand_e == nullptr );
 }
 
-TEST_CASE( "Supergate library generation", "[tech_library]" )
+TEST_CASE( "Supergate library generation P", "[tech_library]" )
 {
   std::vector<gate> gates;
   super_lib super_data;
@@ -281,6 +281,96 @@ TEST_CASE( "Supergate library generation", "[tech_library]" )
   CHECK( ( *and_01 )[1].polarity == 7u );
 }
 
+TEST_CASE( "Supergate library generation NP", "[tech_library]" )
+{
+  std::vector<gate> gates;
+  super_lib super_data;
+
+  std::istringstream in_genlib( simple_library );
+  auto result = lorina::read_genlib( in_genlib, genlib_reader( gates ) );
+  
+  CHECK( result == lorina::return_code::success );
+
+  std::istringstream in_super( super_library );
+  result = lorina::read_super( in_super, mockturtle::super_reader( super_data ) );
+
+  CHECK( result == lorina::return_code::success );
+
+  tech_library_params ps;
+  ps.verbose = true;
+  ps.very_verbose = true;
+  tech_library<3, classification_type::np_configurations> lib( gates, super_data );
+  fflush( stdout );
+
+  CHECK( lib.max_gate_size() == 3 );
+  CHECK( lib.get_inverter_info() == std::make_tuple( 1.0f, 1.0f, 2u ) );
+  CHECK( lib.get_buffer_info() == std::make_tuple( 2.0f, 1.0f, 3u ) );
+
+  kitty::static_truth_table<3> tt;
+
+  kitty::create_from_hex_string( tt, "11" );
+  auto const and_1 = lib.get_supergates( tt );
+  CHECK( and_1 != nullptr );
+  CHECK( and_1->size() == 1 );
+  CHECK( ( *and_1 )[0].root->root->name == "and" );
+  CHECK( ( *and_1 )[0].area == 5.0f );
+  CHECK( ( *and_1 )[0].tdelay[0] == 1.0f );
+  CHECK( ( *and_1 )[0].tdelay[1] == 1.0f );
+  CHECK( ( *and_1 )[0].polarity == 3u );
+
+  kitty::create_from_hex_string( tt, "22" );
+  auto const and_2 = lib.get_supergates( tt );
+  CHECK( and_2 != nullptr );
+  CHECK( and_2->size() == 1 );
+  CHECK( ( *and_2 )[0].root->root->name == "and" );
+  CHECK( ( *and_2 )[0].area == 5.0f );
+  CHECK( ( *and_2 )[0].tdelay[0] == 1.0f );
+  CHECK( ( *and_2 )[0].tdelay[1] == 1.0f );
+  CHECK( ( *and_2 )[0].polarity == 2u );
+
+  kitty::create_from_hex_string( tt, "44" );
+  auto const and_4 = lib.get_supergates( tt );
+  CHECK( and_4 != nullptr );
+  CHECK( and_4->size() == 1 );
+  CHECK( ( *and_4 )[0].root->root->name == "and" );
+  CHECK( ( *and_4 )[0].area == 5.0f );
+  CHECK( ( *and_4 )[0].tdelay[0] == 1.0f );
+  CHECK( ( *and_4 )[0].tdelay[1] == 1.0f );
+  CHECK( ( *and_4 )[0].polarity == 1u );
+
+  kitty::create_from_hex_string( tt, "88" );
+  auto const and_8 = lib.get_supergates( tt );
+  CHECK( and_8 != nullptr );
+  CHECK( and_8->size() == 1 );
+  CHECK( ( *and_8 )[0].root->root->name == "and" );
+  CHECK( ( *and_8 )[0].area == 5.0f );
+  CHECK( ( *and_8 )[0].tdelay[0] == 1.0f );
+  CHECK( ( *and_8 )[0].tdelay[1] == 1.0f );
+  CHECK( ( *and_8 )[0].polarity == 0u );
+
+  kitty::create_from_hex_string( tt, "07" );
+  auto const andor_07 = lib.get_supergates( tt );
+  CHECK( andor_07 != nullptr );
+  CHECK( andor_07->size() == 1 );
+  CHECK( ( *andor_07 )[0].root->root->name == "and" );
+  CHECK( ( *andor_07 )[0].area == 10.0f );
+  CHECK( ( *andor_07 )[0].tdelay[0] == 2.0f );
+  CHECK( ( *andor_07 )[0].tdelay[1] == 2.0f );
+  CHECK( ( *andor_07 )[0].tdelay[2] == 1.0f );
+  CHECK( ( *andor_07 )[0].polarity == 7u );
+
+  kitty::create_from_hex_string( tt, "e0" );
+  auto const andor_e0 = lib.get_supergates( tt );
+  CHECK( andor_e0 != nullptr );
+  CHECK( andor_e0->size() == 1 );
+  CHECK( ( *andor_e0 )[0].root->root->name == "and" );
+  CHECK( ( *andor_e0 )[0].area == 10.0f );
+  CHECK( ( *andor_e0 )[0].tdelay[0] == 2.0f );
+  CHECK( ( *andor_e0 )[0].tdelay[1] == 2.0f );
+  CHECK( ( *andor_e0 )[0].tdelay[2] == 1.0f );
+  CHECK( ( *andor_e0 )[0].polarity == 0u );
+}
+
 TEST_CASE( "Complete library generation", "[tech_library]" )
 {
   std::vector<gate> gates;
@@ -321,5 +411,4 @@ TEST_CASE( "Complete library generation", "[tech_library]" )
 
     kitty::exact_np_enumeration( tt, test_enumeration );
   }
-  
 }
\ No newline at end of file

From ef3e002b2a880eaaf5bfc9785fc20d93647e5736 Mon Sep 17 00:00:00 2001
From: aletempiac <alessandro.tempia@gmail.com>
Date: Tue, 10 Aug 2021 17:12:39 +0200
Subject: [PATCH 32/40] Header and description fixes

---
 include/mockturtle/io/super_reader.hpp   | 5 ++---
 include/mockturtle/utils/super_utils.hpp | 6 ------
 2 files changed, 2 insertions(+), 9 deletions(-)

diff --git a/include/mockturtle/io/super_reader.hpp b/include/mockturtle/io/super_reader.hpp
index b0a3ef441..7484c7eec 100644
--- a/include/mockturtle/io/super_reader.hpp
+++ b/include/mockturtle/io/super_reader.hpp
@@ -24,8 +24,8 @@
  */
 
 /*!
-  \file genlib_reader.hpp
-  \brief Reader visitor for GENLIB files
+  \file super_reader.hpp
+  \brief Reader visitor for SUPER files generated by ABC
 
   \author Alessandro Tempia Calvino
   \author Shubham Rai
@@ -36,7 +36,6 @@
 #include "../traits.hpp"
 
 #include <fmt/format.h>
-#include <kitty/constructors.hpp>
 #include <lorina/super.hpp>
 
 namespace mockturtle
diff --git a/include/mockturtle/utils/super_utils.hpp b/include/mockturtle/utils/super_utils.hpp
index 3ff5a2de4..95e29c73c 100644
--- a/include/mockturtle/utils/super_utils.hpp
+++ b/include/mockturtle/utils/super_utils.hpp
@@ -39,15 +39,9 @@
 
 #include <kitty/constructors.hpp>
 #include <kitty/dynamic_truth_table.hpp>
-#include <kitty/npn.hpp>
-#include <kitty/print.hpp>
-#include <kitty/static_truth_table.hpp>
-#include <lorina/lorina.hpp>
 
 #include "../io/genlib_reader.hpp"
 #include "../io/super_reader.hpp"
-#include "../traits.hpp"
-#include "../utils/truth_table_cache.hpp"
 
 namespace mockturtle
 {

From f387bdb5184bbcdcd344f4d8864c6063723ef762 Mon Sep 17 00:00:00 2001
From: aletempiac <alessandro.tempia@gmail.com>
Date: Wed, 11 Aug 2021 18:45:24 +0200
Subject: [PATCH 33/40] Fixes in constant gates for write_verilog, extended
 usage of topo_view in mapping

---
 include/mockturtle/algorithms/mapper.hpp | 42 +++++++++++-------------
 include/mockturtle/io/write_verilog.hpp  | 37 +++++++++++++++++----
 include/mockturtle/utils/super_utils.hpp |  4 ---
 test/io/write_verilog.cpp                | 22 ++++++++-----
 4 files changed, 64 insertions(+), 41 deletions(-)

diff --git a/include/mockturtle/algorithms/mapper.hpp b/include/mockturtle/algorithms/mapper.hpp
index 07d6b0a07..d7d3e2127 100644
--- a/include/mockturtle/algorithms/mapper.hpp
+++ b/include/mockturtle/algorithms/mapper.hpp
@@ -654,17 +654,17 @@ class tech_map_impl
     } );
 
     /* propagate required time to the PIs */
-    auto i = ntk.size();
-    while ( i-- > 0u )
+    for ( auto it = top_order.rbegin(); it != top_order.rend(); ++it )
     {
-      const auto n = ntk.index_to_node( i );
-      if ( ntk.is_pi( n ) || ntk.is_constant( n ) )
+      if ( ntk.is_pi( *it ) || ntk.is_constant( *it ) )
         break;
 
-      if ( node_match[i].map_refs[2] == 0 )
+      const auto index = ntk.node_to_index( *it );
+
+      if ( node_match[index].map_refs[2] == 0 )
         continue;
 
-      auto& node_data = node_match[i];
+      auto& node_data = node_match[index];
 
       unsigned use_phase = node_data.best_supergate[0] == nullptr ? 1u : 0u;
       unsigned other_phase = use_phase ^ 1;
@@ -681,7 +681,7 @@ class tech_map_impl
       if ( node_data.same_match || node_data.map_refs[use_phase] > 0 )
       {
         auto ctr = 0u;
-        auto best_cut = cuts.cuts( i )[node_data.best_cut[use_phase]];
+        auto best_cut = cuts.cuts( index )[node_data.best_cut[use_phase]];
         auto const& supergate = node_data.best_supergate[use_phase];
         for ( auto leaf : best_cut )
         {
@@ -694,7 +694,7 @@ class tech_map_impl
       if ( !node_data.same_match && node_data.map_refs[other_phase] > 0 )
       {
         auto ctr = 0u;
-        auto best_cut = cuts.cuts( i )[node_data.best_cut[other_phase]];
+        auto best_cut = cuts.cuts( index )[node_data.best_cut[other_phase]];
         auto const& supergate = node_data.best_supergate[other_phase];
         for ( auto leaf : best_cut )
         {
@@ -1335,7 +1335,8 @@ class tech_map_impl
 
   void finalize_cover( binding_view<klut_network>& res, klut_map& old2new )
   {
-    ntk.foreach_node( [&]( auto const& n ) {
+    for ( auto const& n : top_order )
+    {
       auto index = ntk.node_to_index( n );
       auto const& node_data = node_match[index];
 
@@ -1343,7 +1344,7 @@ class tech_map_impl
       if ( ntk.is_constant( n ) )
       {
         if ( node_data.best_supergate[0] == nullptr && node_data.best_supergate[1] == nullptr )
-          return true;
+          continue;
       }
       else if ( ntk.is_pi( n ) )
       {
@@ -1352,12 +1353,12 @@ class tech_map_impl
           old2new[index][1] = res.create_not( old2new[n][0] );
           res.add_binding( res.get_node( old2new[index][1] ), lib_inv_id );
         }
-        return true;
+        continue;
       }
 
       /* continue if cut is not in the cover */
       if ( node_data.map_refs[2] == 0u )
-        return true;
+        continue;
 
       unsigned phase = ( node_data.best_supergate[0] != nullptr ) ? 0 : 1;
 
@@ -1380,9 +1381,7 @@ class tech_map_impl
       {
         create_lut_for_gate( res, old2new, index, phase );
       }
-
-      return true;
-    } );
+    }
 
     /* create POs */
     ntk.foreach_po( [&]( auto const& f ) {
@@ -1526,25 +1525,26 @@ class tech_map_impl
   {
     double power = 0.0f;
 
-    ntk.foreach_node( [&]( auto const& n ) {
+    for ( auto const& n : top_order )
+    {
       const auto index = ntk.node_to_index( n );
       auto& node_data = node_match[index];
 
       if ( ntk.is_constant( n ) )
       {
         if ( node_data.best_supergate[0] == nullptr && node_data.best_supergate[1] == nullptr )
-          return true;
+          continue;
       }
       else if ( ntk.is_pi( n ) )
       {
         if ( node_data.map_refs[1] > 0 )
           power += switch_activity[ntk.node_to_index( n )];
-        return true;
+        continue;
       }
 
       /* continue if cut is not in the cover */
       if ( node_match[index].map_refs[2] == 0u )
-        return true;
+        continue;
 
       unsigned phase = ( node_data.best_supergate[0] != nullptr ) ? 0 : 1;
 
@@ -1561,9 +1561,7 @@ class tech_map_impl
       {
         power += switch_activity[ntk.node_to_index( n )];
       }
-
-      return true;
-    } );
+    }
 
     return power;
   }
diff --git a/include/mockturtle/io/write_verilog.hpp b/include/mockturtle/io/write_verilog.hpp
index efcb0d53e..0fee23d06 100644
--- a/include/mockturtle/io/write_verilog.hpp
+++ b/include/mockturtle/io/write_verilog.hpp
@@ -445,6 +445,36 @@ void write_verilog( binding_view<Ntk> const& ntk, std::ostream& os, write_verilo
   } );
 
   std::vector<std::string> ws;
+  node_map<std::string, binding_view<Ntk>> node_names( ntk );
+
+  /* constants */
+  if ( ntk.has_binding( ntk.get_constant( false ) ) )
+  {
+    node_names[ntk.get_constant( false )] = fmt::format("n{}", ntk.node_to_index( ntk.get_constant( false ) ) );
+    if ( !po_nodes.has( ntk.get_constant( false ) ) )
+    {
+      ws.emplace_back( node_names[ntk.get_constant( false )] );
+    }
+  }
+  else
+  {
+    node_names[ntk.get_constant( false )] = "1'b0";
+  }
+  if ( ntk.get_node( ntk.get_constant( false ) ) != ntk.get_node( ntk.get_constant( true ) ) )
+  {
+    if ( ntk.has_binding( ntk.get_constant( true ) ) )
+    {
+      node_names[ntk.get_constant( true )] = fmt::format("n{}", ntk.node_to_index( ntk.get_constant( true ) ) );
+      if ( !po_nodes.has( ntk.get_constant( true ) ) )
+      {
+        ws.emplace_back( node_names[ntk.get_constant( true )] );
+      }
+    }
+    else
+    {
+      node_names[ntk.get_constant( true )] = "1'b1";
+    }
+  }
 
   /* add wires */
   ntk.foreach_gate( [&]( auto const& n ) {
@@ -482,11 +512,6 @@ void write_verilog( binding_view<Ntk> const& ntk, std::ostream& os, write_verilo
     writer.on_wire( ws );
   }
 
-  node_map<std::string, binding_view<Ntk>> node_names( ntk );
-  node_names[ntk.get_constant( false )] = "1'b0";
-  if ( ntk.get_node( ntk.get_constant( false ) ) != ntk.get_node( ntk.get_constant( true ) ) )
-    node_names[ntk.get_constant( true )] = "1'b1";
-
   ntk.foreach_pi( [&]( auto const& n, auto i ) {
     node_names[n] = xs[i];
   } );
@@ -539,7 +564,7 @@ void write_verilog( binding_view<Ntk> const& ntk, std::ostream& os, write_verilo
       /* if node drives multiple POs, duplicate */
       if ( po_nodes.has( n ) && po_nodes[n].size() > 1 )
       {
-        std::cout << "[i] node " << n << " driving multiple POs has been duplicated.\n";
+        std::cerr << "[i] node " << n << " driving multiple POs has been duplicated.\n";
         auto const& po_list = po_nodes[n];
         for ( auto i = 1u; i < po_list.size(); ++i )
         {
diff --git a/include/mockturtle/utils/super_utils.hpp b/include/mockturtle/utils/super_utils.hpp
index 95e29c73c..f952490c5 100644
--- a/include/mockturtle/utils/super_utils.hpp
+++ b/include/mockturtle/utils/super_utils.hpp
@@ -266,10 +266,6 @@ class super_utils
       float area = compute_area( root_match_id, sub_gates );
       const kitty::dynamic_truth_table tt = compute_truth_table( root_match_id, sub_gates );
 
-      /* try truth table minimization */
-      auto tt_test = tt;
-      std::vector<uint8_t> const& support = kitty::min_base_inplace( tt_test );
-
       _supergates.emplace_back( composed_gate<NInputs>{static_cast<unsigned int>( _supergates.size() ),
                                                        is_super_verified,
                                                        &_gates[root_match_id],
diff --git a/test/io/write_verilog.cpp b/test/io/write_verilog.cpp
index 23d65f943..1f4f6dd96 100644
--- a/test/io/write_verilog.cpp
+++ b/test/io/write_verilog.cpp
@@ -174,7 +174,8 @@ TEST_CASE( "write buffered AIG into Verilog file", "[write_verilog]" )
 
 TEST_CASE( "write mapped network into Verilog file", "[write_verilog]" )
 {
-  std::string const simple_test_library = "GATE   inv1    1 O=!a;     PIN * INV 1 999 0.9 0.3 0.9 0.3\n"
+  std::string const simple_test_library = "GATE   zero    0 O=0;\n"
+                                          "GATE   inv1    1 O=!a;     PIN * INV 1 999 0.9 0.3 0.9 0.3\n"
                                           "GATE   inv2    2 O=!a;     PIN * INV 2 999 1.0 0.1 1.0 0.1\n"
                                           "GATE   buf     2 O=a;      PIN * NONINV 1 999 1.0 0.0 1.0 0.0\n"
                                           "GATE   nand2   2 O=!(ab);  PIN * INV 1 999 1.0 0.2 1.0 0.2\n";
@@ -200,23 +201,26 @@ TEST_CASE( "write mapped network into Verilog file", "[write_verilog]" )
   const auto f1 = klut.create_nand( b, c );
   const auto f2 = klut.create_not( f1 );
 
+  klut.create_po( klut.get_constant( false ) );
   klut.create_po( buf );
   klut.create_po( f1 );
   klut.create_po( f2 );
 
-  klut.add_binding( klut.get_node( buf ), 2 );
-  klut.add_binding( klut.get_node( f1 ), 3 );
-  klut.add_binding( klut.get_node( f2 ), 1 );
+  klut.add_binding( klut.get_node( klut.get_constant( false ) ), 0 );
+  klut.add_binding( klut.get_node( buf ), 3 );
+  klut.add_binding( klut.get_node( f1 ), 4 );
+  klut.add_binding( klut.get_node( f2 ), 2 );
 
   std::ostringstream out;
   write_verilog( klut, out );
 
-  CHECK( out.str() == "module top( x0 , x1 , x2 , y0 , y1 , y2 );\n"
+  CHECK( out.str() == "module top( x0 , x1 , x2 , y0 , y1 , y2 , y3 );\n"
                       "  input x0 , x1 , x2 ;\n"
-                      "  output y0 , y1 , y2 ;\n"
-                      "  buf    g0( .a (x0), .O (y0) );\n"
-                      "  nand2  g1( .a (x1), .b (x2), .O (y1) );\n"
-                      "  inv2   g2( .a (y1), .O (y2) );\n"
+                      "  output y0 , y1 , y2 , y3 ;\n"
+                      "  zero   g0( .O (y0) );\n"
+                      "  buf    g1( .a (x0), .O (y1) );\n"
+                      "  nand2  g2( .a (x1), .b (x2), .O (y2) );\n"
+                      "  inv2   g3( .a (y2), .O (y3) );\n"
                       "endmodule\n" );
 }
 

From 69c4190680979d1af1422f8cbf7890c7c65817f2 Mon Sep 17 00:00:00 2001
From: aletempiac <alessandro.tempia@gmail.com>
Date: Thu, 12 Aug 2021 12:38:11 +0200
Subject: [PATCH 34/40] Added new mapper, library, supergates, and i/o
 documentation

---
 docs/algorithms/mapper.rst                | 20 +++++----
 docs/io/lorina_readers.rst                |  4 ++
 docs/io/writers.rst                       |  2 +
 docs/utils/util_data_structures.rst       | 14 ++++++
 docs/views.rst                            |  8 ++++
 include/mockturtle/io/genlib_reader.hpp   |  8 +++-
 include/mockturtle/io/super_reader.hpp    |  6 ++-
 include/mockturtle/utils/super_utils.hpp  | 23 +++++++++-
 include/mockturtle/utils/tech_library.hpp | 10 ++++-
 include/mockturtle/views/binding_view.hpp | 53 ++++++++++++++++++++++-
 10 files changed, 131 insertions(+), 17 deletions(-)

diff --git a/docs/algorithms/mapper.rst b/docs/algorithms/mapper.rst
index f0430740f..ebc4d97a9 100644
--- a/docs/algorithms/mapper.rst
+++ b/docs/algorithms/mapper.rst
@@ -14,12 +14,13 @@ The mapper uses a library (hash table) to facilitate Boolean matching.
 For technology mapping, it needs `tech_library` while for graph mapping
 it needs `exact_library`. For technology mapping, the generation of both NP- and
 P-configurations of gates are supported. Generally, it is convenient to use
-NP-configurations for small cell libraries (<20 gates). For bigger libraries,
-P-configurations perform better. For graph mapping, NPN classification is used
-instead.
+NP-configurations for small or medium size cell libraries. For bigger libraries,
+P-configurations should perform better. You can test both the configurations to
+see which one has the best run time. For graph mapping, NPN classification
+is used instead.
 
 The following example shows how to perform delay-oriented technology mapping
-from an And-inverter graph using the default settings:
+from an and-inverter graph using the default settings:
 
 .. code-block:: c++
 
@@ -32,10 +33,10 @@ from an And-inverter graph using the default settings:
    tech_library tech_lib( gates );
 
    /* perform technology mapping */
-   klut_network res = map( aig, tech_lib );
+   binding_view<klut_network> res = map( aig, tech_lib );
 
-The mapped network is returned as a k-LUT network in which each k-LUT
-abstracts a cell.
+The mapped network is returned as a `binding_view` that extends a k-LUT network.
+Each k-LUT abstracts a cell and the view contains the binding information.
 
 The next example performs area-oriented graph mapping from AIG to MIG
 using a NPN resynthesis database of structures:
@@ -59,8 +60,9 @@ target graph representation if possible (e.g. read an AIG as a MIG)
 since the mapping often leads to better results in this setting.
 
 As a default setting, cut enumeration minimizes the truth tables.
-This helps improving the results but slows down the computation. For
-a faster mapping set the truth table minimization parameter to false.
+This helps improving the results but slows down the computation.
+We suggest always keeping it enabled. However, for a faster mapping,
+set the truth table minimization parameter to false.
 The maximum number of cuts stored for each node is limited to 49.
 To increase this limit, change `max_cut_num` in `fast_network_cuts`.
 
diff --git a/docs/io/lorina_readers.rst b/docs/io/lorina_readers.rst
index 13d129787..ce3c25c6a 100644
--- a/docs/io/lorina_readers.rst
+++ b/docs/io/lorina_readers.rst
@@ -14,3 +14,7 @@ implements the reader callback ``<format>_reader``.
 .. doxygenclass:: mockturtle::pla_reader
 
 .. doxygenclass:: mockturtle::verilog_reader
+
+.. doxygenclass:: mockturtle::genlib_reader
+
+.. doxygenclass:: mockturtle::super_reader
diff --git a/docs/io/writers.rst b/docs/io/writers.rst
index 779e18f01..0fdab71d4 100644
--- a/docs/io/writers.rst
+++ b/docs/io/writers.rst
@@ -37,6 +37,8 @@ Write into structural Verilog files
 
 .. doxygenfunction:: mockturtle::write_verilog(Ntk const&, std::ostream&, write_verilog_params const&)
 
+.. doxygenfunction:: mockturtle::write_verilog(binding_view<Ntk> const&, std::ostream&, write_verilog_params const&)
+
 Write into DIMACS files (CNF)
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 
diff --git a/docs/utils/util_data_structures.rst b/docs/utils/util_data_structures.rst
index ac324af00..d09ecbaa7 100644
--- a/docs/utils/util_data_structures.rst
+++ b/docs/utils/util_data_structures.rst
@@ -65,6 +65,20 @@ Exact Library
 .. doxygenclass:: mockturtle::exact_library
    :members:
 
+Super Utils
+~~~~~~~~~~~
+
+**Header:** ``mockturtle/utils/super_utils.hpp``
+
+.. doc_overview_table:: classmockturtle_1_1super__utils
+   :column: Method
+
+   get_super_library
+   get_standard_library_size
+
+.. doxygenclass:: mockturtle::super_utils
+   :members:
+
 Cuts
 ~~~~
 
diff --git a/docs/views.rst b/docs/views.rst
index 571cee11c..74e80415d 100644
--- a/docs/views.rst
+++ b/docs/views.rst
@@ -74,6 +74,14 @@ algorithm.  Several views are implemented in mockturtle.
 .. doxygenclass:: mockturtle::window_view
    :members:
 
+`binding_view`: Add bindings to a technology library
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+**Header:** ``mockturtle/views/binding_view.hpp``
+
+.. doxygenclass:: mockturtle::binding_view
+   :members:
+
 `names_view`: Assign names to signals and outputs
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 
diff --git a/include/mockturtle/io/genlib_reader.hpp b/include/mockturtle/io/genlib_reader.hpp
index 8e64753ed..b1b004b28 100644
--- a/include/mockturtle/io/genlib_reader.hpp
+++ b/include/mockturtle/io/genlib_reader.hpp
@@ -73,7 +73,11 @@ struct gate
 }; /* gate */
 
 /*! \brief lorina callbacks for GENLIB files.
- *
+ * 
+ * Note that the currently supported syntax for functions is
+ * different from the genlib standard. For more info,
+ * look into `kitty::create_from_expression`.
+ * 
    \verbatim embed:rst
 
    Example
@@ -81,7 +85,7 @@ struct gate
    .. code-block:: c++
 
       std::vector<gate> gates;
-      lorina::read_genlib( "file.lib", genlib_reader( gates ) );
+      lorina::read_genlib( "file.genlib", genlib_reader( gates ) );
    \endverbatim
  */
 class genlib_reader : public lorina::genlib_reader
diff --git a/include/mockturtle/io/super_reader.hpp b/include/mockturtle/io/super_reader.hpp
index 7484c7eec..125eb5222 100644
--- a/include/mockturtle/io/super_reader.hpp
+++ b/include/mockturtle/io/super_reader.hpp
@@ -59,6 +59,8 @@ struct super_lib
 };
 
 /*! \brief lorina callbacks for SUPER files.
+ *
+ * SUPER files can be generated by ABC with the command `super`.
  *
    \verbatim embed:rst
 
@@ -66,8 +68,8 @@ struct super_lib
 
    .. code-block:: c++
 
-      std::vector<mockturtle::supergates_spec> supergates;
-      lorina::read_genlib( "file.super", mockturtle::super_reader( supergates ) );
+      super_lib supergates_spec;
+      lorina::read_super( "file.super", super_reader( supergates_spec ) );
    \endverbatim
  */
 class super_reader : public lorina::super_reader
diff --git a/include/mockturtle/utils/super_utils.hpp b/include/mockturtle/utils/super_utils.hpp
index f952490c5..c249cfe85 100644
--- a/include/mockturtle/utils/super_utils.hpp
+++ b/include/mockturtle/utils/super_utils.hpp
@@ -80,6 +80,15 @@ struct composed_gate
   std::vector<composed_gate<NInputs>*> fanin{};
 };
 
+/*! \brief Utilities to generate supergates
+ *
+ * This class creates supergates starting from supergates
+ * specifications contained in `supergates_spec` extracted
+ * from a SUPER file.
+ * 
+ * This utility is called by `tech_library` to construct
+ * the library for technology mapping.
+ */
 template<unsigned NInputs = 5u>
 class super_utils
 {
@@ -100,11 +109,21 @@ class super_utils
     }
   }
 
+  /*! \brief Get all the supergates.
+   *
+   * Returns a list of supergates created according to
+   * the standard library and the supergates specifications.
+   */
   const std::deque<composed_gate<NInputs>>& get_super_library() const
   {
     return _supergates;
   }
 
+  /*! \brief Get the number of standard gates.
+   *
+   * Returns the number of standard gates contained in the
+   * supergate library.
+   */
   const uint32_t get_standard_library_size() const
   {
     return simple_gates_size;
@@ -255,8 +274,8 @@ class super_utils
       /* force at `is_super = false` simple gates considered as supergates.
        * This is necessary to not have duplicates since tech_library
        * computes indipendently the permutations for simple gates.
-       * Moreover simple gates permutations could be are incomplete in SUPER
-       * libraries constrained by number of gates. */
+       * Moreover simple gates permutations could be incomplete in SUPER
+       * libraries which are constrained by the number of gates. */
       bool is_super_verified = g.is_super;
       if ( simple_gate )
       {
diff --git a/include/mockturtle/utils/tech_library.hpp b/include/mockturtle/utils/tech_library.hpp
index c16cd8409..e7059668d 100644
--- a/include/mockturtle/utils/tech_library.hpp
+++ b/include/mockturtle/utils/tech_library.hpp
@@ -124,6 +124,8 @@ struct supergate
  * for big libraries with few symmetric gates. The template
  * parameter `NInputs` selects the maximum number of variables
  * allowed for a gate in the library.
+ * 
+ * The library can also be generated using supergate definitions.
  *
    \verbatim embed:rst
 
@@ -132,8 +134,14 @@ struct supergate
    .. code-block:: c++
 
       std::vector<gate> gates;
-      lorina::read_genlib( "file.lib", genlib_reader( gates ) );
+      lorina::read_genlib( "file.genlib", genlib_reader( gates ) );
+      // standard library
       mockturtle::tech_library lib( gates );
+
+      super_lib supergates_spec;
+      lorina::read_super( "file.super", super_reader( supergates_spec ) );
+      // library with supergates
+      mockturtle::tech_library lib_super( gates, supergates_spec );
    \endverbatim
  */
 template<unsigned NInputs = 4u, classification_type Configuration = classification_type::np_configurations>
diff --git a/include/mockturtle/views/binding_view.hpp b/include/mockturtle/views/binding_view.hpp
index 93a60db5f..f44955990 100644
--- a/include/mockturtle/views/binding_view.hpp
+++ b/include/mockturtle/views/binding_view.hpp
@@ -25,7 +25,7 @@
 
 /*!
   \file binding_view.hpp
-  \brief Implements methods to bind the network to a standard cells library
+  \brief Implements methods to bind the network to a standard cell library
 
   \author Alessandro Tempia Calvino
 */
@@ -42,6 +42,49 @@
 namespace mockturtle
 {
 
+/*! \brief Adds bindings to a technology library and mapping API methods.
+ *
+ * This view adds methods to create and manage a mapped network that
+ * implements gates contained in a technology library. This view
+ * is returned by the technology mapping command `map`. It can be used
+ * to report statistics about the network and write the network into
+ * a verilog file. It always adds the functions `has_binding`,
+ * `remove_binding`, `add_binding`, `add_binding_with_check`, `get_binding`,
+ * `get_binding_index`, `get_library`, `compute_area`, `compute_worst_delay`,
+ * `report_stats`, and `report_gates_usage`.
+ *
+ * **Required network functions:**
+ * - `size`
+ * - `foreach_node`
+ * - `foreach_fanin`
+ * - `is_constant`
+ * - `is_pi`
+ *
+ * Example
+ *
+   \verbatim embed:rst
+
+   .. code-block:: c++
+
+      // create network somehow
+      aig_network aig = ...;
+
+      // read cell library in genlib format
+      std::vector<gate> gates;
+      lorina::read_genlib( "file.genlib", genlib_reader( gates ) );
+      tech_library tech_lib( gates );
+
+      // call technology mapping to obtain the view
+      binding_view<klut_network> res = map( aig, tech_lib );
+
+      // prints stats and gates usage
+      res.report_stats();
+      res.report_gates_usage();
+
+      // write the mapped network in verilog
+      write_verilog( res, "file.v" );
+   \endverbatim
+ */
 template<class Ntk>
 class binding_view : public Ntk
 {
@@ -55,6 +98,10 @@ class binding_view : public Ntk
       , _bindings( *this )
   {
     static_assert( is_network_type_v<Ntk>, "Ntk is not a network type" );
+    static_assert( has_foreach_node_v<Ntk>, "Ntk does not implement the foreach_node method" );
+    static_assert( has_foreach_fanin_v<Ntk>, "Ntk does not implement the foreach_fanin method" );
+    static_assert( has_is_constant_v<Ntk>, "Ntk does not implement the is_constant method" );
+    static_assert( has_is_pi_v<Ntk>, "Ntk does not implement the is_pi method" );
   }
 
   explicit binding_view( Ntk const& ntk, std::vector<gate> const& library )
@@ -63,6 +110,10 @@ class binding_view : public Ntk
       , _bindings( *this )
   {
     static_assert( is_network_type_v<Ntk>, "Ntk is not a network type" );
+    static_assert( has_foreach_node_v<Ntk>, "Ntk does not implement the foreach_node method" );
+    static_assert( has_foreach_fanin_v<Ntk>, "Ntk does not implement the foreach_fanin method" );
+    static_assert( has_is_constant_v<Ntk>, "Ntk does not implement the is_constant method" );
+    static_assert( has_is_pi_v<Ntk>, "Ntk does not implement the is_pi method" );
   }
 
   binding_view<Ntk>& operator=( binding_view<Ntk> const& binding_ntk )

From 4f7b9beaa18dabcca6e5c244b213ef8e67429347 Mon Sep 17 00:00:00 2001
From: aletempiac <alessandro.tempia@gmail.com>
Date: Thu, 12 Aug 2021 16:02:04 +0200
Subject: [PATCH 35/40] Updates to the documentation

---
 docs/algorithms/mapper.rst          | 16 ++++++++--------
 docs/utils/util_data_structures.rst |  4 ++--
 2 files changed, 10 insertions(+), 10 deletions(-)

diff --git a/docs/algorithms/mapper.rst b/docs/algorithms/mapper.rst
index ebc4d97a9..f1aa9b66b 100644
--- a/docs/algorithms/mapper.rst
+++ b/docs/algorithms/mapper.rst
@@ -1,14 +1,14 @@
-Mapper
-------
+Technology mapping and network conversion
+-----------------------------------------
 
 **Header:** ``mockturtle/algorithms/mapper.hpp``
 
-A versatile mapper that supports technology mapping and graph mapping.
-The mapper is independent of the underlying graph representation. Hence,
-it supports generic subject graph representations (e.g., AIG, and MIG)
-and a generic target representation (e.g. cell library, XMG).
-The mapper aims at finding a good mapping with respect to delay, area,
-and switching power.
+A versatile mapper that supports technology mapping and graph mapping
+(optimized network conversion). The mapper is independent of the
+underlying graph representation. Hence, it supports generic subject
+graph representations (e.g., AIG and MIG) and a generic target
+representation (e.g., cell library, XMG). The mapper aims at finding a
+good mapping with respect to delay, area, and switching power.
 
 The mapper uses a library (hash table) to facilitate Boolean matching.
 For technology mapping, it needs `tech_library` while for graph mapping
diff --git a/docs/utils/util_data_structures.rst b/docs/utils/util_data_structures.rst
index d09ecbaa7..4fb8de139 100644
--- a/docs/utils/util_data_structures.rst
+++ b/docs/utils/util_data_structures.rst
@@ -65,8 +65,8 @@ Exact Library
 .. doxygenclass:: mockturtle::exact_library
    :members:
 
-Super Utils
-~~~~~~~~~~~
+Supergates utils
+~~~~~~~~~~~~~~~~
 
 **Header:** ``mockturtle/utils/super_utils.hpp``
 

From 1cc8d56b166a3366cedd5d3a0d39a43a2dbf4ac9 Mon Sep 17 00:00:00 2001
From: aletempiac <alessandro.tempia@gmail.com>
Date: Mon, 23 Aug 2021 11:44:20 +0200
Subject: [PATCH 36/40] Minor fixes and error reporting in mapper

---
 include/mockturtle/algorithms/mapper.hpp  | 64 +++++++++++++++++------
 include/mockturtle/utils/tech_library.hpp | 12 ++---
 2 files changed, 54 insertions(+), 22 deletions(-)

diff --git a/include/mockturtle/algorithms/mapper.hpp b/include/mockturtle/algorithms/mapper.hpp
index d7d3e2127..4a850b7e8 100644
--- a/include/mockturtle/algorithms/mapper.hpp
+++ b/include/mockturtle/algorithms/mapper.hpp
@@ -44,6 +44,7 @@
 #include "../views/binding_view.hpp"
 #include "../views/depth_view.hpp"
 #include "../views/topo_view.hpp"
+#include "cleanup.hpp"
 #include "cut_enumeration.hpp"
 #include "cut_enumeration/exact_map_cut.hpp"
 #include "cut_enumeration/tech_map_cut.hpp"
@@ -1752,14 +1753,20 @@ class exact_map_impl
     /* compute mapping delay */
     if ( !ps.skip_delay_round )
     {
-      compute_mapping<false>();
+      if ( !compute_mapping<false>() )
+      {
+        return res;
+      }
     }
 
     /* compute mapping using global area flow */
     while ( iteration < ps.area_flow_rounds + 1 )
     {
       compute_required_time();
-      compute_mapping<true>();
+      if ( !compute_mapping<true>() )
+      {
+        return res;
+      }
     }
 
     /* compute mapping using exact area */
@@ -1767,9 +1774,19 @@ class exact_map_impl
     {
       compute_required_time();
       if ( ps.enable_logic_sharing && iteration == ps.ela_rounds + ps.area_flow_rounds )
-        compute_exact_area_aggressive( res, old2new );
+      {
+        if ( !compute_exact_area_aggressive( res, old2new ) )
+        {
+          return res;
+        }
+      }
       else
-        compute_exact_area();
+      {
+        if ( !compute_exact_area() )
+        {
+          return res;
+        }
+      }
     }
 
     /* generate the output network using the computed mapping */
@@ -1871,7 +1888,7 @@ class exact_map_impl
   }
 
   template<bool DO_AREA>
-  void compute_mapping()
+  bool compute_mapping()
   {
     for ( auto const& n : top_order )
     {
@@ -1889,7 +1906,7 @@ class exact_map_impl
     }
 
     double area_old = area;
-    set_mapping_refs<false>();
+    bool success = set_mapping_refs<false>();
 
     /* round stats */
     if ( ps.verbose )
@@ -1910,9 +1927,11 @@ class exact_map_impl
       }
       st.round_stats.push_back( stats.str() );
     }
+
+    return success;
   }
 
-  void compute_exact_area()
+  bool compute_exact_area()
   {
     for ( auto const& n : top_order )
     {
@@ -1943,7 +1962,7 @@ class exact_map_impl
     }
 
     double area_old = area;
-    set_mapping_refs<true>();
+    bool success = set_mapping_refs<true>();
 
     /* round stats */
     if ( ps.verbose )
@@ -1953,6 +1972,8 @@ class exact_map_impl
       stats << fmt::format( "[i] Area     : Delay = {:>12.2f}  Area = {:>12.2f}  {:>5.2f} %\n", delay, area, area_gain );
       st.round_stats.push_back( stats.str() );
     }
+
+    return success;
   }
 
   void finalize_cover( NtkDest& res, node_map<signal<NtkDest>, Ntk>& old2new )
@@ -2009,7 +2030,7 @@ class exact_map_impl
   }
 
   template<bool ELA>
-  void set_mapping_refs()
+  bool set_mapping_refs()
   {
     const auto coef = 1.0f / ( 2.0f + ( iteration + 1 ) * ( iteration + 1 ) );
 
@@ -2046,7 +2067,11 @@ class exact_map_impl
     {
       const auto index = ntk.node_to_index( *it );
       /* skip constants and PIs */
-      if ( ntk.is_pi( *it ) )
+      if ( ntk.is_constant( *it ) )
+      {
+        continue;
+      }
+      else if ( ntk.is_pi( *it ) )
       {
         if ( node_match[index].map_refs[1] > 0u )
         {
@@ -2055,10 +2080,6 @@ class exact_map_impl
         }
         continue;
       }
-      else if ( ntk.is_constant( *it ) )
-      {
-        continue;
-      }
 
       if ( node_match[index].map_refs[2] == 0u )
         continue;
@@ -2066,6 +2087,14 @@ class exact_map_impl
       auto& node_data = node_match[index];
       unsigned use_phase = node_data.best_supergate[0] == nullptr ? 1u : 0u;
 
+      if ( node_data.best_supergate[use_phase] == nullptr )
+      {
+        /* Library is not complete, mapping is not possible */
+        std::cerr << "[i] MAP ERROR: library is not complete, impossible to perform mapping" << std::endl;
+        st.mapping_error = true;
+        return false;
+      }
+
       if ( node_data.same_match || node_data.map_refs[use_phase] > 0 )
       {
         if constexpr ( !ELA )
@@ -2122,6 +2151,7 @@ class exact_map_impl
     }
 
     ++iteration;
+    return true;
   }
 
   void compute_required_time()
@@ -2449,7 +2479,7 @@ class exact_map_impl
     }
   }
 
-  void compute_exact_area_aggressive( NtkDest& res, node_map<signal<NtkDest>, Ntk>& old2new )
+  bool compute_exact_area_aggressive( NtkDest& res, node_map<signal<NtkDest>, Ntk>& old2new )
   {
     depth_view<NtkDest> res_d{ res };
 
@@ -2540,7 +2570,7 @@ class exact_map_impl
     }
     
     double area_old = area;
-    set_mapping_refs<true>();
+    bool success = set_mapping_refs<true>();
 
     /* round stats */
     if ( ps.verbose )
@@ -2550,6 +2580,8 @@ class exact_map_impl
       stats << fmt::format( "[i] Area RW  : Delay = {:>12.2f}  Area = {:>12.2f}  {:>5.2f} %\n", delay, area, area_gain );
       st.round_stats.push_back( stats.str() );
     }
+
+    return success;
   }
 
   signal<NtkDest> match_phase_exact_aggressive( depth_view<NtkDest>& res, node_map<signal<NtkDest>, Ntk>& old2new, node<Ntk> const& n, uint8_t phase )
diff --git a/include/mockturtle/utils/tech_library.hpp b/include/mockturtle/utils/tech_library.hpp
index e7059668d..29c35c24e 100644
--- a/include/mockturtle/utils/tech_library.hpp
+++ b/include/mockturtle/utils/tech_library.hpp
@@ -275,7 +275,7 @@ class tech_library
       {
         const auto on_np = [&]( auto const& tt, auto neg, auto const& perm ) {
           supergate<NInputs> sg = {&gate,
-                                  gate.area,
+                                  static_cast<float>( gate.area ),
                                   {},
                                   perm,
                                   0};
@@ -419,10 +419,10 @@ class tech_library
           std::iota( perm.begin(), perm.end(), 0u );
 
           supergate<NInputs> sg = {&gate,
-                                  gate.area,
+                                  static_cast<float>( gate.area ),
                                   {},
                                   perm,
-                                  neg};
+                                  static_cast<uint8_t>( neg )};
 
           for ( auto i = 0u; i < perm.size() && i < NInputs; ++i )
           {
@@ -481,10 +481,10 @@ class tech_library
           for( auto phase : phases )
           {
             supergate<NInputs> sg = {&gate,
-                                    gate.area,
+                                    static_cast<float>( gate.area ),
                                     {},
                                     perm,
-                                    phase};
+                                    static_cast<uint8_t>( phase )};
 
             for ( auto i = 0u; i < perm.size() && i < NInputs; ++i )
             {
@@ -782,7 +782,7 @@ class exact_library
 
         for ( auto const& gate : pair.second )
         {
-          printf( "%.2f,%.2f,%d,%d,:", gate.worstDelay, gate.area, gate.polarity, gate.n_inputs );
+          printf( "%.2f,%.2f,%x,%d,:", gate.worstDelay, gate.area, gate.polarity, gate.n_inputs );
           for ( auto j = 0u; j < NInputs; ++j )
             printf( "%.2f/", gate.tdelay[j] );
           std::cout << " ";

From 76bda996dcf187cf7239616ade9c016034d72495 Mon Sep 17 00:00:00 2001
From: aletempiac <alessandro.tempia@gmail.com>
Date: Mon, 23 Aug 2021 11:54:11 +0200
Subject: [PATCH 37/40] Small fix

---
 include/mockturtle/utils/super_utils.hpp | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/include/mockturtle/utils/super_utils.hpp b/include/mockturtle/utils/super_utils.hpp
index c249cfe85..9ae696875 100644
--- a/include/mockturtle/utils/super_utils.hpp
+++ b/include/mockturtle/utils/super_utils.hpp
@@ -71,7 +71,7 @@ struct composed_gate
   kitty::dynamic_truth_table function;
 
   /* area */
-  double area{ 0.0f };
+  double area{ 0.0 };
 
   /* pin-to-pin delays */
   std::array<float, NInputs> tdelay{};
@@ -208,7 +208,7 @@ class super_utils
                                                        nullptr,
                                                        0,
                                                        tt,
-                                                       0.0f,
+                                                       0.0,
                                                        {},
                                                        {}} );
     }

From 634f67348c387e9d6c05061bd378be2925d78f33 Mon Sep 17 00:00:00 2001
From: aletempiac <alessandro.tempia@gmail.com>
Date: Mon, 14 Mar 2022 17:55:40 +0100
Subject: [PATCH 38/40] bug fix in AQFP network

---
 include/mockturtle/networks/aqfp.hpp | 16 +++++++++++++---
 1 file changed, 13 insertions(+), 3 deletions(-)

diff --git a/include/mockturtle/networks/aqfp.hpp b/include/mockturtle/networks/aqfp.hpp
index 087fad2f2..1ed952e1a 100644
--- a/include/mockturtle/networks/aqfp.hpp
+++ b/include/mockturtle/networks/aqfp.hpp
@@ -28,6 +28,7 @@
   \brief AQFP network implementation
 
   \author Dewmini Marakkalage
+  \author Alessandro Tempia Calvino
 */
 
 #pragma once
@@ -521,7 +522,8 @@ class aqfp_network
 
     std::vector<signal> old_children;
 
-    for ( auto i = 0u; i <= node.children.size(); ++i )
+    size_t i;
+    for ( i = 0u; i <= node.children.size(); ++i )
     {
       if ( i == node.children.size() )
       {
@@ -533,11 +535,19 @@ class aqfp_network
       if ( node.children[i].index == old_node )
       {
         node.children[i] = node.children[i].weight ? !new_signal : new_signal;
-        new_signal.complement ^= node.children[i].weight;
+        break;
       }
     }
 
-    /* TODO: Do the simplifications if possible */
+    while ( ++i < node.children.size() )
+    {
+      old_children.push_back( signal{ node.children[i] } );
+    }
+
+    /* TODO: Do the simplifications if possible and ordering */
+
+    // update the reference counter of the new signal
+    _storage->nodes[new_signal.index].data[0].h1++;
 
     for ( auto const& fn : _events->on_modified )
     {

From 799aafe1244d6bfabddc456b05ff0a73c2e2fc34 Mon Sep 17 00:00:00 2001
From: aletempiac <alessandro.tempia@gmail.com>
Date: Mon, 14 Mar 2022 18:13:27 +0100
Subject: [PATCH 39/40] fix in binding view

---
 include/mockturtle/views/binding_view.hpp |  4 ++--
 test/views/binding_view.cpp               | 10 +++++-----
 2 files changed, 7 insertions(+), 7 deletions(-)

diff --git a/include/mockturtle/views/binding_view.hpp b/include/mockturtle/views/binding_view.hpp
index f44955990..7326c21da 100644
--- a/include/mockturtle/views/binding_view.hpp
+++ b/include/mockturtle/views/binding_view.hpp
@@ -239,7 +239,7 @@ class binding_view : public Ntk
       {
         float tot_gate_area = gates_profile[i] * _library[i].area;
 
-        os << fmt::format( "[i] {:<15}", _library[i].name )
+        os << fmt::format( "[i] {:<25}", _library[i].name )
            << fmt::format( "\t Instance = {:>10d}", gates_profile[i] )
            << fmt::format( "\t Area = {:>12.2f}", tot_gate_area )
            << fmt::format( " {:>8.2f} %\n", tot_gate_area / area * 100 );
@@ -248,7 +248,7 @@ class binding_view : public Ntk
       }
     }
 
-    os << fmt::format( "[i] {:<15}", "TOTAL" )
+    os << fmt::format( "[i] {:<25}", "TOTAL" )
        << fmt::format( "\t Instance = {:>10d}", tot_instances )
        << fmt::format( "\t Area = {:>12.2f}   100.00 %\n", area );
   }
diff --git a/test/views/binding_view.cpp b/test/views/binding_view.cpp
index c10703a5a..114da60a8 100644
--- a/test/views/binding_view.cpp
+++ b/test/views/binding_view.cpp
@@ -80,11 +80,11 @@ TEST_CASE( "Create binding view", "[binding_view]" )
   std::stringstream report_gates;
   ntk.report_gates_usage( report_gates );
   CHECK( report_gates.str() == "[i] Report gates usage:\n"
-                               "[i] zero           \t Instance =          1\t Area =         0.00     0.00 %\n"
-                               "[i] inverter       \t Instance =          1\t Area =         1.00     6.25 %\n"
-                               "[i] and            \t Instance =          2\t Area =        10.00    62.50 %\n"
-                               "[i] or             \t Instance =          1\t Area =         5.00    31.25 %\n"
-                               "[i] TOTAL          \t Instance =          5\t Area =        16.00   100.00 %\n" );
+                               "[i] zero                     \t Instance =          1\t Area =         0.00     0.00 %\n"
+                               "[i] inverter                 \t Instance =          1\t Area =         1.00     6.25 %\n"
+                               "[i] and                      \t Instance =          2\t Area =        10.00    62.50 %\n"
+                               "[i] or                       \t Instance =          1\t Area =         5.00    31.25 %\n"
+                               "[i] TOTAL                    \t Instance =          5\t Area =        16.00   100.00 %\n" );
 }
 
 TEST_CASE( "Binding view on copy", "[binding_view]" )

From 72118f81507cc7ab2f8a2dc96f9c8313c90c5915 Mon Sep 17 00:00:00 2001
From: aletempiac <alessandro.tempia@gmail.com>
Date: Tue, 15 Mar 2022 16:40:06 +0100
Subject: [PATCH 40/40] Removed random truth table in akers_synthesis test
 cases, AQFP network fix

---
 include/mockturtle/networks/aqfp.hpp | 21 ++++++++-------------
 test/algorithms/akers_synthesis.cpp  | 10 +++++++---
 2 files changed, 15 insertions(+), 16 deletions(-)

diff --git a/include/mockturtle/networks/aqfp.hpp b/include/mockturtle/networks/aqfp.hpp
index 1ed952e1a..e785e0788 100644
--- a/include/mockturtle/networks/aqfp.hpp
+++ b/include/mockturtle/networks/aqfp.hpp
@@ -522,33 +522,28 @@ class aqfp_network
 
     std::vector<signal> old_children;
 
-    size_t i;
-    for ( i = 0u; i <= node.children.size(); ++i )
+    bool replacement = false;
+    for ( size_t i = 0u; i < node.children.size(); ++i )
     {
-      if ( i == node.children.size() )
-      {
-        return std::nullopt;
-      }
-
       old_children.push_back( signal{ node.children[i] } );
 
       if ( node.children[i].index == old_node )
       {
         node.children[i] = node.children[i].weight ? !new_signal : new_signal;
-        break;
+        replacement = true;
+
+        // update the reference counter of the new signal
+        _storage->nodes[new_signal.index].data[0].h1++;
       }
     }
 
-    while ( ++i < node.children.size() )
+    if ( !replacement )
     {
-      old_children.push_back( signal{ node.children[i] } );
+      return std::nullopt;
     }
 
     /* TODO: Do the simplifications if possible and ordering */
 
-    // update the reference counter of the new signal
-    _storage->nodes[new_signal.index].data[0].h1++;
-
     for ( auto const& fn : _events->on_modified )
     {
       (*fn)( n, old_children );
diff --git a/test/algorithms/akers_synthesis.cpp b/test/algorithms/akers_synthesis.cpp
index 7077f0dc1..e9fb3ce91 100644
--- a/test/algorithms/akers_synthesis.cpp
+++ b/test/algorithms/akers_synthesis.cpp
@@ -89,6 +89,8 @@ TEST_CASE( "Check Akers for MAJ-5 in XMG", "[akers_synthesis]" )
 
 TEST_CASE( "Check Akers for random - 4 inputs", "[akers_synthesis]" )
 {
+  std::array<std::string, 5> tts = { "d5d0", "fe52", "ad1b", "401a", "79e2" };
+
   for ( auto y = 0; y < 5; y++ )
   {
     std::vector<kitty::dynamic_truth_table> xs{6, kitty::dynamic_truth_table( 4 )};
@@ -97,7 +99,7 @@ TEST_CASE( "Check Akers for random - 4 inputs", "[akers_synthesis]" )
     kitty::create_nth_var( xs[4], 2 );
     kitty::create_nth_var( xs[5], 3 );
 
-    create_random( xs[0] );
+    create_from_hex_string( xs[0], tts[y] );
 
     for ( auto i = 0u; i < unsigned( xs[0].num_bits() ); i++ )
     {
@@ -134,6 +136,8 @@ TEST_CASE( "Check Akers for random - 4 inputs", "[akers_synthesis]" )
 
 TEST_CASE( "Check Akers for random - 5 inputs", "[akers_synthesis]" )
 {
+  std::array<std::string, 5> tts = { "e3cee67b", "bb5bee39", "b220ff4c", "fa43751f", "9ec83bf4" };
+
   for ( auto y = 0; y < 5; y++ )
   {
     std::vector<kitty::dynamic_truth_table> xs{7, kitty::dynamic_truth_table( 5 )};
@@ -143,7 +147,7 @@ TEST_CASE( "Check Akers for random - 5 inputs", "[akers_synthesis]" )
     kitty::create_nth_var( xs[5], 3 );
     kitty::create_nth_var( xs[6], 4 );
 
-    create_random( xs[0] );
+    create_from_hex_string( xs[0], tts[y] );
 
     for ( auto i = 0u; i < unsigned( xs[0].num_bits() ); i++ )
     {
@@ -190,7 +194,7 @@ TEST_CASE( "Check Akers for random - 6 inputs", "[akers_synthesis]" )
     kitty::create_nth_var( xs[6], 4 );
     kitty::create_nth_var( xs[7], 5 );
 
-    create_random( xs[0] );
+    create_from_hex_string( xs[0], "32b43db39dde2b16" );
 
     for ( auto i = 0u; i < unsigned( xs[0].num_bits() ); i++ )
     {