diff --git a/recon_test_pack/root_header.hroot b/recon_test_pack/root_header.hroot
index afc654912..d90108dc4 100644
--- a/recon_test_pack/root_header.hroot
+++ b/recon_test_pack/root_header.hroot
@@ -14,6 +14,7 @@ View offset (degrees)					 := 0
 Maximum number of (unmashed) TOF time bins := 5
 Size of unmashed TOF time bins (ps)        := 820.0
 TOF timing resolution (ps)                 := 400.0
+TOF mashing factor:= 1
 
 GATE scanner type := GATE_Cylindrical_PET
 GATE_Cylindrical_PET Parameters :=
diff --git a/src/IO/InterfileHeader.cxx b/src/IO/InterfileHeader.cxx
index ccafaa89c..bbcfa195c 100644
--- a/src/IO/InterfileHeader.cxx
+++ b/src/IO/InterfileHeader.cxx
@@ -612,19 +612,19 @@ InterfilePDFSHeader::InterfilePDFSHeader()
   reference_energy = -1.f;
   add_key("Reference energy (in keV)", &reference_energy);
 
-  max_num_timing_poss = -1;
+  max_num_timing_poss = 1;
   add_key("Maximum number of (unmashed) TOF time bins", &max_num_timing_poss);
 #if STIR_VERSION < 070000
   add_alias_key("Maximum number of (unmashed) TOF time bins", "Number of TOF time bins");
 #endif
   timing_poss_sequence.clear();
   add_key("TOF bin order", &timing_poss_sequence);
-  size_of_timing_pos = -1.f;
+  size_of_timing_pos = 0.0f;
   add_key("Size of unmashed TOF time bins (ps)", &size_of_timing_pos);
 #if STIR_VERSION < 070000
   add_alias_key("Size of unmashed TOF time bins (ps)", "Size of timing bin (ps)");
 #endif
-  timing_resolution = -1.f;
+  timing_resolution = 0.0f;
   add_key("TOF timing resolution (ps)", &timing_resolution);
 #if STIR_VERSION < 070000
   add_alias_key("TOF timing resolution (ps)", "timing resolution (ps)");
diff --git a/src/IO/interfile.cxx b/src/IO/interfile.cxx
index e7565fb54..e4251b112 100644
--- a/src/IO/interfile.cxx
+++ b/src/IO/interfile.cxx
@@ -1201,7 +1201,7 @@ write_basic_interfile_PDFS_header(const string& header_file_name, const string&
 
   // it's PET data if we get here
   // N.E. Added timing locations
-  const bool is_TOF = pdfs.get_proj_data_info_sptr()->get_num_tof_poss() > 1;
+  const bool is_TOF = pdfs.get_proj_data_info_sptr()->is_tof_data();
   output_header << "number of dimensions := " + std::to_string(is_TOF ? 5 : 4) + "\n";
 
   // TODO support more ?
@@ -1225,12 +1225,16 @@ write_basic_interfile_PDFS_header(const string& header_file_name, const string&
      */
       {
         case ProjDataFromStream::Segment_View_AxialPos_TangPos: {
+          if (is_TOF) // TOF data with 1 timing position
+            order_of_timing_poss = 5;
           order_of_segment = 4;
           order_of_view = 3;
           order_of_z = 2;
           break;
         }
         case ProjDataFromStream::Segment_AxialPos_View_TangPos: {
+          if (is_TOF) // TOF data with 1 timing position
+            order_of_timing_poss = 5;
           order_of_segment = 4;
           order_of_view = 2;
           order_of_z = 3;
@@ -1252,8 +1256,7 @@ write_basic_interfile_PDFS_header(const string& header_file_name, const string&
     if (order_of_timing_poss > 0)
       {
         output_header << "matrix axis label [" << order_of_timing_poss << "] := timing positions\n";
-        output_header << "!matrix size [" << order_of_timing_poss << "] := " << pdfs.get_timing_poss_sequence_in_stream().size()
-                      << "\n";
+        output_header << "!matrix size [" << order_of_timing_poss << "] := " << pdfs.get_num_tof_poss() << "\n";
       }
 
     output_header << "matrix axis label [" << order_of_segment << "] := segment\n";
diff --git a/src/buildblock/FilePath.cxx b/src/buildblock/FilePath.cxx
index 598e6dcda..f1948c510 100644
--- a/src/buildblock/FilePath.cxx
+++ b/src/buildblock/FilePath.cxx
@@ -72,7 +72,7 @@ FilePath::is_directory() const
   struct stat info;
 
   if (stat(my_string.c_str(), &info) != 0)
-    error(boost::format("FilePath: Cannot access %1%.") % my_string);
+    warning(boost::format("FilePath: Cannot access %1%.") % my_string);
   else if (info.st_mode & S_IFDIR)
     return true;
 #endif
@@ -92,7 +92,7 @@ FilePath::is_regular_file() const
   struct stat info;
 
   if (stat(my_string.c_str(), &info) != 0)
-    error(boost::format("FilePath: Cannot access %1%.") % my_string);
+    warning(boost::format("FilePath: Cannot access %1%.") % my_string);
   else if (info.st_mode & S_IFREG)
     return true;
 #endif
diff --git a/src/buildblock/ML_norm.cxx b/src/buildblock/ML_norm.cxx
index bc8c10910..57e6031ac 100644
--- a/src/buildblock/ML_norm.cxx
+++ b/src/buildblock/ML_norm.cxx
@@ -29,6 +29,10 @@
 #include "stir/stream.h"
 #include "stir/warning.h"
 #include "stir/error.h"
+#include "stir/NumericType.h"
+#include "stir/ByteOrder.h"
+#include "stir/IO/read_data.h"
+#include "stir/FilePath.h"
 
 #ifdef STIR_OPENMP
 #  include <omp.h>
@@ -40,6 +44,69 @@ using std::max;
 
 START_NAMESPACE_STIR
 
+        //! TODO: Move this function somewhere else. All cache functions should be together
+        std::string get_cache_path()
+{
+  return FilePath::get_current_working_directory();
+}
+
+//! TODO: Move this function somewhere else. All cache functions should be together
+std::string get_cache_filename(unsigned int file_id)
+{
+  std::string cache_filename = "my_CACHE" + std::to_string(file_id) + ".bin";
+  FilePath icache(cache_filename, false);
+  icache.prepend_directory_name(get_cache_path());
+  return icache.get_as_string();
+}
+
+//! TODO: Move this function somewhere else. All cache functions should be together
+bool load_listmode_cache_file(
+    unsigned int file_id, std::vector<BinAndCorr>& record_cache)
+{
+  FilePath icache(get_cache_filename(file_id), false);
+
+  record_cache.clear();
+
+  if (icache.is_regular_file())
+    {
+      info(boost::format("Loading Listmode cache from disk %1%") % icache.get_as_string());
+      std::ifstream fin(icache.get_as_string(), std::ios::in | std::ios::binary | std::ios::ate);
+
+      const std::size_t num_records = fin.tellg() / sizeof(Bin);
+      try
+        {
+          record_cache.reserve(num_records + 1); // add 1 to avoid reallocation when overruning (see below)
+        }
+      catch (...)
+        {
+          error("Listmode: cannot allocate cache for " + std::to_string(num_records + 1) + " records");
+        }
+      if (!fin)
+        error("Error opening cache file \"" + icache.get_as_string() + "\" for reading.");
+
+      fin.clear();
+      fin.seekg(0);
+
+      while (!fin.eof())
+        {
+          BinAndCorr tmp;
+          fin.read((char*)&tmp, sizeof(Bin));
+          record_cache.push_back(tmp);
+        }
+      // The while will push one junk record
+      record_cache.pop_back();
+      fin.close();
+    }
+  else
+    {
+      warning("Cannot find Listmode cache on disk. Please recompute it or do not set the  max cache size. Abort.");
+      return false; // need to return something to avoid compiler warning
+    }
+
+  info(boost::format("Cached Events: %1% ") % record_cache.size(), 2);
+  return true;
+}
+
 DetPairData::DetPairData()
 {}
 
@@ -377,10 +444,15 @@ iterate_efficiencies(Array<1, float>& efficiencies, const Array<1, float>& data_
       else
         {
           // const float denominator = inner_product(efficiencies,model[a]);
-          float denominator = 0;
+          double denominator = 0;
+#ifdef STIR_OPENMP
+#  if _OPENMP >= 201107
+#    pragma omp parallel for reduction(+ : denominator)
+#  endif
+#endif
           for (int b = model.get_min_index(a); b <= model.get_max_index(a); ++b)
-            denominator += efficiencies[b % num_detectors] * model(a, b);
-          efficiencies[a] = data_fan_sums[a] / denominator;
+            denominator += static_cast<double>(efficiencies[b % num_detectors] * model(a, b));
+          efficiencies[a] = data_fan_sums[a] / static_cast<float>(denominator);
         }
     }
 }
@@ -1048,6 +1120,8 @@ set_det_pair_data(ProjData& proj_data, const DetPairData& det_pair_data, const i
     }
 }
 
+
+
 /// **** This function make fan_data from projecion file while removing the intermodule gaps **** ////
 /// *** fan_data doesn't have gaps, proj_data has gaps *** ///
 template <class TProjDataInfo>
@@ -1134,6 +1208,313 @@ make_fan_data_remove_gaps_help(FanProjData& fan_data,
     }
 }
 
+
+void
+load_fan_data(FanProjData& fan_data, const ProjData& proj_data,
+              const std::string fan_filename)
+{
+  int num_rings;
+  int num_detectors_per_ring;
+  int fan_size;
+  int max_delta;
+
+  if (proj_data.get_proj_data_info_sptr()->is_tof_data())
+    error("make_fan_data: Incompatible with TOF data. Abort.");
+
+  const ProjDataInfo& proj_data_info = *proj_data.get_proj_data_info_sptr();
+  get_fan_info(num_rings, num_detectors_per_ring, max_delta, fan_size, proj_data_info);
+
+  // if (proj_data.get_proj_data_info_sptr()->get_scanner_ptr()->get_scanner_geometry() == "Cylindrical")
+  //   {
+  //     auto proj_data_info_ptr = dynamic_cast<const ProjDataInfoCylindricalNoArcCorr* const>(&proj_data_info);
+  //   }
+  // else
+  //   {
+  //     auto proj_data_info_ptr = dynamic_cast<const ProjDataInfoBlocksOnCylindricalNoArcCorr* const>(&proj_data_info);
+  //   }
+
+  if (proj_data.get_proj_data_info_sptr()->is_tof_data())
+    error("make_fan_data: Incompatible with TOF data. Abort.");
+
+  const int half_fan_size = fan_size / 2;
+  const int num_virtual_axial_crystals_per_block = proj_data_info.get_scanner_sptr()->get_num_virtual_axial_crystals_per_block();
+
+  const int num_virtual_transaxial_crystals_per_block
+      = proj_data_info.get_scanner_sptr()->get_num_virtual_transaxial_crystals_per_block();
+
+  const int num_transaxial_blocks = proj_data_info.get_scanner_sptr()->get_num_transaxial_blocks();
+  const int num_axial_blocks = proj_data_info.get_scanner_sptr()->get_num_axial_blocks();
+  const int num_transaxial_crystals_per_block = proj_data_info.get_scanner_sptr()->get_num_transaxial_crystals_per_block();
+  const int num_axial_crystals_per_block = proj_data_info.get_scanner_sptr()->get_num_axial_crystals_per_block();
+
+         // const int num_physical_transaxial_crystals_per_block
+         //     = num_transaxial_crystals_per_block - num_virtual_transaxial_crystals_per_block;
+
+         // const int num_physical_axial_crystals_per_block = num_axial_crystals_per_block - num_virtual_axial_crystals_per_block;
+
+  const int num_transaxial_blocks_in_fansize = fan_size / (num_transaxial_crystals_per_block);
+  const int new_fan_size = fan_size - num_transaxial_blocks_in_fansize * num_virtual_transaxial_crystals_per_block;
+  const int new_half_fan_size = new_fan_size / 2;
+  const int num_axial_blocks_in_max_delta = max_delta / (num_axial_crystals_per_block);
+  const int new_max_delta = max_delta - (num_axial_blocks_in_max_delta)*num_virtual_axial_crystals_per_block;
+  const int num_physical_detectors_per_ring
+      = num_detectors_per_ring - num_transaxial_blocks * num_virtual_transaxial_crystals_per_block;
+  const int num_physical_rings = num_rings - (num_axial_blocks - 1) * num_virtual_axial_crystals_per_block;
+  fan_data = FanProjData(num_physical_rings, num_physical_detectors_per_ring, new_max_delta, 2 * new_half_fan_size + 1);
+
+  info("Loading model fansums from the disk ...");
+  shared_ptr<std::iostream> fan_stream(
+      new std::fstream(fan_filename, std::ios::in | std::ios::binary));
+
+  float scale = 1.f;
+
+  info("Reading  from  disk...");
+  if(read_data(*fan_stream, fan_data, NumericType::Type::FLOAT,scale,
+                ByteOrder::Order::little_endian) == Succeeded::no)
+    error("Error writing FanProjData\n");
+
+}
+
+double
+make_all_fan_data_from_cache(
+                             FanProjData& fan_data,
+                             const ProjData& proj_data,
+    const FanProjData& model)
+{
+
+  int num_rings;
+  int num_detectors_per_ring;
+  int fan_size;
+  int max_delta;
+  // get_fan_info(num_rings, num_detectors_per_ring, max_delta, fan_size, *proj_data.get_proj_data_info_sptr());
+
+  auto proj_data_info_ptr
+      = dynamic_cast<const ProjDataInfoCylindricalNoArcCorr* const>(&(*proj_data.get_proj_data_info_sptr()));
+  get_fan_info(num_rings, num_detectors_per_ring, max_delta, fan_size, *proj_data_info_ptr);
+
+  double return_value = 0;
+
+  const int half_fan_size = fan_size / 2;
+  const int num_virtual_axial_crystals_per_block = proj_data_info_ptr->get_scanner_sptr()->get_num_virtual_axial_crystals_per_block();
+
+  const int num_virtual_transaxial_crystals_per_block
+      = proj_data_info_ptr->get_scanner_sptr()->get_num_virtual_transaxial_crystals_per_block();
+
+  const int num_transaxial_blocks = proj_data_info_ptr->get_scanner_sptr()->get_num_transaxial_blocks();
+  const int num_axial_blocks = proj_data_info_ptr->get_scanner_sptr()->get_num_axial_blocks();
+  const int num_transaxial_crystals_per_block = proj_data_info_ptr->get_scanner_sptr()->get_num_transaxial_crystals_per_block();
+  const int num_axial_crystals_per_block = proj_data_info_ptr->get_scanner_sptr()->get_num_axial_crystals_per_block();
+
+  const int num_physical_transaxial_crystals_per_block
+      = num_transaxial_crystals_per_block - num_virtual_transaxial_crystals_per_block;
+
+  const int num_physical_axial_crystals_per_block = num_axial_crystals_per_block - num_virtual_axial_crystals_per_block;
+
+  const int num_transaxial_blocks_in_fansize = fan_size / (num_transaxial_crystals_per_block);
+  const int new_fan_size = fan_size - num_transaxial_blocks_in_fansize * num_virtual_transaxial_crystals_per_block;
+  const int new_half_fan_size = new_fan_size / 2;
+  const int num_axial_blocks_in_max_delta = max_delta / (num_axial_crystals_per_block);
+  const int new_max_delta = max_delta - (num_axial_blocks_in_max_delta)*num_virtual_axial_crystals_per_block;
+  const int num_physical_detectors_per_ring
+      = num_detectors_per_ring - num_transaxial_blocks * num_virtual_transaxial_crystals_per_block;
+  const int num_physical_rings = num_rings - (num_axial_blocks - 1) * num_virtual_axial_crystals_per_block;
+  fan_data = FanProjData(num_physical_rings, num_physical_detectors_per_ring, new_max_delta, 2 * new_half_fan_size + 1);
+
+  fan_data.fill(0);
+
+  int ibatch = 0;
+  long int used_events = 0;
+  std::vector<BinAndCorr> record_cache;
+
+  while(true)//While we keep getting cache files.
+    {
+
+      if (!load_listmode_cache_file(ibatch, record_cache))
+        {
+          info("No more cache files in directory. Finished");
+          break;
+        }
+
+#ifdef STIR_OPENMP
+#  pragma omp for schedule(dynamic)
+#endif
+      for(long int ievent = 0; ievent < static_cast<long>(record_cache.size()); ++ievent)
+        {
+
+          auto& record = record_cache.at(ievent);
+          if (record.my_bin.get_bin_value() == 0.0f) // shouldn't happen really, but a check probably doesn't hurt
+            continue;
+
+          if (used_events % 1000000L == 0)
+            std::cout << "\r" << used_events << " events used" << std::flush;
+
+          const Bin& measured_bin = record.my_bin;
+          int ra = 0, a = 0;
+          int rb = 0, b = 0;
+
+          proj_data_info_ptr->get_det_pair_for_bin(a, ra, b, rb, measured_bin);
+
+          // I don't think we need this.
+          int a_in_block = a % num_transaxial_crystals_per_block;
+          if (a_in_block >= num_physical_transaxial_crystals_per_block)
+            continue;
+          int new_a = a - (a / num_transaxial_crystals_per_block) * num_virtual_transaxial_crystals_per_block;
+
+          int ra_in_block = ra % num_axial_crystals_per_block;
+          if (ra_in_block >= num_physical_axial_crystals_per_block)
+            continue;
+          int new_ra = ra - (ra / num_axial_crystals_per_block) * num_virtual_axial_crystals_per_block;
+
+          int b_in_block = b % num_transaxial_crystals_per_block;
+          if (b_in_block >= num_physical_transaxial_crystals_per_block)
+            continue;
+          int new_b = b - (b / num_transaxial_crystals_per_block) * num_virtual_transaxial_crystals_per_block;
+
+          int rb_in_block = rb % num_axial_crystals_per_block;
+          if (rb_in_block >= num_physical_axial_crystals_per_block)
+            continue;
+          int new_rb = rb - (rb / num_axial_crystals_per_block) * num_virtual_axial_crystals_per_block;
+
+          // std::cout << "NI" << new_ra << "  " << new_a << "  " << new_rb << " " << new_b << std::endl;
+          if (fan_data.is_in_data(new_ra, new_a, new_rb, new_b))
+            {
+              if(model(new_ra, new_a, new_rb, new_b) > 0)
+                {
+#ifdef STIR_OPENMP
+#  pragma omp critical
+#endif
+                  {
+                    used_events+=1;
+                    fan_data(new_ra, new_a, new_rb, new_b) += 1;
+                    fan_data(new_rb, new_b, new_ra, new_a) +=1;
+                  }
+                  //               if ( new_ra != mra &&  new_rb != mrb)
+                  //                 {
+                  // #ifdef STIR_OPENMP
+                  // #  pragma omp critical(FANPROJDATAWRITE)
+                  // #endif
+                  //                   try
+                  //                     {
+                  //                       fan_data( new_ra,  new_a,  new_rb,  new_b) +=1;
+                  //                       fan_data( new_ra,  ma,  new_rb,  new_b) +=1;
+                  //                       fan_data( mra,  new_a,  mrb,  new_b) +=1;
+                  //                       fan_data( mra,  ma,  mrb,  mb) +=1;
+                  //                     }
+                  //                   catch(...)
+                  //                     {}
+                  //                 }
+                  //               else
+                  //                 {
+                  // #ifdef STIR_OPENMP
+                  // #  pragma omp critical(FANPROJDATAWRITE)
+                  // #endif
+                  //                   try
+                  //                     {
+                  //                       fan_data( new_ra,  new_a,  new_rb,  new_b)  +=1;
+                  //                       fan_data( new_ra,  ma,  new_rb,  mb)  +=1;
+                  //                     }
+                  //                   catch(...)
+                  //                     {}
+                  //                 }
+                }
+            }
+        }
+
+      ibatch++;
+      record_cache.clear();
+    }
+
+  return_value = used_events;
+  record_cache.clear();
+
+//   std::vector<shared_ptr<GeoData3D>> local_geo_sptrs;
+//   int num_threads = 10;
+// #ifdef STIR_OPENMP
+// #  pragma omp single
+//   {
+//   std::cout << "Converting fan data to geo data..." << std::endl;
+//   std::cout << "We will be using " << num_threads << " threads... If your machine does not support that please contact sb from STIR" << std::endl;
+
+//   local_geo_sptrs.resize(num_threads, shared_ptr<GeoData3D>());
+
+//   for (int i = 0; i < num_threads; i++)
+//     {
+//       std::cout << "Allocated data to geo data... " << i<< std::endl;
+//       local_geo_sptrs[i].reset(new GeoData3D(geo_data.size(),
+//                                              geo_data[0].size(),
+//                                              geo_data[0][0].size(),
+//                                              geo_data[0][0][0].size()));
+//       local_geo_sptrs[i]->fill(0);
+//     }
+//   }
+//   #endif
+
+// std::cout << "Starting geo loops... " << std::endl;
+// #ifdef STIR_OPENMP
+// #pragma omp parallel for schedule(dynamic) collapse(4) num_threads(num_threads)
+// #endif
+//   for (int ra = 0; ra < num_axial_crystals_per_block; ++ra)
+//     {
+//       for (int a = 0; a < num_transaxial_crystals_per_block / 2; ++a)
+//         for (int axial_block_num = 0; axial_block_num < num_axial_blocks; ++axial_block_num)
+//           {
+//             for (int transaxial_block_num = 0; transaxial_block_num < num_transaxial_blocks; ++transaxial_block_num)
+//               {
+//                 // loop rb from ra to avoid double counting
+//                 // for (int rb = fan_data.get_min_ra(); rb <= fan_data.get_max_ra(); ++rb)
+//                 for (int rb = max(ra, fan_data.get_min_rb(ra)); rb <= fan_data.get_max_rb(ra); ++rb)
+//                   for (int b = fan_data.get_min_b(a); b <= fan_data.get_max_b(a); ++b)
+//                     {
+
+// #ifdef STIR_OPENMP
+//                       const int thread_num = omp_get_thread_num();
+// #else
+//                       const int thread_num = 0;
+// #endif
+
+//                       const int transaxial_det_inc = transaxial_block_num * num_transaxial_crystals_per_block;
+//                       const int new_det_num_a = (a + transaxial_det_inc) % num_transaxial_detectors;
+//                       const int new_det_num_b = (b + transaxial_det_inc) % num_transaxial_detectors;
+//                       const int axial_det_inc = axial_block_num * num_axial_crystals_per_block;
+//                       const int new_ring_num_a = ra + axial_det_inc;
+//                       const int new_ring_num_b = rb + axial_det_inc;
+
+//                       if (fan_data.is_in_data(new_ring_num_a, new_det_num_a, new_ring_num_b, new_det_num_b))
+//                         {
+// // #ifdef STIR_OPENMP
+// // #  pragma omp critical(FANPROJDATAWRITE)
+// // #endif
+//                           try
+//                             {
+//                               (*local_geo_sptrs[thread_num])(ra, a, rb, b % num_transaxial_detectors)
+//                                   += fan_data(new_ring_num_a, new_det_num_a, new_ring_num_b, new_det_num_b);
+//                             }
+//                           catch(...)
+//                             {
+
+//                             }
+//                         }
+//                     }
+//               }
+//           }
+    // }
+
+//   std::cout<<"Now flattening the geo data ..." << std::endl;
+// #ifdef STIR_OPENMP
+//   // flatten data constructed by threads
+//   {
+//         for (int i = 0; i < static_cast<int>(local_geo_sptrs.size()); ++i)
+//           if (!is_null_ptr(local_geo_sptrs[i])) // only accumulate if a thread filled something in
+//             geo_data += *(local_geo_sptrs[i]);
+//   }
+// #endif
+
+        return return_value;
+// omp_set_num_threads(omp_get_max_threads());
+//   std::cout<<"Returned the omp threads to " << omp_get_num_threads() << std::endl;
+}
+
+
 void
 make_fan_data_remove_gaps(FanProjData& fan_data, const ProjData& proj_data)
 {
@@ -1342,28 +1723,52 @@ apply_geo_norm(FanProjData& fan_data, const GeoData3D& geo_data, const bool appl
   const int num_transaxial_detectors = fan_data.get_num_detectors_per_ring();
   const int num_axial_crystals_per_block = geo_data.get_num_axial_crystals_per_block();
   const int num_transaxial_crystals_per_block = geo_data.get_half_num_transaxial_crystals_per_block() * 2;
-
   const int num_transaxial_blocks = num_transaxial_detectors / num_transaxial_crystals_per_block;
   const int num_axial_blocks = num_axial_detectors / num_axial_crystals_per_block;
 
-  FanProjData work = fan_data;
-  work.fill(0);
-
-  for (int ra = 0; ra < num_axial_crystals_per_block; ++ra)
-    for (int a = 0; a < num_transaxial_crystals_per_block / 2; ++a)
-      // loop rb from ra to avoid double counting
-      for (int rb = max(ra, fan_data.get_min_rb(ra)); rb <= fan_data.get_max_rb(ra); ++rb)
-        for (int b = fan_data.get_min_b(a); b <= fan_data.get_max_b(a); ++b)
-          {
+  std::vector<shared_ptr<FanProjData>> local_work_sptrs;
+  int num_threads = 3;
+#ifdef STIR_OPENMP
+#  pragma omp single
+  {
+    std::cout << "apply_geo_norm in parallel..." << std::endl;
+    std::cout << "We will be using " << num_threads << " threads... If your machine does not support that please contact sb from STIR" << std::endl;
 
-            // rotation
+    local_work_sptrs.resize(num_threads, shared_ptr<FanProjData>());
 
-            for (int axial_block_num = 0; axial_block_num < num_axial_blocks; ++axial_block_num)
-              {
+    for (int i = 0; i < num_threads; i++)
+      {
+        std::cout << "Allocated data to FanProjData for thread " << i << std::endl;
+        local_work_sptrs[i].reset(new FanProjData(
+            fan_data.size(), fan_data[0].size(),
+            fan_data[0][0].size(), fan_data[0][0][0].size()));
+        local_work_sptrs[i]->fill(0);
+      }
+  }
+#endif
 
-                for (int transaxial_block_num = 0; transaxial_block_num < num_transaxial_blocks; ++transaxial_block_num)
+std::cout << "Starting  loops over geo... " << std::endl;
+#ifdef STIR_OPENMP
+#  if _OPENMP >= 201107
+#    pragma omp parallel for schedule(dynamic) collapse(4) num_threads(num_threads)
+#  endif
+#endif
+for (int axial_block_num = 0; axial_block_num < num_axial_blocks; ++axial_block_num)
+  {
+    for (int transaxial_block_num = 0; transaxial_block_num < num_transaxial_blocks; ++transaxial_block_num)
+      {
+        for (int ra = 0; ra < num_axial_crystals_per_block; ++ra)
+          for (int a = 0; a < num_transaxial_crystals_per_block / 2; ++a)
+            {
+#ifdef STIR_OPENMP
+              const int thread_num = omp_get_thread_num();
+#else
+              const int thread_num = 0;
+#endif
+              // loop rb from ra to avoid double counting
+              for (int rb = max(ra, fan_data.get_min_rb(ra)); rb <= fan_data.get_max_rb(ra); ++rb)
+                for (int b = fan_data.get_min_b(a); b <= fan_data.get_max_b(a); ++b)
                   {
-
                     const int transaxial_det_inc = transaxial_block_num * num_transaxial_crystals_per_block;
                     const int new_det_num_a = (a + transaxial_det_inc) % num_transaxial_detectors;
                     const int new_det_num_b = (b + transaxial_det_inc) % num_transaxial_detectors;
@@ -1376,24 +1781,39 @@ apply_geo_norm(FanProjData& fan_data, const GeoData3D& geo_data, const bool appl
                     const int mra = num_axial_detectors - 1 - new_ring_num_a;
                     const int mrb = num_axial_detectors - 1 - new_ring_num_b;
 
-                    if (work.is_in_data(new_ring_num_a, new_det_num_a, new_ring_num_b, new_det_num_b))
-                      work(new_ring_num_a, new_det_num_a, new_ring_num_b, new_det_num_b)
+                    if ( (*local_work_sptrs[thread_num]).is_in_data(new_ring_num_a, new_det_num_a, new_ring_num_b, new_det_num_b))
+                      (*local_work_sptrs[thread_num])(new_ring_num_a, new_det_num_a, new_ring_num_b, new_det_num_b)
                           = geo_data(ra, a, rb, b % num_transaxial_detectors);
 
-                    if (work.is_in_data(new_ring_num_a, ma, new_ring_num_b, mb))
-                      work(new_ring_num_a, ma, new_ring_num_b, mb) = geo_data(ra, a, rb, b % num_transaxial_detectors);
-
-                    if (work.is_in_data(mra, new_det_num_a, mrb, new_det_num_b))
+                    if ( (*local_work_sptrs[thread_num]).is_in_data(new_ring_num_a, ma, new_ring_num_b, mb))
+                      (*local_work_sptrs[thread_num])(new_ring_num_a, ma, new_ring_num_b, mb) = geo_data(ra, a, rb, b % num_transaxial_detectors);
 
-                      work(mra, new_det_num_a, mrb, new_det_num_b) = geo_data(ra, a, rb, b % num_transaxial_detectors);
+                    if ( (*local_work_sptrs[thread_num]).is_in_data(mra, new_det_num_a, mrb, new_det_num_b))
+                      (*local_work_sptrs[thread_num])(mra, new_det_num_a, mrb, new_det_num_b) = geo_data(ra, a, rb, b % num_transaxial_detectors);
 
-                    if (work.is_in_data(mra, ma, mrb, mb))
-
-                      work(mra, ma, mrb, mb) = geo_data(ra, a, rb, b % num_transaxial_detectors);
+                    if ( (*local_work_sptrs[thread_num]).is_in_data(mra, ma, mrb, mb))
+                      (*local_work_sptrs[thread_num])(mra, ma, mrb, mb) = geo_data(ra, a, rb, b % num_transaxial_detectors);
                   }
-              }
-          }
+            }
+      }
+  }
+  std::cout << " loops over geo finished! " << std::endl;
 
+std::cout<<"Now flattening the FanProjData data ..." << std::endl;
+#ifdef STIR_OPENMP
+// flatten data constructed by threads
+{
+  for (int i = 1; i < static_cast<int>(local_work_sptrs.size()); ++i)
+    if (!is_null_ptr(local_work_sptrs[i])) // only accumulate if a thread filled something in
+      (*local_work_sptrs[0]) += (*local_work_sptrs[i]);
+}
+#endif
+std::cout<<"Loop over fan_data ..." << std::endl;
+#ifdef STIR_OPENMP
+#  if _OPENMP >= 201107
+#    pragma omp parallel for collapse(2)
+#  endif
+#endif
   for (int ra = fan_data.get_min_ra(); ra <= fan_data.get_max_ra(); ++ra)
     for (int a = fan_data.get_min_a(); a <= fan_data.get_max_a(); ++a)
       //    for (int rb = fan_data.get_min_ra(); rb <= fan_data.get_max_ra(); ++rb)
@@ -1404,16 +1824,34 @@ apply_geo_norm(FanProjData& fan_data, const GeoData3D& geo_data, const bool appl
             if (fan_data(ra, a, rb, b) == 0)
               continue;
             if (apply)
-              fan_data(ra, a, rb, b) *= work(ra, a, rb, b % num_transaxial_detectors);
+#ifdef STIR_OPENMP
+#  if _OPENMP >= 201107
+#    pragma omp atomic
+#  endif
+#endif
+              fan_data(ra, a, rb, b) *= (*local_work_sptrs[0])(ra, a, rb, b % num_transaxial_detectors);
             else
-              fan_data(ra, a, rb, b) /= work(ra, a, rb, b % num_transaxial_detectors);
+#ifdef STIR_OPENMP
+#  if _OPENMP >= 201107
+#    pragma omp atomic
+#  endif
+#endif
+              fan_data(ra, a, rb, b) /= (*local_work_sptrs[0])(ra, a, rb, b % num_transaxial_detectors);
           }
+  local_work_sptrs.clear();
+  std::cout<<"Finished Loop over fan_data ..." << std::endl;
 }
 
 void
 apply_efficiencies(FanProjData& fan_data, const DetectorEfficiencies& efficiencies, const bool apply)
 {
   const int num_detectors_per_ring = fan_data.get_num_detectors_per_ring();
+
+#ifdef STIR_OPENMP
+#  if _OPENMP >= 201107
+#    pragma omp parallel for collapse(2)
+#  endif
+#endif
   for (int ra = fan_data.get_min_ra(); ra <= fan_data.get_max_ra(); ++ra)
     for (int a = fan_data.get_min_a(); a <= fan_data.get_max_a(); ++a)
       // loop rb from ra to avoid double counting
@@ -1423,8 +1861,18 @@ apply_efficiencies(FanProjData& fan_data, const DetectorEfficiencies& efficienci
             if (fan_data(ra, a, rb, b) == 0)
               continue;
             if (apply)
+#ifdef STIR_OPENMP
+#  if _OPENMP >= 201107
+#    pragma omp atomic
+#  endif
+#endif
               fan_data(ra, a, rb, b) *= efficiencies[ra][a] * efficiencies[rb][b % num_detectors_per_ring];
             else
+#ifdef STIR_OPENMP
+#  if _OPENMP >= 201107
+#    pragma omp atomic
+#  endif
+#endif
               fan_data(ra, a, rb, b) /= efficiencies[ra][a] * efficiencies[rb][b % num_detectors_per_ring];
           }
 }
@@ -1432,9 +1880,48 @@ apply_efficiencies(FanProjData& fan_data, const DetectorEfficiencies& efficienci
 void
 make_fan_sum_data(Array<2, float>& data_fan_sums, const FanProjData& fan_data)
 {
+  std::vector<shared_ptr<Array<2, float>>> local_fan_sptrs;
+  int num_threads = 30;
+#ifdef STIR_OPENMP
+#  pragma omp single
+  {
+    std::cout << "Converting fan data to fan sum data..." << std::endl;
+    std::cout << "We will be using " << num_threads << " threads... If your machine does not support that please contact sb from STIR" << std::endl;
+
+    local_fan_sptrs.resize(num_threads, shared_ptr<Array<2, float>>());
+
+    for (int i = 0; i < num_threads; i++)
+      {
+        std::cout << "Allocated data to fan sum data... " << i<< std::endl;
+        local_fan_sptrs[i].reset(new Array<2, float>(data_fan_sums));
+        local_fan_sptrs[i]->fill(0);
+      }
+  }
+#endif
+#ifdef STIR_OPENMP
+#pragma omp parallel for schedule(dynamic) collapse(2) num_threads(num_threads)
+#endif
   for (int ra = fan_data.get_min_ra(); ra <= fan_data.get_max_ra(); ++ra)
     for (int a = fan_data.get_min_a(); a <= fan_data.get_max_a(); ++a)
-      data_fan_sums[ra][a] = fan_data.sum(ra, a);
+      {
+#ifdef STIR_OPENMP
+        const int thread_num = omp_get_thread_num();
+#else
+        const int thread_num = 0;
+#endif
+        (*local_fan_sptrs[thread_num])[ra][a] = fan_data.sum(ra, a);
+      }
+
+  data_fan_sums.fill(0.f);
+  std::cout<<"Now flattening the fan sums ..." << std::endl;
+#ifdef STIR_OPENMP
+  // flatten data constructed by threads
+  {
+    for (int i = 0; i < static_cast<int>(local_fan_sptrs.size()); ++i)
+        data_fan_sums += (*local_fan_sptrs[i]);
+  }
+#endif
+  local_fan_sptrs.clear();
 }
 
 template <class TProjDataInfo>
@@ -1544,59 +2031,149 @@ make_geo_data(GeoData3D& geo_data, const FanProjData& fan_data)
 
   // transaxial and axial mirroring
 
+    std::vector<shared_ptr<FanProjData>> local_geo_sptrs;
+    int num_threads = 3;
+  #ifdef STIR_OPENMP
+  #  pragma omp single
+    {
+    std::cout << "Converting fan data to geo data..." << std::endl;
+    std::cout << "We will be using " << num_threads << " threads... If your machine does not support that please contact sb from STIR" << std::endl;
+
+    local_geo_sptrs.resize(num_threads, shared_ptr<FanProjData>());
+
+    for (int i = 0; i < num_threads; i++)
+      {
+        std::cout << "Allocated data to geo data... " << i<< std::endl;
+        local_geo_sptrs[i].reset(new FanProjData(fan_data.size(),
+                                               fan_data[0].size(),
+                                               fan_data[0][0].size(),
+                                               fan_data[0][0][0].size()));
+        local_geo_sptrs[i]->fill(0);
+      }
+    }
+    #endif
+
   FanProjData work = fan_data;
   work.fill(0);
-
-  for (int ra = fan_data.get_min_ra(); ra <= fan_data.get_max_ra(); ++ra)
-    for (int a = fan_data.get_min_a(); a <= fan_data.get_max_a(); ++a)
-      // 1// for (int rb = fan_data.get_min_ra(); rb <= fan_data.get_max_ra(); ++rb)
-      for (int rb = max(ra, fan_data.get_min_rb(ra)); rb <= fan_data.get_max_rb(ra); ++rb)
-        for (int b = fan_data.get_min_b(a); b <= fan_data.get_max_b(a); ++b)
+    std::cout << "Starting geo loops... This is  slow" << std::endl;
+#ifdef STIR_OPENMP
+#pragma omp parallel for schedule(dynamic) collapse(2) num_threads(num_threads)
+#endif
+    for (int ra = fan_data.get_min_ra(); ra <= fan_data.get_max_ra(); ++ra)
+      {
+        for (int a = fan_data.get_min_a(); a <= fan_data.get_max_a(); ++a)
           {
+            // 1// for (int rb = fan_data.get_min_ra(); rb <= fan_data.get_max_ra(); ++rb)
+            for (int rb = max(ra, fan_data.get_min_rb(ra)); rb <= fan_data.get_max_rb(ra); ++rb)
+              {
+                for (int b = fan_data.get_min_b(a); b <= fan_data.get_max_b(a); ++b)
+                  {
 
-            const int ma = num_transaxial_detectors - 1 - a;
-            const int mb = (2 * num_transaxial_detectors - 1 - b) % num_transaxial_detectors;
-            const int mra = num_axial_detectors - 1 - ra;
-            const int mrb = (num_axial_detectors - 1 - rb);
-
-            if (ra != mra && rb != mrb)
-              work(ra, a, rb, b)
-                  = fan_data(ra, a, rb, b) + fan_data(ra, ma, rb, mb) + fan_data(mra, a, mrb, b) + fan_data(mra, ma, mrb, mb);
-            else
-              work(ra, a, rb, b) = fan_data(ra, a, rb, b) + fan_data(ra, ma, rb, mb);
+#ifdef STIR_OPENMP
+                  const int thread_num = omp_get_thread_num();
+#else
+                  const int thread_num = 0;
+#endif
+                    const int ma = num_transaxial_detectors - 1 - a;
+                    const int mb = (2 * num_transaxial_detectors - 1 - b) % num_transaxial_detectors;
+                    const int mra = num_axial_detectors - 1 - ra;
+                    const int mrb = (num_axial_detectors - 1 - rb);
+
+                    if (ra != mra && rb != mrb)
+                      (*local_geo_sptrs[thread_num])(ra, a, rb, b)
+                          = fan_data(ra, a, rb, b) + fan_data(ra, ma, rb, mb) + fan_data(mra, a, mrb, b) + fan_data(mra, ma, mrb, mb);
+                    else
+                      (*local_geo_sptrs[thread_num])(ra, a, rb, b) = fan_data(ra, a, rb, b) + fan_data(ra, ma, rb, mb);
+                  }
+              }
           }
+      }
 
-  geo_data.fill(0);
+      std::cout<<"Now flattening the geo data ...This is very slow" << std::endl;
+    #ifdef STIR_OPENMP
+      // flatten data constructed by threads
+      {
+            for (int i = 0; i < static_cast<int>(local_geo_sptrs.size()); ++i)
+              if (!is_null_ptr(local_geo_sptrs[i])) // only accumulate if a thread filled something in
+                work+= (*local_geo_sptrs[i]);
+      }
+    #endif
 
-  for (int ra = 0; ra < num_axial_crystals_per_block; ++ra)
-    //  for (int a = 0; a <= num_transaxial_detectors/2; ++a)
-    for (int a = 0; a < num_transaxial_crystals_per_block / 2; ++a)
-      // loop rb from ra to avoid double counting
-      // for (int rb = fan_data.get_min_ra(); rb <= fan_data.get_max_ra(); ++rb)
-      for (int rb = max(ra, fan_data.get_min_rb(ra)); rb <= fan_data.get_max_rb(ra); ++rb)
-        for (int b = fan_data.get_min_b(a); b <= fan_data.get_max_b(a); ++b)
-          {
+      local_geo_sptrs.clear();
+      geo_data.fill(0);
 
-            // rotation
+      std::vector<shared_ptr<GeoData3D>> local_gg_sptrs;
+     num_threads = 30;
+#ifdef STIR_OPENMP
+#  pragma omp single
 
-            for (int axial_block_num = 0; axial_block_num < num_axial_blocks; ++axial_block_num)
-              {
+      {
+        std::cout << "Converting fan data to fan GEO data..." << std::endl;
+        std::cout << "We will be using " << num_threads << " threads... If your machine does not support that please contact sb from STIR" << std::endl;
 
-                for (int transaxial_block_num = 0; transaxial_block_num < num_transaxial_blocks; ++transaxial_block_num)
-                  {
+        local_gg_sptrs.resize(num_threads, shared_ptr<GeoData3D>());
 
-                    const int transaxial_det_inc = transaxial_block_num * num_transaxial_crystals_per_block;
-                    const int new_det_num_a = (a + transaxial_det_inc) % num_transaxial_detectors;
-                    const int new_det_num_b = (b + transaxial_det_inc) % num_transaxial_detectors;
-                    const int axial_det_inc = axial_block_num * num_axial_crystals_per_block;
-                    const int new_ring_num_a = ra + axial_det_inc;
-                    const int new_ring_num_b = rb + axial_det_inc;
-                    if (fan_data.is_in_data(new_ring_num_a, new_det_num_a, new_ring_num_b, new_det_num_b))
-                      geo_data(ra, a, rb, b % num_transaxial_detectors)
-                          += work(new_ring_num_a, new_det_num_a, new_ring_num_b, new_det_num_b);
-                  }
-              }
+        for (int i = 0; i < num_threads; i++)
+          {
+            std::cout << "Allocated data to fan sum data... " << i<< std::endl;
+            local_gg_sptrs[i].reset(new GeoData3D(geo_data.size(),
+                                                  geo_data[0].size(),
+                                                  geo_data[0][0].size(),
+                                                  geo_data[0][0][0].size()));
+            local_gg_sptrs[i]->fill(0);
           }
+      }
+#endif
+
+#ifdef STIR_OPENMP
+#pragma omp parallel for schedule(dynamic) collapse(4) num_threads(num_threads -1 )
+#endif
+      for (int axial_block_num = 0; axial_block_num < num_axial_blocks; ++axial_block_num)
+        {
+          for (int transaxial_block_num = 0; transaxial_block_num < num_transaxial_blocks; ++transaxial_block_num)
+            {
+              for (int ra = 0; ra < num_axial_crystals_per_block; ++ra)
+                //  for (int a = 0; a <= num_transaxial_detectors/2; ++a)
+                for (int a = 0; a < num_transaxial_crystals_per_block / 2; ++a)
+                  {
+                    // loop rb from ra to avoid double counting
+                    // for (int rb = fan_data.get_min_ra(); rb <= fan_data.get_max_ra(); ++rb)
+                    for (int rb = max(ra, fan_data.get_min_rb(ra)); rb <= fan_data.get_max_rb(ra); ++rb)
+                      for (int b = fan_data.get_min_b(a); b <= fan_data.get_max_b(a); ++b)
+                        {
+                          // rotation
+                          const int transaxial_det_inc = transaxial_block_num * num_transaxial_crystals_per_block;
+                          const int new_det_num_a = (a + transaxial_det_inc) % num_transaxial_detectors;
+                          const int new_det_num_b = (b + transaxial_det_inc) % num_transaxial_detectors;
+                          const int axial_det_inc = axial_block_num * num_axial_crystals_per_block;
+                          const int new_ring_num_a = ra + axial_det_inc;
+                          const int new_ring_num_b = rb + axial_det_inc;
+                          if (fan_data.is_in_data(new_ring_num_a, new_det_num_a, new_ring_num_b, new_det_num_b))
+                            {
+#ifdef STIR_OPENMP
+                              const int thread_num = omp_get_thread_num();
+#else
+                              const int thread_num = 0;
+#endif
+                              (*local_gg_sptrs[thread_num])(ra, a, rb, b % num_transaxial_detectors)
+                                  += work(new_ring_num_a, new_det_num_a, new_ring_num_b, new_det_num_b);
+                            }
+                        }
+                  }
+            }
+        }
+
+  std::cout<<"Now flattening the final geo data ..." << std::endl;
+#ifdef STIR_OPENMP
+  // flatten data constructed by threads
+  {
+    for (int i = 0; i < static_cast<int>(local_gg_sptrs.size()); ++i)
+      if (!is_null_ptr(local_gg_sptrs[i])) // only accumulate if a thread filled something in
+        geo_data += (*local_gg_sptrs[i]);
+  }
+#endif
+
+  local_gg_sptrs.clear();
 }
 
 void
@@ -1634,20 +2211,32 @@ iterate_efficiencies(DetectorEfficiencies& efficiencies, const Array<2, float>&
   assert(model.get_max_ra() == data_fan_sums.get_max_index());
   assert(model.get_min_a() == data_fan_sums[data_fan_sums.get_min_index()].get_min_index());
   assert(model.get_max_a() == data_fan_sums[data_fan_sums.get_min_index()].get_max_index());
+
   for (int ra = model.get_min_ra(); ra <= model.get_max_ra(); ++ra)
+    {
+      std::cout <<">> ra: " << ra << " from "<< model.get_min_ra() << " to " <<model.get_max_ra() << std::endl;
     for (int a = model.get_min_a(); a <= model.get_max_a(); ++a)
       {
+
+        // std::cout <<" >a: " << a << " from "<< model.get_min_a() << " to " <<model.get_max_a() << std::endl;
         if (data_fan_sums[ra][a] == 0)
           efficiencies[ra][a] = 0;
         else
           {
-            float denominator = 0;
+            double denominator = 0;
+#ifdef STIR_OPENMP
+#  if _OPENMP >= 201107
+#    pragma omp parallel for collapse(2) reduction(+ : denominator)
+#  endif
+#endif
             for (int rb = model.get_min_rb(ra); rb <= model.get_max_rb(ra); ++rb)
               for (int b = model.get_min_b(a); b <= model.get_max_b(a); ++b)
-                denominator += efficiencies[rb][b % num_detectors_per_ring] * model(ra, a, rb, b);
-            efficiencies[ra][a] = data_fan_sums[ra][a] / denominator;
+                denominator += static_cast<double>(efficiencies[rb][b % num_detectors_per_ring] * model(ra, a, rb, b));
+
+            efficiencies[ra][a] = data_fan_sums[ra][a] / static_cast<float>(denominator);
           }
       }
+    }
 }
 
 // version without model
@@ -1669,12 +2258,17 @@ iterate_efficiencies(DetectorEfficiencies& efficiencies,
           efficiencies[ra][a] = 0;
         else
           {
-            float denominator = 0;
+            double denominator = 0;
+#ifdef STIR_OPENMP
+#  if _OPENMP >= 201107
+#    pragma omp parallel for collapse(2) reduction(+ : denominator)
+#  endif
+#endif
             for (int rb = max(ra - max_ring_diff, 0); rb <= min(ra + max_ring_diff, num_rings - 1); ++rb)
               for (int b = a + num_detectors_per_ring / 2 - half_fan_size; b <= a + num_detectors_per_ring / 2 + half_fan_size;
                    ++b)
                 denominator += efficiencies[rb][b % num_detectors_per_ring];
-            efficiencies[ra][a] = data_fan_sums[ra][a] / denominator;
+            efficiencies[ra][a] = data_fan_sums[ra][a] / static_cast<float>(denominator);
           }
 #ifdef WRITE_ALL
         {
@@ -1703,24 +2297,69 @@ iterate_geo_norm(GeoData3D& norm_geo_data, const GeoData3D& measured_geo_data, c
   make_geo_data(norm_geo_data, model);
   // norm_geo_data = measured_geo_data / norm_geo_data;
 
+  std::vector<shared_ptr<GeoData3D>> local_geo_sptrs;
+  int num_threads = 10;
+#ifdef STIR_OPENMP
+#  pragma omp single
+  {
+    std::cout << "Converting fan data to geo data..." << std::endl;
+    std::cout << "We will be using " << num_threads << " threads... If your machine does not support that please contact sb from STIR" << std::endl;
+
+    local_geo_sptrs.resize(num_threads, shared_ptr<GeoData3D>());
+
+    for (int i = 0; i < num_threads; i++)
+      {
+        std::cout << "Allocated data to geo data... " << i<< std::endl;
+        local_geo_sptrs[i].reset(new GeoData3D(norm_geo_data.size(),
+                                               norm_geo_data[0].size(),
+                                               norm_geo_data[0][0].size(),
+                                               norm_geo_data[0][0][0].size()));
+        local_geo_sptrs[i]->fill(0);
+      }
+  }
+#endif
+
   const int num_axial_crystals_per_block = measured_geo_data.get_num_axial_crystals_per_block();
   const int num_transaxial_crystals_per_block = measured_geo_data.get_half_num_transaxial_crystals_per_block() * 2;
 
   const float threshold = measured_geo_data.find_max() / 10000.F;
 
+  std::cout << "Iterating over geo norm" << std::endl;
+#ifdef STIR_OPENMP
+#pragma omp parallel for schedule(dynamic) collapse(2) num_threads(num_threads)
+#endif
   for (int ra = 0; ra < num_axial_crystals_per_block; ++ra)
-    for (int a = 0; a < num_transaxial_crystals_per_block / 2; ++a)
-      // loop rb from ra to avoid double counting
-      //   for (int rb = model.get_min_ra(); rb <= model.get_max_ra(); ++rb)
-      for (int rb = max(ra, model.get_min_rb(ra)); rb <= model.get_max_rb(ra); ++rb)
-        for (int b = model.get_min_b(a); b <= model.get_max_b(a); ++b)
-          {
+    {
+      for (int a = 0; a < num_transaxial_crystals_per_block / 2; ++a)
+        {
+          // loop rb from ra to avoid double counting
+          //   for (int rb = model.get_min_ra(); rb <= model.get_max_ra(); ++rb)
+#ifdef STIR_OPENMP
+          const int thread_num = omp_get_thread_num();
+#else
+          const int thread_num = 0;
+#endif
+          for (int rb = max(ra, model.get_min_rb(ra)); rb <= model.get_max_rb(ra); ++rb)
+            for (int b = model.get_min_b(a); b <= model.get_max_b(a); ++b)
+              {
 
-            norm_geo_data(ra, a, rb, b) = (measured_geo_data(ra, a, rb, b) >= threshold
-                                           || measured_geo_data(ra, a, rb, b) < 10000 * norm_geo_data(ra, a, rb, b))
-                                              ? measured_geo_data(ra, a, rb, b) / norm_geo_data(ra, a, rb, b)
-                                              : 0;
-          }
+                (*local_geo_sptrs[thread_num])(ra, a, rb, b) = (measured_geo_data(ra, a, rb, b) >= threshold
+                                                                || measured_geo_data(ra, a, rb, b) < 10000 * norm_geo_data(ra, a, rb, b))
+                                                                   ? measured_geo_data(ra, a, rb, b) / norm_geo_data(ra, a, rb, b)
+                                                                   : 0;
+              }
+        }
+    }
+
+  std::cout<<"Now flattening the geo data ..." << std::endl;
+#ifdef STIR_OPENMP
+  // flatten data constructed by threads
+  {
+    for (int i = 0; i < static_cast<int>(local_geo_sptrs.size()); ++i)
+      if (!is_null_ptr(local_geo_sptrs[i])) // only accumulate if a thread filled something in
+        norm_geo_data += *(local_geo_sptrs[i]);
+  }
+#endif
 }
 
 void
diff --git a/src/buildblock/ProjDataFromStream.cxx b/src/buildblock/ProjDataFromStream.cxx
index bcb47f9c6..13c9a0b71 100644
--- a/src/buildblock/ProjDataFromStream.cxx
+++ b/src/buildblock/ProjDataFromStream.cxx
@@ -79,7 +79,7 @@ ProjDataFromStream::ProjDataFromStream(shared_ptr<const ExamInfo> const& exam_in
   assert(storage_order != Unsupported);
   assert(!(data_type == NumericType::UNKNOWN_TYPE));
 
-  if (proj_data_info_sptr->get_num_tof_poss() > 1)
+  if (proj_data_info_sptr->is_tof_data())
     activate_TOF();
 }
 
diff --git a/src/buildblock/ProjDataInfo.cxx b/src/buildblock/ProjDataInfo.cxx
index 5dd6b44ce..37b9baf44 100644
--- a/src/buildblock/ProjDataInfo.cxx
+++ b/src/buildblock/ProjDataInfo.cxx
@@ -177,9 +177,46 @@ ProjDataInfo::set_tof_mash_factor(const int new_num)
     {
       tof_mash_factor = new_num;
       if (tof_mash_factor > scanner_ptr->get_max_num_timing_poss())
-        error("ProjDataInfo::set_tof_mash_factor: TOF mashing factor (" + std::to_string(tof_mash_factor)
+        {
+        warning("ProjDataInfo::set_tof_mash_factor: TOF mashing factor (" + std::to_string(tof_mash_factor)
               + +") must be smaller than or equal to the scanner's number of max timing bins ("
               + std::to_string(scanner_ptr->get_max_num_timing_poss()) + ").");
+        }
+      else if (tof_mash_factor == scanner_ptr->get_max_num_timing_poss())
+        // This is a special case that we just want boundaries for the coincidence window.
+        {
+          // Now, initialise the mashed TOF bins.
+          tof_increament_in_mm = tof_delta_time_to_mm(tof_mash_factor * scanner_ptr->get_size_of_timing_pos());
+          Bin bin;
+          bin.timing_pos_num() = min_tof_pos_num;
+          //Get lowest low
+          float cur_low = get_k(bin) - get_sampling_in_k(bin) / 2.f;
+          //Get highest high
+          bin.timing_pos_num() = max_tof_pos_num;
+          float cur_high = get_k(bin) + get_sampling_in_k(bin) / 2.f;
+
+
+          min_tof_pos_num = 0;
+          max_tof_pos_num = 0;
+          num_tof_bins = 1;
+          // Upper and lower boundaries of the timing poss;
+          tof_bin_boundaries_mm.recycle();
+          tof_bin_boundaries_mm.grow(min_tof_pos_num, max_tof_pos_num);
+          tof_bin_boundaries_ps.recycle();
+          tof_bin_boundaries_ps.grow(min_tof_pos_num, max_tof_pos_num);
+
+          tof_bin_boundaries_mm[0].low_lim = cur_low;
+          tof_bin_boundaries_mm[0].high_lim = cur_high;
+
+          tof_bin_boundaries_ps[0].low_lim = static_cast<float>(mm_to_tof_delta_time(tof_bin_boundaries_mm[0].low_lim));
+          tof_bin_boundaries_ps[0].high_lim = static_cast<float>(mm_to_tof_delta_time(tof_bin_boundaries_mm[0].high_lim));
+
+          info(boost::format("Tbin %1%: %2% - %3% mm (%4% - %5% ps) = %6%") % 0 % tof_bin_boundaries_mm[0].low_lim
+               % tof_bin_boundaries_mm[0].high_lim % tof_bin_boundaries_ps[0].low_lim % tof_bin_boundaries_ps[0].high_lim
+               % get_sampling_in_k(bin));
+
+          return;
+        }
 
 #if 0
         // KT: code disabled as buggy but currently not needed
diff --git a/src/buildblock/Scanner.cxx b/src/buildblock/Scanner.cxx
index b66dee692..0c4a90a9c 100644
--- a/src/buildblock/Scanner.cxx
+++ b/src/buildblock/Scanner.cxx
@@ -1231,6 +1231,64 @@ Scanner::Scanner(Type scanner_type)
       );
       break;
 
+
+    case PSMR_3rings_scanner:
+      set_params(PSMR_3rings_scanner,
+                 string_list("PSMR_3rings_scanner"),
+                 3 * 14 * 6,// + 8,
+                 411,
+                 401,
+                 24 * 7 * 5,
+                 405.0 - 18/2,
+                 7.0F,
+                 2.85F,
+                 2.08626F,
+                 static_cast<float>(-0.137), //-0.18
+                 14,//            int num_axial_blocks_per_bucket_v,
+                 5,  //            int num_transaxial_blocks_per_bucket_v,
+                 6, //            int num_axial_crystals_per_block_v,
+                 7,//            int num_transaxial_crystals_per_block_v,
+                 84, //            int num_axial_crystals_per_singles_unit_v,
+                 35,  //            int num_transaxial_crystals_per_singles_unit_v,
+                 1,
+                 0.0F, 511.F,
+#ifdef STIR_TOF
+                 1,
+                 2011.5F*2.F, // Size of coincidence window
+                 0.F
+#endif
+                 );
+      break;
+
+  case PSMR_3rings_scanner_TOF:
+    set_params(PSMR_3rings_scanner_TOF,
+               string_list("PSMR_3rings_scanner_TOF"),
+               3 * 14 * 6,// + 8,
+               344, 344,
+               24 * 7 * 5,
+               405.0 - 18/2,
+               7.0F,
+               2.85F,
+               2.08626F,
+               static_cast<float>(-0.137),
+               14,//            int num_axial_blocks_per_bucket_v,
+               5,  //            int num_transaxial_blocks_per_bucket_v,
+               6, //            int num_axial_crystals_per_block_v,
+               7,//            int num_transaxial_crystals_per_block_v,
+               84, //            int num_axial_crystals_per_singles_unit_v,
+               35,  //            int num_transaxial_crystals_per_singles_unit_v,
+               1,
+               0.0F, 511.F,
+#ifdef STIR_TOF
+               //               (short int)(2*3.51*1000 -1),
+               //               (float)(1.f),
+               (short int)(13), // 13 TOF bins
+               (float)(2011.5F*2.F/13), //
+               (float)(500.F)
+#endif
+               );
+    break;
+
     case User_defined_scanner: // zlong, 08-04-2004, Userdefined support
 
       set_params(User_defined_scanner,
diff --git a/src/include/stir/ML_norm.h b/src/include/stir/ML_norm.h
index 5806012e8..54e01b1a0 100644
--- a/src/include/stir/ML_norm.h
+++ b/src/include/stir/ML_norm.h
@@ -160,7 +160,7 @@ class GeoData3D : public Array<4, float>
   int num_detectors_per_ring;
 };
 
-class FanProjData : private Array<4, float>
+class FanProjData : public Array<4, float>
 {
 public:
   FanProjData();
@@ -205,8 +205,15 @@ class FanProjData : private Array<4, float>
 
 typedef FanProjData BlockData3D;
 
+double make_all_fan_data_from_cache(
+                                  FanProjData& fan_data,
+                                  const ProjData& proj_data,
+            const FanProjData& model);
+
 void make_fan_data_remove_gaps(FanProjData& fan_data, const ProjData& proj_data);
 
+void load_fan_data(FanProjData& fan_data, const ProjData& proj_data, const std::string fan_filename = "");
+
 void set_fan_data_add_gaps(ProjData& proj_data, const FanProjData& fan_data, const float gap_value = 0.F);
 
 void apply_block_norm(FanProjData& fan_data, const BlockData3D& block_data, const bool apply = true);
diff --git a/src/include/stir/ProjDataInfo.h b/src/include/stir/ProjDataInfo.h
index f6ef1a032..f93178380 100644
--- a/src/include/stir/ProjDataInfo.h
+++ b/src/include/stir/ProjDataInfo.h
@@ -150,7 +150,9 @@ class ProjDataInfo
 
   //! Similar to create_shared_clone() but returns a non-tof version of ProjDataInfo setting tof mashing factor = 0
   inline shared_ptr<ProjDataInfo> create_non_tof_clone() const;
-
+  //! Similar to create_non_tof_clone() but returns a single TOF version of the ProjDataInfo setting the TOF mashing factor sush that,
+  //! a single TOF bin remains. This is usefull to apply the coincidence window boundaries without affecting the probabilities.
+    inline shared_ptr<ProjDataInfo> create_single_tof_clone() const;
   //! Destructor
   virtual ~ProjDataInfo() {}
 
diff --git a/src/include/stir/ProjDataInfo.inl b/src/include/stir/ProjDataInfo.inl
index 8677a7083..9d0108b5b 100644
--- a/src/include/stir/ProjDataInfo.inl
+++ b/src/include/stir/ProjDataInfo.inl
@@ -44,6 +44,19 @@ ProjDataInfo::create_non_tof_clone() const
   return sptr;
 }
 
+shared_ptr<ProjDataInfo>
+ProjDataInfo::create_single_tof_clone() const
+{
+  shared_ptr<ProjDataInfo> sptr(this->clone());
+  int a  = sptr->get_num_tof_poss();
+  if (a > 0)
+    sptr->set_tof_mash_factor(a);
+  // - TODO: remove this later
+  // int b  = sptr->get_num_tof_poss();
+  // std::cout <<"Old number of TOF poss " << a << " new number of TOF poss " << b << std::endl;
+  return sptr;
+}
+
 int
 ProjDataInfo::get_num_segments() const
 {
@@ -183,10 +196,10 @@ ProjDataInfo::is_tof_data() const
   // First case: if tof_mash_factor == 0, scanner is not tof ready and no tof data
   if (tof_mash_factor == 0)
     {
-      if (num_tof_bins != 1)
-        {
-          error("Non-TOF data with inconsistent Time-of-Flight bin number - Aborted operation.");
-        }
+      // if (num_tof_bins != 1)
+      //   {
+      //     warning("Non-TOF data with inconsistent Time-of-Flight bin number - Note. As we are updating the scanner template style please make sure to have one TOF position (whole coincidence window for nonTOF scanners).");
+      //   }
       return false;
     }
   // Second case: when tof_mash_factor is strictly positive, it means we have TOF data
diff --git a/src/include/stir/Scanner.h b/src/include/stir/Scanner.h
index 79c8cbde8..efb5718c7 100644
--- a/src/include/stir/Scanner.h
+++ b/src/include/stir/Scanner.h
@@ -83,6 +83,11 @@ class Succeeded;
           while others (such as CTI scanners) give only singles for a
           collection of blocks. A \c singles_unit is then a set of crystals
           for which we can get singles rates.
+    \li \c If the user set number for TOF positions to 1 and a bin size equal to the
+          coincidence window then the reconstruction will essential be nonTOF but the
+            projector will restrict the size of the LOR to the size of the coincidence window.
+      \li \c The scanner will be classified as TOF enabled when the numer of TOF bins and
+TOF bin size are  >1 and >0, respectively. If the timing resolution is not set, we will attempt to handle this if the number TOF positions is 1 (the projector will then just use the bin-size to restrict the size of the LOR).
 
       A further complication is that some scanners (including many Siemens scanners)
       insert virtual crystals in the sinogram data (corresponding to gaps between
@@ -168,6 +173,8 @@ class Scanner
     HZLR,
     RATPET,
     PANDA,
+    PSMR_3rings_scanner,
+    PSMR_3rings_scanner_TOF,
     HYPERimage,
     nanoPET,
     HRRT,
diff --git a/src/include/stir/Scanner.inl b/src/include/stir/Scanner.inl
index 874f97e45..b7db17643 100644
--- a/src/include/stir/Scanner.inl
+++ b/src/include/stir/Scanner.inl
@@ -262,7 +262,7 @@ Scanner::get_timing_resolution() const
 bool
 Scanner::is_tof_ready() const
 {
-  return (max_num_of_timing_poss > 0 && size_timing_pos > 0.0f && timing_resolution > 0.0f);
+  return (max_num_of_timing_poss > 0 && size_timing_pos > 0.0f ); //&& timing_resolution > 0.0f);
 }
 
 std::string
diff --git a/src/include/stir/recon_buildblock/ML_estimate_component_based_normalisation.h b/src/include/stir/recon_buildblock/ML_estimate_component_based_normalisation.h
index f7fa35ce6..1496092de 100644
--- a/src/include/stir/recon_buildblock/ML_estimate_component_based_normalisation.h
+++ b/src/include/stir/recon_buildblock/ML_estimate_component_based_normalisation.h
@@ -9,10 +9,11 @@
 /*!
  \file
  \ingroup recon_buildblock
- \brief Declaration of ML_estimate_component_based_normalisation
+ \brief Declaration of stir::ML_estimate_component_based_normalisation
  \author Kris Thielemans
  */
 #include "stir/common.h"
+#include <string>
 
 START_NAMESPACE_STIR
 
@@ -21,7 +22,17 @@ class ProjData;
 /*!
  \brief Find normalisation factors using a maximum likelihood approach
 
-  \ingroup recon_buildblock
+ Output is currently a set of text files in an (awkard) format, as used
+ by \c apply_normfactors3D.
+
+ \param[in] do_geo find geometric component
+ \param[in] do_block estimate block-pair components (intended for timing alignment). Do NOT use.
+ \param[in] do_symmetry_per_block estimate rotational symmetry per block if \c true, or per bucket (recommended) if \c false
+ \param[in] use_lm_cache read fan-sums of the measured data directly (experimental)
+ \param[in] use_model_fan_data read model fan data from binary file (experimental)
+ \param[in] model_fan_data_filename filename to read
+
+ \ingroup recon_buildblock
 */
 void ML_estimate_component_based_normalisation(const std::string& out_filename_prefix,
                                                const ProjData& measured_data,
@@ -29,9 +40,12 @@ void ML_estimate_component_based_normalisation(const std::string& out_filename_p
                                                int num_eff_iterations,
                                                int num_iterations,
                                                bool do_geo,
-                                               bool do_block,
-                                               bool do_symmetry_per_block,
-                                               bool do_KL,
-                                               bool do_display);
+                                               bool do_block = false,
+                                               bool do_symmetry_per_block = false,
+                                               bool do_KL = false,
+                                               bool do_display = false,
+                                               bool use_lm_cache = false,
+                                               bool use_model_fan_data = false,
+                                               std::string model_fan_data_filename = "");
 
 END_NAMESPACE_STIR
diff --git a/src/include/stir/recon_buildblock/ProjMatrixByBin.inl b/src/include/stir/recon_buildblock/ProjMatrixByBin.inl
index 8f041306e..da964e4eb 100644
--- a/src/include/stir/recon_buildblock/ProjMatrixByBin.inl
+++ b/src/include/stir/recon_buildblock/ProjMatrixByBin.inl
@@ -67,7 +67,7 @@ ProjMatrixByBin::get_proj_matrix_elems_for_one_bin(ProjMatrixElemsForOneBin& pro
 #ifndef NDEBUG
           probabilities.check_state();
 #endif
-          if (proj_data_info_sptr->is_tof_data() && this->tof_enabled)
+          if (proj_data_info_sptr->is_tof_data())
             { // Apply TOF kernel to basic bin
               apply_tof_kernel(probabilities);
             }
@@ -97,7 +97,7 @@ ProjMatrixByBin::get_proj_matrix_elems_for_one_bin(ProjMatrixElemsForOneBin& pro
 #ifndef NDEBUG
               probabilities.check_state();
 #endif
-              if (proj_data_info_sptr->is_tof_data() && this->tof_enabled)
+              if (proj_data_info_sptr->is_tof_data())
                 { // Apply TOF kernel to basic bin
                   apply_tof_kernel(probabilities);
                 }
@@ -149,7 +149,16 @@ ProjMatrixByBin::get_tof_value(const float d1, const float d2) const
   if ((d1_n >= 4.f && d2_n >= 4.f) || (d1_n <= -4.f && d2_n <= -4.f))
     return 0.F;
   else
-    return static_cast<float>(0.5 * (erf_interpolation(d2_n) - erf_interpolation(d1_n)));
+    {
+       if (this->tof_enabled)
+        {
+             return static_cast<float>(0.5 * (erf_interpolation(d2_n) - erf_interpolation(d1_n)));
+        }
+       else
+         {
+           return 1.0f;
+         }
+    }
 }
 
 END_NAMESPACE_STIR
diff --git a/src/listmode_buildblock/CListModeDataROOT.cxx b/src/listmode_buildblock/CListModeDataROOT.cxx
index 11645f407..1bd772336 100644
--- a/src/listmode_buildblock/CListModeDataROOT.cxx
+++ b/src/listmode_buildblock/CListModeDataROOT.cxx
@@ -187,10 +187,10 @@ CListModeDataROOT::CListModeDataROOT(const std::string& hroot_filename)
       this_scanner_sptr->get_num_virtual_transaxial_crystals_per_block());
 
   // Compare with InputStreamFromROOTFile scanner generated geometry and throw error if wrong.
-  if (check_scanner_match_geometry(error_str, this_scanner_sptr) == Succeeded::no)
-    {
-      error(error_str.c_str());
-    }
+  // if (check_scanner_match_geometry(error_str, this_scanner_sptr) == Succeeded::no)
+  //   {
+  //     error(error_str.c_str());
+  //   }
 
   proj_data_info_sptr = std::const_pointer_cast<const ProjDataInfo>(
       ProjDataInfo::construct_proj_data_info(this_scanner_sptr,
diff --git a/src/recon_buildblock/DataSymmetriesForBins_PET_CartesianGrid.cxx b/src/recon_buildblock/DataSymmetriesForBins_PET_CartesianGrid.cxx
index 8cba852b0..96675fa9b 100644
--- a/src/recon_buildblock/DataSymmetriesForBins_PET_CartesianGrid.cxx
+++ b/src/recon_buildblock/DataSymmetriesForBins_PET_CartesianGrid.cxx
@@ -340,7 +340,7 @@ DataSymmetriesForBins_PET_CartesianGrid::DataSymmetriesForBins_PET_CartesianGrid
         }
 
       // RT Disabling some symmetries due to tof data
-      if (proj_data_info_ptr->is_tof_data())
+      if (proj_data_info_ptr->is_tof_data() && proj_data_info_ptr->get_num_tof_poss() > 1)
         {
           if (this->do_symmetry_90degrees_min_phi || this->do_symmetry_180degrees_min_phi)
             {
diff --git a/src/recon_buildblock/ML_estimate_component_based_normalisation.cxx b/src/recon_buildblock/ML_estimate_component_based_normalisation.cxx
index a22ed6b0b..0cf9ba34c 100644
--- a/src/recon_buildblock/ML_estimate_component_based_normalisation.cxx
+++ b/src/recon_buildblock/ML_estimate_component_based_normalisation.cxx
@@ -28,10 +28,15 @@
 #include "stir/info.h"
 #include "stir/warning.h"
 #include "stir/ProjData.h"
+#include "stir/num_threads.h"
 #include <boost/format.hpp>
 #include <fstream>
 #include <string>
 #include <algorithm>
+#include "stir/IO/read_data.h"
+#include "stir/FilePath.h"
+#include <iostream>
+#include <fstream>
 
 START_NAMESPACE_STIR
 
@@ -45,7 +50,10 @@ ML_estimate_component_based_normalisation(const std::string& out_filename_prefix
                                           bool do_block,
                                           bool do_symmetry_per_block,
                                           bool do_KL,
-                                          bool do_display)
+                                          bool do_display,
+                                          bool use_lm_cache,
+                                          bool use_model_fan_data,
+                                          std::string model_fansums_filename)
 {
 
   const int num_transaxial_blocks = measured_data.get_proj_data_info_sptr()->get_scanner_sptr()->get_num_transaxial_blocks();
@@ -86,6 +94,7 @@ ML_estimate_component_based_normalisation(const std::string& out_filename_prefix
 
   FanProjData model_fan_data;
   FanProjData fan_data;
+  info("Allocating memory ...", 3);
   DetectorEfficiencies data_fan_sums(IndexRange2D(num_physical_rings, num_physical_detectors_per_ring));
   DetectorEfficiencies efficiencies(IndexRange2D(num_physical_rings, num_physical_detectors_per_ring));
 
@@ -93,44 +102,111 @@ ML_estimate_component_based_normalisation(const std::string& out_filename_prefix
                               num_physical_transaxial_crystals_per_basic_unit / 2,
                               num_physical_rings,
                               num_physical_detectors_per_ring); // inputes have to be modified
+
   GeoData3D norm_geo_data(num_physical_axial_crystals_per_basic_unit,
                           num_physical_transaxial_crystals_per_basic_unit / 2,
                           num_physical_rings,
                           num_physical_detectors_per_ring); // inputes have to be modified
 
-  BlockData3D measured_block_data(num_axial_blocks, num_transaxial_blocks, num_axial_blocks - 1, num_transaxial_blocks - 1);
-  BlockData3D norm_block_data(num_axial_blocks, num_transaxial_blocks, num_axial_blocks - 1, num_transaxial_blocks - 1);
+  BlockData3D measured_block_data;
+  BlockData3D norm_block_data;
+
+  if (do_block)
+    {
+      measured_block_data = BlockData3D(num_axial_blocks, num_transaxial_blocks, num_axial_blocks - 1, num_transaxial_blocks - 1);
+      norm_block_data = BlockData3D(num_axial_blocks, num_transaxial_blocks, num_axial_blocks - 1, num_transaxial_blocks - 1);
+    }
+
+  if (!use_model_fan_data)
+    make_fan_data_remove_gaps(model_fan_data, model_data);
+  else
+    {
+      load_fan_data(model_fan_data, model_data, model_fansums_filename);
+    }
 
-  make_fan_data_remove_gaps(model_fan_data, model_data);
+  double model_fan_data_sum = 0.f;
+
+  {
+    std::vector<shared_ptr<float>> local_model_fan_data_sum;
+    const int num_threads = std::min(50, get_default_num_threads());
+
+    local_model_fan_data_sum.resize(num_threads, shared_ptr<float>());
+    for (int i = 0; i < num_threads; i++)
+      local_model_fan_data_sum[i].reset(new float(0.0));
+
+#ifdef STIR_OPENMP
+#  pragma omp parallel for schedule(dynamic) num_threads(num_threads)
+#endif
+    for (int i = 0; i < model_fan_data.size(); i++)
+      {
+#ifdef STIR_OPENMP
+        const int thread_num = omp_get_thread_num();
+#else
+        const int thread_num = 0;
+#endif
+        *local_model_fan_data_sum[thread_num] += model_fan_data[i].sum();
+      }
+
+    for (int i = 0; i < local_model_fan_data_sum.size(); ++i)
+      model_fan_data_sum += *local_model_fan_data_sum[i];
+
+    local_model_fan_data_sum.clear();
+    info("Model sum of fan data: " + std::to_string(model_fan_data_sum), 3);
+  }
   {
     // next could be local if KL is not computed below
     FanProjData measured_fan_data;
     float threshold_for_KL;
+    double data_fan_sum = 0;
     // compute factors dependent on the data
     {
-      make_fan_data_remove_gaps(measured_fan_data, measured_data);
 
-      /* TEMP FIX */
-      for (int ra = model_fan_data.get_min_ra(); ra <= model_fan_data.get_max_ra(); ++ra)
+      if (!use_lm_cache)
         {
-          for (int a = model_fan_data.get_min_a(); a <= model_fan_data.get_max_a(); ++a)
+          make_fan_data_remove_gaps(measured_fan_data, measured_data);
+
+          /* TEMP FIX */
+          for (int ra = model_fan_data.get_min_ra(); ra <= model_fan_data.get_max_ra(); ++ra)
             {
-              for (int rb = std::max(ra, model_fan_data.get_min_rb(ra)); rb <= model_fan_data.get_max_rb(ra); ++rb)
+              for (int a = model_fan_data.get_min_a(); a <= model_fan_data.get_max_a(); ++a)
                 {
-                  for (int b = model_fan_data.get_min_b(a); b <= model_fan_data.get_max_b(a); ++b)
-                    if (model_fan_data(ra, a, rb, b) == 0)
-                      measured_fan_data(ra, a, rb, b) = 0;
+                  for (int rb = std::max(ra, model_fan_data.get_min_rb(ra)); rb <= model_fan_data.get_max_rb(ra); ++rb)
+                    {
+                      for (int b = model_fan_data.get_min_b(a); b <= model_fan_data.get_max_b(a); ++b)
+                        if (model_fan_data(ra, a, rb, b) == 0)
+                          measured_fan_data(ra, a, rb, b) = 0;
+                    }
                 }
             }
+
+          threshold_for_KL = measured_fan_data.find_max() / 100000.F;
+          // display(measured_fan_data, "measured data");
+
+          make_fan_sum_data(data_fan_sums, measured_fan_data);
+          make_geo_data(measured_geo_data, measured_fan_data);
+          if (do_block)
+            make_block_data(measured_block_data, measured_fan_data);
+
+          data_fan_sum = data_fan_sums.sum();
         }
+      else // Use LM cache to make all data, not blocks for now.
+        {
+          // TODO: The TEMP fix is not applied. So until then, keep the model source slightly bigger than the actual measuremnts
+          // to avoid divisions by zero.
+          data_fan_sum = make_all_fan_data_from_cache(measured_fan_data, measured_data, model_fan_data);
+          float dd = data_fan_sums.find_max();
+          info("Data fan sum max: " + std::to_string(dd), 3);
+          threshold_for_KL =dd / 100000000.F;
 
-      threshold_for_KL = measured_fan_data.find_max() / 100000.F;
-      // display(measured_fan_data, "measured data");
+          info("1. Making fan sum data .. ", 3);
+          make_fan_sum_data(data_fan_sums, measured_fan_data);
+          info("fan sum data min/max " + std::to_string(data_fan_sums.find_min()) + "/" + std::to_string(data_fan_sums.find_max()), 3);
 
-      make_fan_sum_data(data_fan_sums, measured_fan_data);
-      make_geo_data(measured_geo_data, measured_fan_data);
-      make_block_data(measured_block_data, measured_fan_data);
-      if (do_display)
+          info("2. Making fan geo data .. ", 3);
+          make_geo_data(measured_geo_data, measured_fan_data);
+           info("measured_geo_data min/max " + std::to_string(measured_geo_data.find_min()) + "/" + std::to_string(measured_geo_data.find_max()), 3);;
+        }
+      if (do_display && do_block)
         display(measured_block_data, "raw block data from measurements");
 
       /* {
@@ -159,22 +235,34 @@ ML_estimate_component_based_normalisation(const std::string& out_filename_prefix
 
     for (int iter_num = 1; iter_num <= std::max(num_iterations, 1); ++iter_num)
       {
+        std::cout << "Iteration number: " << iter_num << std::endl;
         if (iter_num == 1)
           {
-            efficiencies.fill(sqrt(data_fan_sums.sum() / model_fan_data.sum()));
+            info("Calculating sums om fan_data ...", 3);
+            float value = sqrt(data_fan_sum / model_fan_data_sum);
+            info("Ratio: " + std::to_string(value), 3);
+            efficiencies.fill(value);
             norm_geo_data.fill(1);
             norm_block_data.fill(1);
           }
         // efficiencies
         {
+          info("Copying model to fan_data...", 3);
           fan_data = model_fan_data;
+          info("Applying geo norm...", 3);
           apply_geo_norm(fan_data, norm_geo_data);
-          apply_block_norm(fan_data, norm_block_data);
+          if (do_block)
+            {
+              info("Applying block norm...", 3);
+              apply_block_norm(fan_data, norm_block_data);
+            }
           if (do_display)
             display(fan_data, "model*geo*block");
           for (int eff_iter_num = 1; eff_iter_num <= num_eff_iterations; ++eff_iter_num)
             {
+              info("Efficiency iteration number: " + std::to_string(iter_num), 3);
               iterate_efficiencies(efficiencies, data_fan_sums, fan_data);
+              info("Finished efficiency iteration", 3);
               {
                 char* out_filename = new char[out_filename_prefix.size() + 30];
                 sprintf(out_filename, "%s_%s_%d_%d.out", out_filename_prefix.c_str(), "eff", iter_num, eff_iter_num);
@@ -184,6 +272,7 @@ ML_estimate_component_based_normalisation(const std::string& out_filename_prefix
               }
               if (do_KL)
                 {
+                  info("Calculating KL", 3);
                   apply_efficiencies(fan_data, efficiencies);
                   std::cerr << "measured*norm min " << measured_fan_data.find_min() << " ,max " << measured_fan_data.find_max()
                             << std::endl;
@@ -194,7 +283,8 @@ ML_estimate_component_based_normalisation(const std::string& out_filename_prefix
                   // now restore for further iterations
                   fan_data = model_fan_data;
                   apply_geo_norm(fan_data, norm_geo_data);
-                  apply_block_norm(fan_data, norm_block_data);
+                  if (do_block)
+                    apply_block_norm(fan_data, norm_block_data);
                 }
               if (do_display)
                 {
@@ -204,16 +294,18 @@ ML_estimate_component_based_normalisation(const std::string& out_filename_prefix
                   // now restore for further iterations
                   fan_data = model_fan_data;
                   apply_geo_norm(fan_data, norm_geo_data);
-                  apply_block_norm(fan_data, norm_block_data);
+                  if (do_block)
+                    apply_block_norm(fan_data, norm_block_data);
                 }
             }
         } // end efficiencies
 
-        // geo norm
-
+        info("Starting geo norm", 3);
+        info("Copying model to fan_data...", 3);
         fan_data = model_fan_data;
         apply_efficiencies(fan_data, efficiencies);
-        apply_block_norm(fan_data, norm_block_data);
+        if (do_block)
+          apply_block_norm(fan_data, norm_block_data);
 
         if (do_geo)
           iterate_geo_norm(norm_geo_data, measured_geo_data, fan_data);
@@ -248,13 +340,14 @@ ML_estimate_component_based_normalisation(const std::string& out_filename_prefix
             display(fan_data, "geo norm");
           }
 
-        // block norm
+        if(do_block) //norm
         {
           fan_data = model_fan_data;
           apply_efficiencies(fan_data, efficiencies);
           apply_geo_norm(fan_data, norm_geo_data);
           if (do_block)
-            iterate_block_norm(norm_block_data, measured_block_data, fan_data);
+            {
+              iterate_block_norm(norm_block_data, measured_block_data, fan_data);
 #if 0
                  { // check
                  for (int a=0; a<measured_block_data.get_length(); ++a)
@@ -264,6 +357,7 @@ ML_estimate_component_based_normalisation(const std::string& out_filename_prefix
                  a,b,measured_block_data[a][b]);
                  }
 #endif
+            }
           {
             char* out_filename = new char[out_filename_prefix.size() + 30];
             sprintf(out_filename, "%s_%s_%d.out", out_filename_prefix.c_str(), "block", iter_num);
@@ -271,12 +365,12 @@ ML_estimate_component_based_normalisation(const std::string& out_filename_prefix
             out << norm_block_data;
             delete[] out_filename;
           }
-          if (do_KL)
+          if (do_KL && do_block)
             {
               apply_block_norm(fan_data, norm_block_data);
               info(boost::format("KL %1%") % KL(measured_fan_data, fan_data, threshold_for_KL));
             }
-          if (do_display)
+          if (do_display && do_block)
             {
               fan_data.fill(1);
               apply_block_norm(fan_data, norm_block_data);
@@ -293,11 +387,14 @@ ML_estimate_component_based_normalisation(const std::string& out_filename_prefix
                                num_physical_transaxial_crystals_per_basic_unit / 2,
                                num_physical_rings,
                                num_physical_detectors_per_ring); // inputes have to be modified
-            BlockData3D block_data(num_axial_blocks, num_transaxial_blocks, num_axial_blocks - 1, num_transaxial_blocks - 1);
 
             make_fan_sum_data(fan_sums, fan_data);
             make_geo_data(geo_data, fan_data);
-            make_block_data(block_data, measured_fan_data);
+            if (do_block)
+              {
+                BlockData3D block_data(num_axial_blocks, num_transaxial_blocks, num_axial_blocks - 1, num_transaxial_blocks - 1);
+                make_block_data(block_data, measured_fan_data);
+              }
 
             info(boost::format("KL on fans: %1%, %2") % KL(measured_fan_data, fan_data, 0) % KL(measured_geo_data, geo_data, 0));
           }
diff --git a/src/recon_buildblock/PoissonLogLikelihoodWithLinearModelForMeanAndListModeDataWithProjMatrixByBin.cxx b/src/recon_buildblock/PoissonLogLikelihoodWithLinearModelForMeanAndListModeDataWithProjMatrixByBin.cxx
index a10c68e1c..04330bbe2 100644
--- a/src/recon_buildblock/PoissonLogLikelihoodWithLinearModelForMeanAndListModeDataWithProjMatrixByBin.cxx
+++ b/src/recon_buildblock/PoissonLogLikelihoodWithLinearModelForMeanAndListModeDataWithProjMatrixByBin.cxx
@@ -283,14 +283,14 @@ PoissonLogLikelihoodWithLinearModelForMeanAndListModeDataWithProjMatrixByBin<Tar
           > 1)) // TODO this check needs to cover the case if we reconstruct only TOF bin 0
     {
       // sets non-tof backprojector for sensitivity calculation (clone of the back_projector + set projdatainfo to non-tof)
-      this->sens_proj_data_info_sptr = this->proj_data_info_sptr->create_non_tof_clone();
+      this->sens_proj_data_info_sptr = this->proj_data_info_sptr->create_single_tof_clone();
       // TODO disable caching of the matrix
       this->sens_backprojector_sptr.reset(projector_pair_sptr->get_back_projector_sptr()->clone());
       this->sens_backprojector_sptr->set_up(this->sens_proj_data_info_sptr, target_sptr);
     }
   else
     {
-      // just use the normal backprojector
+      // just use a signle TOF backprojector
       this->sens_proj_data_info_sptr = this->proj_data_info_sptr;
       this->sens_backprojector_sptr = projector_pair_sptr->get_back_projector_sptr();
     }
@@ -664,19 +664,34 @@ PoissonLogLikelihoodWithLinearModelForMeanAndListModeDataWithProjMatrixByBin<Tar
 {
   // TODO replace with call to distributable function
 
-  const int min_segment_num = this->proj_data_info_sptr->get_min_segment_num();
-  const int max_segment_num = this->proj_data_info_sptr->get_max_segment_num();
-
   info(boost::format("Calculating sensitivity for subset %1%") % subset_num);
 
-  int min_timing_pos_num = use_tofsens ? this->proj_data_info_sptr->get_min_tof_pos_num() : 0;
-  int max_timing_pos_num = use_tofsens ? this->proj_data_info_sptr->get_max_tof_pos_num() : 0;
-  if (min_timing_pos_num < 0 || max_timing_pos_num > 0)
+  // int min_timing_pos_num = use_tofsens ? this->proj_data_info_sptr->get_min_tof_pos_num() : 0;
+  // int max_timing_pos_num = use_tofsens ? this->proj_data_info_sptr->get_max_tof_pos_num() : 0;
+  // if (min_timing_pos_num < 0 || max_timing_pos_num > 1)
+  if ( this->sens_proj_data_info_sptr->get_num_tof_poss() > 1)
     error("TOF code for sensitivity needs work");
 
   this->sens_backprojector_sptr->start_accumulating_in_new_target();
 
-  // warning: has to be same as subset scheme used as in distributable_computation
+  int runs = 1;
+
+  //TODO: For long scanners we should run better split this. - Let's discuss more.
+  // if((this->proj_data_info_sptr->get_max_segment_num() -
+  //      this->proj_data_info_sptr->get_min_segment_num()) > 100)
+  //   {
+  //     runs = ceil(this->proj_data_info_sptr->get_max_segment_num() -
+  //      this->proj_data_info_sptr->get_min_segment_num())/ 100;
+  //     }
+
+  info(boost::format("The  number of runs needed for the sensitivity image is %1%:  ") % runs);
+
+  for (int run = 0; run < runs; ++run)
+    {
+      const int min_segment_num = this->proj_data_info_sptr->get_min_segment_num();
+      const int max_segment_num = this->proj_data_info_sptr->get_max_segment_num();
+      info(boost::format("Current run %1% of %2%:  ") % run % runs);
+      // warning: has to be same as subset scheme used as in distributable_computation
 #ifdef STIR_OPENMP
 #  if _OPENMP < 201107
 #    pragma omp parallel for schedule(dynamic)
@@ -684,40 +699,45 @@ PoissonLogLikelihoodWithLinearModelForMeanAndListModeDataWithProjMatrixByBin<Tar
 #    pragma omp parallel for collapse(2) schedule(dynamic)
 #  endif
 #endif
-  for (int segment_num = min_segment_num; segment_num <= max_segment_num; ++segment_num)
-    {
-      for (int view = this->sens_proj_data_info_sptr->get_min_view_num() + subset_num;
-           view <= this->sens_proj_data_info_sptr->get_max_view_num();
-           view += this->num_subsets)
+      for (int segment_num = min_segment_num; segment_num <= max_segment_num; ++segment_num)
         {
-          const ViewSegmentNumbers view_segment_num(view, segment_num);
+          for (int view = this->sens_proj_data_info_sptr->get_min_view_num() + subset_num;
+               view <= this->sens_proj_data_info_sptr->get_max_view_num();
+               view += this->num_subsets)
+            {
+              const ViewSegmentNumbers view_segment_num(view, segment_num);
 
-          if (!this->sens_backprojector_sptr->get_symmetries_used()->is_basic(view_segment_num))
-            continue;
-          // for (int timing_pos_num = min_timing_pos_num; timing_pos_num <= max_timing_pos_num; ++timing_pos_num)
-          {
-            shared_ptr<DataSymmetriesForViewSegmentNumbers> symmetries_used(
-                this->sens_backprojector_sptr->get_symmetries_used()->clone());
+              if (!this->sens_backprojector_sptr->get_symmetries_used()->is_basic(view_segment_num))
+                continue;
 
-            RelatedViewgrams<float> viewgrams = this->sens_proj_data_info_sptr->get_empty_related_viewgrams(
-                view_segment_num, symmetries_used, false); //, timing_pos_num);
+              info(boost::format("Current seg %1%,  view: %1% ") % segment_num, view);
+              // for (int timing_pos_num = min_timing_pos_num; timing_pos_num <= max_timing_pos_num; ++timing_pos_num)
+              {
+                shared_ptr<DataSymmetriesForViewSegmentNumbers> symmetries_used(
+                    this->sens_backprojector_sptr->get_symmetries_used()->clone());
 
-            viewgrams.fill(1.F);
-            // find efficiencies
-            {
-              this->normalisation_sptr->undo(viewgrams);
-            }
-            // backproject
-            {
-              const int min_ax_pos_num = viewgrams.get_min_axial_pos_num();
-              const int max_ax_pos_num = viewgrams.get_max_axial_pos_num();
+                RelatedViewgrams<float> viewgrams = this->sens_proj_data_info_sptr->get_empty_related_viewgrams(
+                    view_segment_num, symmetries_used, false); //, timing_pos_num);
+
+                viewgrams.fill(1.F);
+                // find efficiencies
+                {
+                  this->normalisation_sptr->undo(viewgrams);
+                }
+                // backproject
+                {
+                  const int min_ax_pos_num = viewgrams.get_min_axial_pos_num();
+                  const int max_ax_pos_num = viewgrams.get_max_axial_pos_num();
 
-              this->sens_backprojector_sptr->back_project(viewgrams, min_ax_pos_num, max_ax_pos_num);
+                  this->sens_backprojector_sptr->back_project(viewgrams, min_ax_pos_num, max_ax_pos_num);
+                }
+              }
             }
-          }
         }
+      this->sens_backprojector_sptr->get_output(sensitivity);
+      // Next run
     }
-  this->sens_backprojector_sptr->get_output(sensitivity);
+
 }
 
 template <typename TargetT>
diff --git a/src/recon_buildblock/ProjMatrixByBin.cxx b/src/recon_buildblock/ProjMatrixByBin.cxx
index 16a0987db..4ad7acb40 100644
--- a/src/recon_buildblock/ProjMatrixByBin.cxx
+++ b/src/recon_buildblock/ProjMatrixByBin.cxx
@@ -68,9 +68,20 @@ ProjMatrixByBin::enable_tof(const shared_ptr<const ProjDataInfo>& _proj_data_inf
 {
   if (v)
     {
-      tof_enabled = true;
-      gauss_sigma_in_mm = tof_delta_time_to_mm(proj_data_info_sptr->get_scanner_ptr()->get_timing_resolution()) / 2.355f;
-      r_sqrt2_gauss_sigma = 1.0f / (gauss_sigma_in_mm * static_cast<float>(sqrt(2.0)));
+      if (proj_data_info_sptr->get_num_tof_poss() == 1 || proj_data_info_sptr->get_scanner_ptr()->get_timing_resolution() == 0 ) // This is a special case that we do a nonTOF backprojection without coincidence window.
+        {
+          tof_enabled = false;
+        }
+      else
+        {
+          tof_enabled = true;
+          gauss_sigma_in_mm = tof_delta_time_to_mm(proj_data_info_sptr->get_scanner_ptr()->get_timing_resolution()) / 2.355f;
+          r_sqrt2_gauss_sigma = 1.0f / (gauss_sigma_in_mm * static_cast<float>(sqrt(2.0)));
+        }
+    }
+  else
+    {
+      tof_enabled = false;
     }
 }
 
diff --git a/src/swig/stir.i b/src/swig/stir.i
index 3047e4e1c..8878471a6 100644
--- a/src/swig/stir.i
+++ b/src/swig/stir.i
@@ -1,6 +1,6 @@
 /*
     Copyright (C) 2011-07-01 - 2012, Kris Thielemans
-    Copyright (C) 2013, 2014, 2015, 2018 - 2023 University College London
+    Copyright (C) 2013, 2014, 2015, 2018 - 2024 University College London
     Copyright (C) 2022 National Physical Laboratory
     Copyright (C) 2022 Positrigo
     This file is part of STIR.
@@ -155,6 +155,8 @@
 #include "stir/multiply_crystal_factors.h"
 #include "stir/decay_correction_factor.h"
 #include "stir/ML_norm.h"
+#include "stir/data/randoms_from_singles.h"
+#include "stir/recon_buildblock/ML_estimate_component_based_normalisation.h"
 #include "stir/spatial_transformation/InvertAxis.h"
 
 #include "stir/scatter/ScatterEstimation.h"
@@ -1070,8 +1072,11 @@ ADD_REPR(stir::Succeeded, %arg($self->succeeded() ? "yes" : "no"));
 %include "stir_reconstruction.i"
 
 %include "stir/multiply_crystal_factors.h"
+%include "stir/data/randoms_from_singles.h"
 %include "stir/decay_correction_factor.h"
 
+%include "stir_ML_norm.i"
+
 %shared_ptr(stir::ScatterSimulation);
 %shared_ptr(stir::RegisteredParsingObject<stir::SingleScatterSimulation,
   stir::ScatterSimulation, stir::ScatterSimulation>);
@@ -1090,15 +1095,5 @@ ADD_REPR(stir::Succeeded, %arg($self->succeeded() ? "yes" : "no"));
 %shared_ptr(stir::CreateTailMaskFromACFs);
 %include "stir/scatter/CreateTailMaskFromACFs.h"
 
-%shared_ptr(stir::FanProjData);
-%shared_ptr(stir::GeoData3D);
-%ignore operator<<;
-%ignore operator>>;
-%ignore stir::DetPairData::operator()(const int a, const int b) const;
-%ignore stir::DetPairData3D::operator()(const int a, const int b) const;
-%ignore stir::FanProjData::operator()(const int, const int, const int, const int) const;
-%ignore stir::GeoData3D::operator()(const int, const int, const int, const int) const;
-%include "stir/ML_norm.h"
-
 %shared_ptr(stir::InvertAxis);
 %include "stir/spatial_transformation/InvertAxis.h"
diff --git a/src/swig/stir_ML_norm.i b/src/swig/stir_ML_norm.i
new file mode 100644
index 000000000..0de978d88
--- /dev/null
+++ b/src/swig/stir_ML_norm.i
@@ -0,0 +1,28 @@
+/*
+    Copyright (C) 2024 University College London
+    Copyright (C) 2022 Positrigo
+    This file is part of STIR.
+
+    SPDX-License-Identifier: Apache-2.0
+
+    See STIR/LICENSE.txt for details
+*/
+/*!
+  \file
+  \brief Interface file for SWIG: stir::ML_estimate_component_based_normalisation etc
+
+  \author Kris Thielemans
+  \author Markus Jehl
+
+*/
+
+%shared_ptr(stir::FanProjData);
+%shared_ptr(stir::GeoData3D);
+%ignore operator<<;
+%ignore operator>>;
+%ignore stir::DetPairData::operator()(const int a, const int b) const;
+%ignore stir::DetPairData3D::operator()(const int a, const int b) const;
+%ignore stir::FanProjData::operator()(const int, const int, const int, const int) const;
+%ignore stir::GeoData3D::operator()(const int, const int, const int, const int) const;
+%include "stir/ML_norm.h"
+%include "stir/recon_buildblock/ML_estimate_component_based_normalisation.h"
diff --git a/src/utilities/CMakeLists.txt b/src/utilities/CMakeLists.txt
index d279c024f..466442363 100644
--- a/src/utilities/CMakeLists.txt
+++ b/src/utilities/CMakeLists.txt
@@ -21,6 +21,7 @@ set(${dir_EXE_SOURCES}
 	manip_projdata.cxx
 	display_projdata.cxx
 	do_linear_regression.cxx
+	construct_fanProjData_fromProjData
 	postfilter.cxx
 	compare_projdata.cxx
 	compare_image.cxx
@@ -42,6 +43,7 @@ set(${dir_EXE_SOURCES}
 	get_time_frame_info.cxx
 	generate_image.cxx
 	list_ROI_values.cxx
+	cache_lm.cxx
 	zoom_image.cxx
 	find_fwhm_in_image.cxx
 	abs_image.cxx
diff --git a/src/utilities/cache_lm.cxx b/src/utilities/cache_lm.cxx
new file mode 100644
index 000000000..ccc55eb8a
--- /dev/null
+++ b/src/utilities/cache_lm.cxx
@@ -0,0 +1,293 @@
+//
+//
+/*!
+  \file
+  \ingroup listmode_utilities
+
+\brief Program to bin listmode data to 3d sinograms
+
+\see cache_lm for info on parameter file format
+
+\author Nikos Efthimiou
+
+ */
+
+#include "stir/ProjDataInfoCylindricalNoArcCorr.h"
+#include "stir/ProjData.h"
+#include "stir/utilities.h"
+#include "stir/ParsingObject.h"
+#include "stir/listmode/ListModeData.h"
+#include "stir/listmode/ListRecord.h"
+#include "stir/listmode/CListEventCylindricalScannerWithDiscreteDetectors.h"
+#include "stir/IO/read_from_file.h"
+#include "stir/error.h"
+#include "stir/FilePath.h"
+
+using std::cerr;
+using std::min;
+
+USING_NAMESPACE_STIR
+
+    class LmCache : public ParsingObject
+{
+public:
+  LmCache(const char* const par_filename);
+
+  int max_segment_num_to_process;
+
+  shared_ptr<ListModeData> lm_data_sptr;
+
+  void cache_listmode_file();
+
+  std::string get_cache_filename(unsigned int file_id) const;
+
+  std::string get_cache_path() const;
+
+  Succeeded write_listmode_cache_file(unsigned int file_id) const;
+
+private:
+  void set_defaults() override;
+  void initialise_keymap() override;
+  bool post_processing() override;
+
+  unsigned long int cache_size;
+  std::string cache_path;
+  std::string input_filename;
+  std::string output_filename_prefix;
+  bool store_prompts;
+  int num_cache_files;
+long int num_events_to_use;
+  shared_ptr<ProjDataInfo> proj_data_info_sptr;
+  std::vector<BinAndCorr> record_cache;
+};
+
+
+void LmCache::set_defaults()
+{
+  max_segment_num_to_process = -1;
+  store_prompts = true;
+  num_cache_files = 0;
+  cache_size  = 150000000;
+ num_events_to_use = -1;
+}
+
+void
+LmCache::initialise_keymap()
+{
+  parser.add_start_key("lm_cache Parameters");
+  parser.add_key("input file", &input_filename);
+  parser.add_key("num_events_to_use", &num_events_to_use);
+  // parser.add_key("frame_definition file", &frame_definition_filename);
+  parser.add_key("output filename prefix", &output_filename_prefix);
+  parser.add_key("maximum absolute segment number to process", &max_segment_num_to_process);
+  // TODO can't do this yet
+  // if (CListEvent::has_delayeds())
+  {
+    parser.add_key("Store 'prompts'", &store_prompts);
+  }
+  parser.add_stop_key("END");
+}
+
+bool
+LmCache::post_processing()
+{
+  lm_data_sptr = read_from_file<ListModeData>(input_filename);
+
+  const int num_rings = lm_data_sptr->get_scanner().get_num_rings();
+  if (max_segment_num_to_process == -1)
+    max_segment_num_to_process = num_rings - 1;
+  else
+    max_segment_num_to_process = min(max_segment_num_to_process, num_rings - 1);
+
+  proj_data_info_sptr = lm_data_sptr->get_proj_data_info_sptr()->create_shared_clone()->create_single_tof_clone();
+  proj_data_info_sptr->reduce_segment_range(-max_segment_num_to_process, max_segment_num_to_process);
+
+  return false;
+}
+
+std::string
+LmCache::get_cache_path() const
+{
+  if (this->cache_path.size() > 0)
+    return this->cache_path;
+  else
+    return FilePath::get_current_working_directory();
+}
+
+
+LmCache::LmCache(const char* const par_filename)
+{
+  set_defaults();
+  if (par_filename != 0)
+    parse(par_filename);
+  else
+    ask_parameters();
+}
+
+std::string
+LmCache::get_cache_filename(unsigned int file_id) const
+{
+  std::string cache_filename = "my_CACHE" + std::to_string(file_id) + ".bin";
+  FilePath icache(cache_filename, false);
+  icache.prepend_directory_name(this->get_cache_path());
+  return icache.get_as_string();
+}
+
+Succeeded
+LmCache::write_listmode_cache_file(
+    unsigned int file_id) const
+{
+  const auto cache_filename = this->get_cache_filename(file_id);
+  const bool with_add = false;
+
+  {
+    info("Storing Listmode cache to file \"" + cache_filename + "\".");
+    // open the file, overwriting whatever was there before
+    std::ofstream fout(cache_filename, std::ios::out | std::ios::binary | std::ios::trunc);
+    if (!fout)
+      error("Error opening cache file \"" + cache_filename + "\" for writing.");
+
+    for (unsigned long int ie = 0; ie < record_cache.size(); ++ie)
+      {
+        Bin tmp = record_cache[ie].my_bin;
+        if (with_add)
+          tmp.set_bin_value(record_cache[ie].my_corr);
+        fout.write((char*)&tmp, sizeof(Bin));
+      }
+    if (!fout)
+      error("Error writing to cache file \"" + cache_filename + "\".");
+
+    fout.close();
+  }
+
+  return Succeeded::yes;
+}
+
+void
+LmCache::cache_listmode_file()
+{
+
+         // warning("Looking for existing cache files such as \"" + this->get_cache_filename(0) + "\".\n"
+         //         + "We will be ignoring any time frame definitions as well as num_events_to_use!");
+         // // find how many cache files there are
+         // num_cache_files = 0;
+         // while (true)
+         //   {
+         //     if (!FilePath::exists(this->get_cache_filename(this->num_cache_files)))
+         //       break;
+         //     ++this->num_cache_files;
+         //   }
+         // if (!this->num_cache_files)
+         //   error("No cache files found.");
+         // return ; // Stop here!!!
+
+  num_cache_files = 0;
+
+  info("Listmode reconstruction: Creating cache...", 2);
+
+  record_cache.clear();
+  try
+    {
+      record_cache.reserve(this->cache_size);
+    }
+  catch (...)
+    {
+      error("Listmode: cannot allocate cache for " + std::to_string(this->cache_size) + " records. Reduce cache size.");
+    }
+
+  this->lm_data_sptr->reset();
+  const shared_ptr<ListRecord> record_sptr = this->lm_data_sptr->get_empty_record_sptr();
+
+  double current_time = 0.;
+  unsigned long int cached_events = 0;
+
+  bool stop_caching = false;
+  record_cache.reserve(this->cache_size);
+
+  while (true) // keep caching across multiple files.
+    {
+      record_cache.clear();
+
+      while (true) // Start for the current cache
+        {
+          if (this->lm_data_sptr->get_next_record(*record_sptr) == Succeeded::no)
+            {
+              stop_caching = true;
+              break;
+            }
+
+
+          if (record_sptr->is_event() && record_sptr->event().is_prompt())
+            {
+              BinAndCorr tmp;
+              tmp.my_bin.set_bin_value(1.0);
+              record_sptr->event().get_bin(tmp.my_bin, *this->proj_data_info_sptr);
+
+              if (tmp.my_bin.get_bin_value() != 1.0f
+                  || tmp.my_bin.segment_num() < this->proj_data_info_sptr->get_min_segment_num()
+                  || tmp.my_bin.segment_num() > this->proj_data_info_sptr->get_max_segment_num()
+                  || tmp.my_bin.tangential_pos_num() < this->proj_data_info_sptr->get_min_tangential_pos_num()
+                  || tmp.my_bin.tangential_pos_num() > this->proj_data_info_sptr->get_max_tangential_pos_num()
+                  || tmp.my_bin.axial_pos_num() < this->proj_data_info_sptr->get_min_axial_pos_num(tmp.my_bin.segment_num())
+                  || tmp.my_bin.axial_pos_num() > this->proj_data_info_sptr->get_max_axial_pos_num(tmp.my_bin.segment_num())
+                  || tmp.my_bin.timing_pos_num() < this->proj_data_info_sptr->get_min_tof_pos_num()
+                  || tmp.my_bin.timing_pos_num() > this->proj_data_info_sptr->get_max_tof_pos_num())
+                {
+                  continue;
+                }
+              try
+                {
+                  record_cache.push_back(tmp);
+                  ++cached_events;
+                }
+              catch (...)
+                {
+                  // should never get here due to `reserve` statement above, but best to check...
+                  error("Listmode: running out of memory for cache. Current size: "
+                        + std::to_string(this->record_cache.size()) + " records");
+                }
+
+              if (record_cache.size() > 1 && record_cache.size() % 500000L == 0)
+                info(boost::format("Cached Prompt Events (this cache): %1% ") % record_cache.size());
+
+              if (this->num_events_to_use > 0)
+                if (cached_events >= static_cast<std::size_t>(this->num_events_to_use))
+                  {
+                    stop_caching = true;
+                    break;
+                  }
+
+              if (record_cache.size() == this->cache_size)
+                break; // cache is full. go to next cache.
+            }
+        }
+
+      if (write_listmode_cache_file(this->num_cache_files) == Succeeded::no)
+        {
+          error("Error writing cache file!");
+        }
+      ++this->num_cache_files;
+
+      if (stop_caching)
+        break;
+    }
+  info(boost::format("Cached Events: %1% ") % cached_events);
+  return;
+
+}
+
+
+int
+main(int argc, char* argv[])
+{
+
+  if (argc != 1 && argc != 2)
+    {
+      cerr << "Usage: " << argv[0] << " [par_file]\n";
+      exit(EXIT_FAILURE);
+    }
+  LmCache lm_cache(argc == 2 ? argv[1] : 0);
+  lm_cache.cache_listmode_file();
+
+  return EXIT_SUCCESS;
+}
diff --git a/src/utilities/construct_fanProjData_fromProjData.cxx b/src/utilities/construct_fanProjData_fromProjData.cxx
new file mode 100644
index 000000000..d416b1d60
--- /dev/null
+++ b/src/utilities/construct_fanProjData_fromProjData.cxx
@@ -0,0 +1,70 @@
+//
+//
+/*!
+  \file
+  \ingroup utilities
+
+\brief Construct stir::FanProjData from stir::ProjData
+
+\author Nikos Efthimiou
+
+ */
+
+#include "stir/ProjDataInfoCylindricalNoArcCorr.h"
+#include "stir/ProjData.h"
+#include "stir/utilities.h"
+#include "stir/NumericType.h"
+#include "stir/ByteOrder.h"
+#include "stir/ParsingObject.h"
+#include "stir/FilePath.h"
+#include "stir/ML_norm.h"
+#include "stir/IO/write_data.h"
+
+#include <iostream>
+#include <fstream>
+#include "stir/warning.h"
+#include "stir/error.h"
+
+using std::ios;
+using std::iostream;
+using std::streamoff;
+using std::fstream;
+using std::cout;
+using std::cerr;
+using std::endl;
+using std::vector;
+
+USING_NAMESPACE_STIR
+
+static void print_usage_and_exit()
+{
+  std::cerr << "\nUsage construct_fanProjData_fromProjData [output_filename] [input_projdata_filename]\n" <<std::endl;
+  exit(EXIT_FAILURE);
+}
+
+int
+main(int argc, char* argv[])
+{
+
+  if(argc != 3)
+      print_usage_and_exit();
+
+   shared_ptr<ProjData> in_proj_data_sptr = ProjData::read_from_file(argv[2]);
+
+   const std::string output_file_name = argv[1];
+   FanProjData out_fan_data;
+  make_fan_data_remove_gaps(out_fan_data, *in_proj_data_sptr);
+
+   shared_ptr<iostream> fan_stream(
+       new std::fstream(output_file_name, std::ios::out | std::ios::binary));
+   // Array<1, float> value(1);
+   float scale = 1.f;
+
+   info("Writing to disk...");
+   if(write_data(*fan_stream, out_fan_data, NumericType::Type::FLOAT,scale,
+                  ByteOrder::Order::little_endian) == Succeeded::no)
+    error("Error writing FanProjData\n");
+
+   info("Finished writing.");
+  return EXIT_SUCCESS;
+}
diff --git a/src/utilities/find_ML_normfactors3D.cxx b/src/utilities/find_ML_normfactors3D.cxx
index 2e78c0cbf..05c28f7d6 100644
--- a/src/utilities/find_ML_normfactors3D.cxx
+++ b/src/utilities/find_ML_normfactors3D.cxx
@@ -25,13 +25,17 @@
 #include <iostream>
 #include <string>
 
+#include "stir/FilePath.h"
+
 static void
 print_usage_and_exit(const std::string& program_name)
 {
   std::cerr << "Usage: " << program_name
-            << " [--display | --print-KL | --include-block-timing-model | --for-symmetry-per-block] \\\n"
+            << " [--display | --print-KL | --include-block-timing-model | --for-symmetry-per-block | --model-in-fan-data | --use-listmode-cache] \\\n"
             << " out_filename_prefix measured_data model num_iterations num_eff_iterations\n"
-            << " set num_iterations to 0 to do only efficiencies\n";
+            << " set num_iterations to 0 to do only efficiencies\n"
+            << " NOTE: If you already have the model fansums they need to be in the same path as the projdata with the same name and suffix .fsum\n";
+
   exit(EXIT_FAILURE);
 }
 
@@ -51,6 +55,10 @@ main(int argc, char** argv)
   bool do_block = false;
   bool do_symmetry_per_block = false;
 
+  bool use_lm_cache = false;
+  bool use_model_fandata = false;
+  std::string model_fandata_filename;
+
   // first process command line options
   while (argc > 0 && argv[0][0] == '-' && argc >= 1)
     {
@@ -84,6 +92,18 @@ main(int argc, char** argv)
           --argc;
           ++argv;
         }
+      else if (strcmp(argv[0], "--model-in-fandata") == 0)
+        {
+          use_model_fandata = true;
+          --argc;
+          ++argv;
+        }
+      else if (strcmp(argv[0], "--use-listmode-cache") == 0)
+        {
+          use_lm_cache = true;
+          --argc;
+          ++argv;
+        }
       else
         print_usage_and_exit(program_name);
     }
@@ -98,7 +118,18 @@ main(int argc, char** argv)
     }
   const int num_eff_iterations = atoi(argv[5]);
   const int num_iterations = atoi(argv[4]);
+
   shared_ptr<ProjData> model_data = ProjData::read_from_file(argv[3]);
+  FilePath fp(argv[3] );
+  fp.replace_extension(".fsum");
+  if (!fp.exists(fp.get_as_string()))
+    {
+      warning("We could not find a fansum model. Defaulting to ProjData.");
+      use_model_fandata = false;
+    }
+  else
+    model_fandata_filename = fp.get_as_string();
+
   shared_ptr<ProjData> measured_data = ProjData::read_from_file(argv[2]);
   const std::string out_filename_prefix = argv[1];
 
@@ -114,7 +145,9 @@ main(int argc, char** argv)
                                             do_block,
                                             do_symmetry_per_block,
                                             do_KL,
-                                            do_display);
+                                            do_display,
+                                            use_lm_cache,
+                                            use_model_fandata, model_fandata_filename);
 
   timer.stop();
   info(boost::format("CPU time %1% secs") % timer.value());