cms-patatrack · makortel · Jan 4, 2021 · Dec 29, 2020 · Dec 29, 2020 · Dec 29, 2020
diff --git a/README.md b/README.md
@@ -11,6 +11,7 @@
     * [`cuda`](#cuda)
     * [`cudadev`](#cudadev)
     * [`cudauvm`](#cudauvm)
+    * [`cudacompat`](#cudacompat)
     * [`kokkos` and `kokkostest`](#kokkos-and-kokkostest)
 * [Code structure](#code-structure)
 * [Build system](#build-system)
@@ -37,6 +38,7 @@ In addition, the individual programs assume the following be found from the syst
 | `cuda`       |                    | :heavy_check_mark:          |                                                                                                                  |
 | `cudadev`    |                    | :heavy_check_mark:          |                                                                                                                  |
 | `cudauvm`    |                    | :heavy_check_mark:          |                                                                                                                  |
+| `cudacompat` |                    | :heavy_check_mark:          |                                                                                                                  |
 | `kokkostest` | :heavy_check_mark: | :heavy_check_mark:          |                                                                                                                  |
 | `kokkos`     | :heavy_check_mark: | :heavy_check_mark:          |                                                                                                                  |
 | `alpakatest` |                    | :heavy_check_mark:          |                                                                                                                  |
@@ -54,6 +56,7 @@ All other dependencies (listed below) are downloaded and built automatically
 | `cuda`       | :heavy_check_mark:                  | :heavy_check_mark:                   |                                            |                                     |                                                  |
 | `cudadev`    | :heavy_check_mark:                  | :heavy_check_mark:                   |                                            |                                     |                                                  |
 | `cudauvm`    | :heavy_check_mark:                  | :heavy_check_mark:                   |                                            |                                     |                                                  |
+| `cudacompat` | :heavy_check_mark:                  | :heavy_check_mark:                   |                                            |                                     |                                                  |
 | `kokkostest` | :heavy_check_mark:                  |                                      | :heavy_check_mark:                         |                                     |                                                  |
 | `kokkos`     | :heavy_check_mark:                  | :heavy_check_mark:                   | :heavy_check_mark:                         |                                     |                                                  |
 | `alpakatest` | :heavy_check_mark:                  |                                      |                                            | :heavy_check_mark:                  | :heavy_check_mark:                               |
@@ -79,6 +82,7 @@ downloaded automatically during the build process.
 | `cuda`       | CUDA version (frozen)            | :heavy_check_mark: | :heavy_check_mark: |                    | :heavy_check_mark: | :heavy_check_mark: | :heavy_check_mark: | :heavy_check_mark: | :heavy_check_mark: | :heavy_check_mark: | :heavy_check_mark: |
 | `cudadev`    | CUDA version (development)       | :heavy_check_mark: | :heavy_check_mark: |                    | :heavy_check_mark: | :heavy_check_mark: | :heavy_check_mark: | :heavy_check_mark: | :heavy_check_mark: | :heavy_check_mark: | :heavy_check_mark: |
 | `cudauvm`    | CUDA version with managed memory | :heavy_check_mark: | :heavy_check_mark: |                    | :heavy_check_mark: | :white_check_mark: | :white_check_mark: | :white_check_mark: | :white_check_mark: | :heavy_check_mark: | :heavy_check_mark: |
+| `cudacompat` | CPU version (with `cudaCompat`)  | :heavy_check_mark: | :heavy_check_mark: |                    | :heavy_check_mark: | :heavy_check_mark: | :heavy_check_mark: | :heavy_check_mark: | :heavy_check_mark: | :heavy_check_mark: | :heavy_check_mark: |
 | `kokkostest` | Kokkos FW test                   | :heavy_check_mark: | :heavy_check_mark: | :heavy_check_mark: |                    |                    |                    |                    |                    |                    |                    |
 | `kokkos`     | Kokkos version                   | :heavy_check_mark: |                    | :heavy_check_mark: | :heavy_check_mark: | :heavy_check_mark: | :heavy_check_mark: | :heavy_check_mark: | :heavy_check_mark: | :heavy_check_mark: | :heavy_check_mark: |
 | `alpakatest` | Alpaka FW test                   | :heavy_check_mark: |                    | :white_check_mark: |                    |                    |                    |                    |                    |                    |                    |
@@ -183,6 +187,14 @@ To use managed memory also for temporary device-only allocations, compile with
 make cudauvm ... USER_CXXFLAGS="-DCUDAUVM_MANAGED_TEMPORARY"
 ```
 
+#### `cudacompat`
+
+This program is a fork of `cuda` by extending the use of `cudaCompat` to clustering and RecHits. The aim is to run the same code on CPU. Currently, however, the program requires a GPU because of (still) using pinned host memory in a few places. In the future the program could be extended to provide both CUDA and CPU flavors.
+
+The program contains the changes from following external PRs on top of `cuda`
+* [cms-patatrack/cmssw#586](https://github.com/cms-patatrack/cmssw/pull/586)
+* [cms-patatrack/cmssw#588](https://github.com/cms-patatrack/cmssw/pull/588)
+
 #### `kokkos` and `kokkostest`
 
 ```bash

diff --git a/run-scan.py b/run-scan.py
@@ -14,6 +14,7 @@
     "fwtest": 1,
     "cuda": {"": 100, "transfer": 100},
     "cudauvm": {"": 100, "transfer": 100},
+    "cudacompat": {"": 8},
 }
 
 result_re = re.compile("Processed (?P<events>\d+) events in (?P<time>\S+) seconds, throughput (?P<throughput>\S+) events/s")

diff --git a/src/cudacompat/CUDACore/AtomicPairCounter.h b/src/cudacompat/CUDACore/AtomicPairCounter.h
@@ -0,0 +1,58 @@
+#ifndef HeterogeneousCore_CUDAUtilities_interface_AtomicPairCounter_h
+#define HeterogeneousCore_CUDAUtilities_interface_AtomicPairCounter_h
+
+#include <cstdint>
+
+#include "CUDACore/cudaCompat.h"
+
+namespace cms {
+  namespace cuda {
+
+    class AtomicPairCounter {
+    public:
+      using c_type = unsigned long long int;
+
+      AtomicPairCounter() {}
+      AtomicPairCounter(c_type i) { counter.ac = i; }
+
+      __device__ __host__ AtomicPairCounter& operator=(c_type i) {
+        counter.ac = i;
+        return *this;
+      }
+
+      struct Counters {
+        uint32_t n;  // in a "One to Many" association is the number of "One"
+        uint32_t m;  // in a "One to Many" association is the total number of associations
+      };
+
+      union Atomic2 {
+        Counters counters;
+        c_type ac;
+      };
+
+      static constexpr c_type incr = 1UL << 32;
+
+      __device__ __host__ Counters get() const { return counter.counters; }
+
+      // increment n by 1 and m by i.  return previous value
+      __host__ __device__ __forceinline__ Counters add(uint32_t i) {
+        c_type c = i;
+        c += incr;
+        Atomic2 ret;
+#ifdef __CUDA_ARCH__
+        ret.ac = atomicAdd(&counter.ac, c);
+#else
+        ret.ac = counter.ac;
+        counter.ac += c;
+#endif
+        return ret.counters;
+      }
+
+    private:
+      Atomic2 counter;
+    };
+
+  }  // namespace cuda
+}  // namespace cms
+
+#endif  // HeterogeneousCore_CUDAUtilities_interface_AtomicPairCounter_h