From da7c18cc7c24a273e316b6d82dbed27f1c846d09 Mon Sep 17 00:00:00 2001
From: Daniel Povey <dpovey@gmail.com>
Date: Fri, 18 Dec 2020 23:22:55 +0800
Subject: [PATCH] Changes that should not cause a crash, but do.

---
 k2/csrc/intersect_pruned.cu | 2 +-
 k2/csrc/utils.h             | 6 +-----
 2 files changed, 2 insertions(+), 6 deletions(-)

diff --git a/k2/csrc/intersect_pruned.cu b/k2/csrc/intersect_pruned.cu
index 6999ad200..6454f2c09 100644
--- a/k2/csrc/intersect_pruned.cu
+++ b/k2/csrc/intersect_pruned.cu
@@ -846,7 +846,7 @@ class MultiGraphDenseIntersectPruned {
             // Set the forward log-like of the dest state to the largest of any
             // of those of the incoming arcs.  Note: we initialized this in
             // lambda_init_loglike above.
-            AtomicMax(&(kept_states_data[state_idx01].forward_loglike),
+            atomicMax(&(kept_states_data[state_idx01].forward_loglike),
                       end_loglike_int);
           });
     }
diff --git a/k2/csrc/utils.h b/k2/csrc/utils.h
index e9a570300..74426f806 100644
--- a/k2/csrc/utils.h
+++ b/k2/csrc/utils.h
@@ -601,14 +601,10 @@ __host__ __device__ __forceinline__ float OrderedIntToFloat(int32_t i) {
   host version of Cuda's atomicMax function, marked __host__ (the default) for
   clarity.  So we can use this in lambdas that run on both host and device.
  */
-__host__ __device__ __forceinline__ int32_t AtomicMax(int32_t *address, int32_t val) {
-#if defined(__CUDA_ARCH__)
-  return atomicMax(address, val);
-#else
+__host__ __forceinline__ int32_t atomicMax(int32_t *address, int32_t val) {
   int32_t old = *address;
   if (old < val) *address = val;
   return old;
-#endif
 }
 
 // have to figure out if there's a better place to put this