From da7c18cc7c24a273e316b6d82dbed27f1c846d09 Mon Sep 17 00:00:00 2001
From: Daniel Povey
Date: Fri, 18 Dec 2020 23:22:55 +0800
Subject: [PATCH] Changes that should not cause crash, but do.

---
 k2/csrc/intersect_pruned.cu | 2 +-
 k2/csrc/utils.h             | 6 +-----
 2 files changed, 2 insertions(+), 6 deletions(-)

diff --git a/k2/csrc/intersect_pruned.cu b/k2/csrc/intersect_pruned.cu
index 6999ad200..6454f2c09 100644
--- a/k2/csrc/intersect_pruned.cu
+++ b/k2/csrc/intersect_pruned.cu
@@ -846,7 +846,7 @@ class MultiGraphDenseIntersectPruned {
           // Set the forward log-like of the dest state to the largest of any
           // of those of the incoming arcs. Note: we initialized this in
           // lambda_init_loglike above.
-          AtomicMax(&(kept_states_data[state_idx01].forward_loglike),
+          atomicMax(&(kept_states_data[state_idx01].forward_loglike),
                     end_loglike_int);
         });
   }
diff --git a/k2/csrc/utils.h b/k2/csrc/utils.h
index e9a570300..74426f806 100644
--- a/k2/csrc/utils.h
+++ b/k2/csrc/utils.h
@@ -601,14 +601,10 @@ __host__ __device__ __forceinline__ float OrderedIntToFloat(int32_t i) {
   host version of Cuda's atomicMax function, marked __host__ (the default) for
   clarity. So we can use this in lambdas that run on both host and device.
  */
-__host__ __device__ __forceinline__ int32_t AtomicMax(int32_t *address, int32_t val) {
-#if defined(__CUDA_ARCH__)
-  return atomicMax(address, val);
-#else
+__host__ __forceinline__ int32_t atomicMax(int32_t *address, int32_t val) {
   int32_t old = *address;
   if (old < val) *address = val;
   return old;
-#endif
 }
 
 // have to figure out if there's a better place to put this