From 5a4a7cebc10788381f0dd230743ef064df58c525 Mon Sep 17 00:00:00 2001
From: Stan Moore <stamoor@sandia.gov>
Date: Wed, 16 May 2018 16:12:05 -0600
Subject: [PATCH] Fix performance regression in KOKKOS package

---
 .../core/src/impl/Kokkos_Atomic_Fetch_Sub.hpp | 20 ++++++++++++++++++++
 1 file changed, 20 insertions(+)

diff --git a/lib/kokkos/core/src/impl/Kokkos_Atomic_Fetch_Sub.hpp b/lib/kokkos/core/src/impl/Kokkos_Atomic_Fetch_Sub.hpp
index 28aca0aeed..3f58c55396 100644
--- a/lib/kokkos/core/src/impl/Kokkos_Atomic_Fetch_Sub.hpp
+++ b/lib/kokkos/core/src/impl/Kokkos_Atomic_Fetch_Sub.hpp
@@ -70,6 +70,26 @@ __inline__ __device__
 unsigned int atomic_fetch_sub( volatile unsigned int * const dest , const unsigned int val )
 { return atomicSub((unsigned int*)dest,val); }
 
+// 64-bit signed subtract: forwards to atomic_fetch_add with the negated operand.
+// The return type matches the operand so the result is not narrowed to 32 bits.
+__inline__ __device__
+int64_t atomic_fetch_sub( volatile int64_t * const dest , const int64_t val )
+{ return atomic_fetch_add(dest,-val); }
+
+// Single-precision subtract via atomicAdd of the negated operand; atomicAdd
+// returns the value held at dest before the update.
+__inline__ __device__
+float atomic_fetch_sub( volatile float * const dest , const float val )
+{ return atomicAdd((float*)dest,-val); }
+
+// Double-precision atomicAdd needs compute capability 6.0+. NOTE(review): older
+// architectures presumably fall through to the generic template below - confirm.
+#if ( 600 <= __CUDA_ARCH__ )
+__inline__ __device__
+double atomic_fetch_sub( volatile double * const dest , const double val )
+{ return atomicAdd((double*)dest,-val); }
+#endif
+
 template < typename T >
 __inline__ __device__
 T atomic_fetch_sub( volatile T * const dest ,
-- 
GitLab