From 5a4a7cebc10788381f0dd230743ef064df58c525 Mon Sep 17 00:00:00 2001
From: Stan Moore <stamoor@sandia.gov>
Date: Wed, 16 May 2018 16:12:05 -0600
Subject: [PATCH] Fix performance regression in KOKKOS package

---
 .../core/src/impl/Kokkos_Atomic_Fetch_Sub.hpp      | 14 ++++++++++++++
 1 file changed, 14 insertions(+)

diff --git a/lib/kokkos/core/src/impl/Kokkos_Atomic_Fetch_Sub.hpp b/lib/kokkos/core/src/impl/Kokkos_Atomic_Fetch_Sub.hpp
index 28aca0aeed..3f58c55396 100644
--- a/lib/kokkos/core/src/impl/Kokkos_Atomic_Fetch_Sub.hpp
+++ b/lib/kokkos/core/src/impl/Kokkos_Atomic_Fetch_Sub.hpp
@@ -70,6 +70,20 @@ __inline__ __device__
 unsigned int atomic_fetch_sub( volatile unsigned int * const dest , const unsigned int val )
 { return atomicSub((unsigned int*)dest,val); }
 
+__inline__ __device__
+unsigned int atomic_fetch_sub( volatile int64_t * const dest , const int64_t val )
+{ return atomic_fetch_add(dest,-val); }
+
+__inline__ __device__
+unsigned int atomic_fetch_sub( volatile float * const dest , const float val )
+{ return atomicAdd((float*)dest,-val); }
+
+#if ( 600 <= __CUDA_ARCH__ )
+__inline__ __device__
+unsigned int atomic_fetch_sub( volatile double * const dest , const double val )
+{ return atomicAdd((double*)dest,-val); }
+#endif
+
 template < typename T >
 __inline__ __device__
 T atomic_fetch_sub( volatile T * const dest ,
-- 
GitLab