diff --git a/lib/kokkos/core/src/impl/Kokkos_Atomic_Fetch_Sub.hpp b/lib/kokkos/core/src/impl/Kokkos_Atomic_Fetch_Sub.hpp
index 28aca0aeed25090ec2c6bb7655c9ead3757b841d..3f58c55396d5ca2f841b066f043ab8d0e6b9e660 100644
--- a/lib/kokkos/core/src/impl/Kokkos_Atomic_Fetch_Sub.hpp
+++ b/lib/kokkos/core/src/impl/Kokkos_Atomic_Fetch_Sub.hpp
@@ -70,6 +70,20 @@ __inline__ __device__
 unsigned int atomic_fetch_sub( volatile unsigned int * const dest , const unsigned int val )
 { return atomicSub((unsigned int*)dest,val); }
 
+__inline__ __device__
+unsigned int atomic_fetch_sub( volatile int64_t * const dest , const int64_t val )
+{ return atomic_fetch_add(dest,-val); }
+
+__inline__ __device__
+unsigned int atomic_fetch_sub( volatile float * const dest , const float val )
+{ return atomicAdd((float*)dest,-val); }
+
+#if ( 600 <= __CUDA_ARCH__ )
+__inline__ __device__
+unsigned int atomic_fetch_sub( volatile double * const dest , const double val )
+{ return atomicAdd((double*)dest,-val); }
+#endif
+
 template < typename T >
 __inline__ __device__
 T atomic_fetch_sub( volatile T * const dest ,