From 6d60075d3bef9f874fed97ef2c3a7412b4a47e0a Mon Sep 17 00:00:00 2001
From: Richard Berger <richard.berger@temple.edu>
Date: Thu, 9 Aug 2018 02:33:35 -0400
Subject: [PATCH] GPU Package: Use __shfl_xor_sync starting with CUDA 9

---
 lib/gpu/lal_preprocessor.h | 21 +++++++++++++++++++++
 1 file changed, 21 insertions(+)

diff --git a/lib/gpu/lal_preprocessor.h b/lib/gpu/lal_preprocessor.h
index 69a8e61bd4..566a451c21 100644
--- a/lib/gpu/lal_preprocessor.h
+++ b/lib/gpu/lal_preprocessor.h
@@ -119,6 +119,8 @@
 #define BLOCK_ELLIPSE 128
 #define MAX_SHARED_TYPES 11
 
+#if (__CUDACC_VER_MAJOR__ < 9)
+
 #ifdef _SINGLE_SINGLE
 #define shfl_xor __shfl_xor
 #else
@@ -132,6 +134,25 @@ ucl_inline double shfl_xor(double var, int laneMask, int width) {
 }
 #endif
 
+#else
+
+#ifdef _SINGLE_SINGLE
+ucl_inline double shfl_xor(double var, int laneMask, int width) {
+  return __shfl_xor_sync(0xffffffff, var, laneMask, width);
+}
+#else
+ucl_inline double shfl_xor(double var, int laneMask, int width) {
+  int2 tmp;
+  tmp.x = __double2hiint(var);
+  tmp.y = __double2loint(var);
+  tmp.x = __shfl_xor_sync(0xffffffff,tmp.x,laneMask,width);
+  tmp.y = __shfl_xor_sync(0xffffffff,tmp.y,laneMask,width);
+  return __hiloint2double(tmp.x,tmp.y);
+}
+#endif
+
+#endif
+
 #endif
 
 #endif
-- 
GitLab