From 6d60075d3bef9f874fed97ef2c3a7412b4a47e0a Mon Sep 17 00:00:00 2001
From: Richard Berger <richard.berger@temple.edu>
Date: Thu, 9 Aug 2018 02:33:35 -0400
Subject: [PATCH] GPU Package: Use __shfl_xor_sync starting with CUDA 9

---
 lib/gpu/lal_preprocessor.h | 25 +++++++++++++++++++++++++
 1 file changed, 25 insertions(+)

diff --git a/lib/gpu/lal_preprocessor.h b/lib/gpu/lal_preprocessor.h
index 69a8e61bd4..566a451c21 100644
--- a/lib/gpu/lal_preprocessor.h
+++ b/lib/gpu/lal_preprocessor.h
@@ -119,6 +119,9 @@
 #define BLOCK_ELLIPSE 128
 #define MAX_SHARED_TYPES 11
 
+// Before CUDA 9 the legacy, mask-less __shfl_xor intrinsic is used.
+#if (__CUDACC_VER_MAJOR__ < 9)
+
 #ifdef _SINGLE_SINGLE
 #define shfl_xor __shfl_xor
 #else
@@ -132,6 +135,28 @@ ucl_inline double shfl_xor(double var, int laneMask, int width) {
 }
 #endif
 
+#else
+
+// CUDA 9 and later: use the *_sync shuffle, which takes an explicit
+// participation mask as its first argument (0xffffffff = all 32 bits set).
+#ifdef _SINGLE_SINGLE
+ucl_inline float shfl_xor(float var, int laneMask, int width) {
+  return __shfl_xor_sync(0xffffffff, var, laneMask, width);
+}
+#else
+// Shuffle a double as its high and low 32-bit words, then reassemble it.
+ucl_inline double shfl_xor(double var, int laneMask, int width) {
+  int2 tmp;
+  tmp.x = __double2hiint(var);
+  tmp.y = __double2loint(var);
+  tmp.x = __shfl_xor_sync(0xffffffff,tmp.x,laneMask,width);
+  tmp.y = __shfl_xor_sync(0xffffffff,tmp.y,laneMask,width);
+  return __hiloint2double(tmp.x,tmp.y);
+}
+#endif
+
+#endif
+
 #endif
 
 #endif
-- 
GitLab