diff --git a/lib/gpu/lal_preprocessor.h b/lib/gpu/lal_preprocessor.h
index 69a8e61bd481b5caeedfa46991dcfcb8a9f0e9e6..566a451c21b83c34090fcc8a8a150fe42d4eef17 100644
--- a/lib/gpu/lal_preprocessor.h
+++ b/lib/gpu/lal_preprocessor.h
@@ -119,6 +119,8 @@
 #define BLOCK_ELLIPSE 128
 #define MAX_SHARED_TYPES 11
 
+#if (__CUDACC_VER_MAJOR__ < 9)
+
 #ifdef _SINGLE_SINGLE
 #define shfl_xor __shfl_xor
 #else
@@ -132,6 +134,29 @@ ucl_inline double shfl_xor(double var, int laneMask, int width) {
 }
 #endif
 
+#else
+
+/* CUDA >= 9: use __shfl_xor_sync with an all-lanes member mask. */
+#ifdef _SINGLE_SINGLE
+/* Single precision: shuffle the float as-is, like the pre-CUDA 9 macro,
+   rather than promoting it to double and converting the result back. */
+ucl_inline float shfl_xor(float var, int laneMask, int width) {
+  return __shfl_xor_sync(0xffffffff, var, laneMask, width);
+}
+#else
+/* Double precision: shuffle the high and low 32-bit words separately. */
+ucl_inline double shfl_xor(double var, int laneMask, int width) {
+  int2 tmp;
+  tmp.x = __double2hiint(var);
+  tmp.y = __double2loint(var);
+  tmp.x = __shfl_xor_sync(0xffffffff,tmp.x,laneMask,width);
+  tmp.y = __shfl_xor_sync(0xffffffff,tmp.y,laneMask,width);
+  return __hiloint2double(tmp.x,tmp.y);
+}
+#endif
+
+#endif
+
 #endif
 
 #endif