diff --git a/lib/gpu/lal_born_coul_long_cs.cu b/lib/gpu/lal_born_coul_long_cs.cu
index c5f98567d937cfc093653be7e131a7575588bf15..a6821e1cf73f7ddd6aa8681a6ed86ecf3182e762 100644
--- a/lib/gpu/lal_born_coul_long_cs.cu
+++ b/lib/gpu/lal_born_coul_long_cs.cu
@@ -121,7 +121,7 @@ __kernel void k_born_coul_long_cs(const __global numtyp4 *restrict x_,
           if (factor_coul<(numtyp)1.0) {
             numtyp grij = g_ewald * (r+EPS_EWALD);
             numtyp expm2 = ucl_exp(-grij*grij);
-            numtyp t = ucl_recip((numtyp)1.0 + CS_EWALD_P*grij);
+            acctyp t = ucl_recip((numtyp)1.0 + CS_EWALD_P*grij); // NOTE(review): t is acctyp while grij/u stay numtyp, presumably for _erfc accuracy; confirm
             numtyp u = (numtyp)1.0 - t;
             _erfc = t * ((numtyp)1.0 + u*(B0+u*(B1+u*(B2+u*(B3+u*(B4+u*B5)))))) * expm2;
             prefactor /= (r+EPS_EWALD);
@@ -132,7 +132,7 @@ __kernel void k_born_coul_long_cs(const __global numtyp4 *restrict x_,
           } else {
             numtyp grij = g_ewald * r;
             numtyp expm2 = ucl_exp(-grij*grij);
-            numtyp t = ucl_recip((numtyp)1.0 + CS_EWALD_P*grij);
+            acctyp t = ucl_recip((numtyp)1.0 + CS_EWALD_P*grij); // NOTE(review): t is acctyp while grij/u stay numtyp, presumably for _erfc accuracy; confirm
             numtyp u = (numtyp)1.0 - t;
             _erfc = t * ((numtyp)1.0 + u*(B0+u*(B1+u*(B2+u*(B3+u*(B4+u*B5)))))) * expm2;
             prefactor /= r;
@@ -262,7 +262,7 @@ __kernel void k_born_coul_long_cs_fast(const __global numtyp4 *restrict x_,
           if (factor_coul<(numtyp)1.0) {
             numtyp grij = g_ewald * (r+EPS_EWALD);
             numtyp expm2 = ucl_exp(-grij*grij);
-            numtyp t = ucl_recip((numtyp)1.0 + CS_EWALD_P*grij);
+            acctyp t = ucl_recip((numtyp)1.0 + CS_EWALD_P*grij); // NOTE(review): t is acctyp while grij/u stay numtyp, presumably for _erfc accuracy; confirm
             numtyp u = (numtyp)1.0 - t;
             _erfc = t * ((numtyp)1.0 + u*(B0+u*(B1+u*(B2+u*(B3+u*(B4+u*B5)))))) * expm2;
             prefactor /= (r+EPS_EWALD);
@@ -271,10 +271,9 @@ __kernel void k_born_coul_long_cs_fast(const __global numtyp4 *restrict x_,
             // scaling of the overall force shall be consistent
             r2inv = ucl_recip(rsq + EPS_EWALD_SQR);
           } else {
-
             numtyp grij = g_ewald * r;
             numtyp expm2 = ucl_exp(-grij*grij);
-            numtyp t = ucl_recip((numtyp)1.0 + CS_EWALD_P*grij);
+            acctyp t = ucl_recip((numtyp)1.0 + CS_EWALD_P*grij); // NOTE(review): t is acctyp while grij/u stay numtyp, presumably for _erfc accuracy; confirm
             numtyp u = (numtyp)1.0 - t;
             _erfc = t * ((numtyp)1.0 + u*(B0+u*(B1+u*(B2+u*(B3+u*(B4+u*B5)))))) * expm2;
             prefactor /= r;
diff --git a/lib/gpu/lal_coul_long_cs.cu b/lib/gpu/lal_coul_long_cs.cu
index 1ff9445f4ceca6563ac166d40bd4782fc40652ea..c0387661294adfaba594194cfd2d235bcb0f2ded 100644
--- a/lib/gpu/lal_coul_long_cs.cu
+++ b/lib/gpu/lal_coul_long_cs.cu
@@ -205,7 +205,7 @@ __kernel void k_coul_long_cs(const __global numtyp4 *restrict x_,
         if (factor_coul<(numtyp)1.0) {
           numtyp grij = g_ewald * (r+EPS_EWALD);
           numtyp expm2 = ucl_exp(-grij*grij);
-          numtyp t = ucl_recip((numtyp)1.0 + CS_EWALD_P*grij);
+          acctyp t = ucl_recip((numtyp)1.0 + CS_EWALD_P*grij); // NOTE(review): t is acctyp while grij/u stay numtyp, presumably for _erfc accuracy; confirm
           numtyp u = (numtyp)1.0 - t;
           _erfc = t * ((numtyp)1.0 + u*(B0+u*(B1+u*(B2+u*(B3+u*(B4+u*B5)))))) * expm2;
           prefactor /= (r+EPS_EWALD);
@@ -217,7 +217,7 @@ __kernel void k_coul_long_cs(const __global numtyp4 *restrict x_,
         } else {
           numtyp grij = g_ewald * r;
           numtyp expm2 = ucl_exp(-grij*grij);
-          numtyp t = ucl_recip((numtyp)1.0 + CS_EWALD_P*grij);
+          acctyp t = ucl_recip((numtyp)1.0 + CS_EWALD_P*grij); // NOTE(review): t is acctyp while grij/u stay numtyp, presumably for _erfc accuracy; confirm
           numtyp u = (numtyp)1.0 - t;
           _erfc = t * ((numtyp)1.0 + u*(B0+u*(B1+u*(B2+u*(B3+u*(B4+u*B5)))))) * expm2;
           prefactor /= r;
@@ -320,7 +320,7 @@ __kernel void k_coul_long_cs_fast(const __global numtyp4 *restrict x_,
         if (factor_coul<(numtyp)1.0) {
           numtyp grij = g_ewald * (r+EPS_EWALD);
           numtyp expm2 = ucl_exp(-grij*grij);
-          numtyp t = ucl_recip((numtyp)1.0 + CS_EWALD_P*grij);
+          acctyp t = ucl_recip((numtyp)1.0 + CS_EWALD_P*grij); // NOTE(review): t is acctyp while grij/u stay numtyp, presumably for _erfc accuracy; confirm
           numtyp u = (numtyp)1.0 - t;
           _erfc = t * ((numtyp)1.0 + u*(B0+u*(B1+u*(B2+u*(B3+u*(B4+u*B5)))))) * expm2;
           prefactor /= (r+EPS_EWALD);
@@ -331,7 +331,7 @@ __kernel void k_coul_long_cs_fast(const __global numtyp4 *restrict x_,
         } else {
           numtyp grij = g_ewald * r;
           numtyp expm2 = ucl_exp(-grij*grij);
-          numtyp t = ucl_recip((numtyp)1.0 + CS_EWALD_P*grij);
+          acctyp t = ucl_recip((numtyp)1.0 + CS_EWALD_P*grij); // NOTE(review): t is acctyp while grij/u stay numtyp, presumably for _erfc accuracy; confirm
           numtyp u = (numtyp)1.0 - t;
           _erfc = t * ((numtyp)1.0 + u*(B0+u*(B1+u*(B2+u*(B3+u*(B4+u*B5)))))) * expm2;
           prefactor /= r;
diff --git a/src/GPU/Install.sh b/src/GPU/Install.sh
index c3c2ce168ccf99476d8a9b11c538bd3338c11e82..9b029d15cbdfbd8c322799515e2f17217842ac31 100755
--- a/src/GPU/Install.sh
+++ b/src/GPU/Install.sh
@@ -35,6 +35,8 @@ action pair_beck_gpu.cpp
 action pair_beck_gpu.h 
 action pair_born_coul_long_gpu.cpp pair_born_coul_long.cpp
 action pair_born_coul_long_gpu.h pair_born_coul_long.cpp
+action pair_born_coul_long_cs_gpu.cpp pair_born_coul_long_cs.cpp # NOTE(review): 2nd arg presumably the base-style file this one requires; confirm in action()
+action pair_born_coul_long_cs_gpu.h pair_born_coul_long_cs.cpp
 action pair_born_coul_wolf_gpu.cpp
 action pair_born_coul_wolf_gpu.h
 action pair_born_gpu.cpp
@@ -55,6 +57,8 @@ action pair_coul_dsf_gpu.cpp
 action pair_coul_dsf_gpu.h
 action pair_coul_long_gpu.cpp pair_coul_long.cpp
 action pair_coul_long_gpu.h pair_coul_long.cpp
+action pair_coul_long_cs_gpu.cpp pair_coul_long_cs.cpp # NOTE(review): 2nd arg presumably the base-style file this one requires; confirm in action()
+action pair_coul_long_cs_gpu.h pair_coul_long_cs.cpp
 action pair_dpd_gpu.cpp
 action pair_dpd_gpu.h
 action pair_dpd_tstat_gpu.cpp