diff --git a/lib/gpu/lal_neighbor.cpp b/lib/gpu/lal_neighbor.cpp index d264a933a1f160934c0a9f367bd32343e9902680..c6fa1aa5601dc5f71c38d1ece9dc0b9ab8fbfb37 100644 --- a/lib/gpu/lal_neighbor.cpp +++ b/lib/gpu/lal_neighbor.cpp @@ -127,10 +127,11 @@ void Neighbor::alloc(bool &success) { dev_packed.clear(); success=success && (dev_packed.alloc((_max_nbors+2)*_max_atoms,*dev, _packed_permissions)==UCL_SUCCESS); - dev_acc.clear(); - success=success && (dev_acc.alloc(_max_atoms,*dev, + dev_ilist.clear(); + success=success && (dev_ilist.alloc(_max_atoms,*dev, UCL_READ_WRITE)==UCL_SUCCESS); - _c_bytes+=dev_packed.row_bytes()+dev_acc.row_bytes(); + dev_ilist.clear(); + _c_bytes+=dev_packed.row_bytes()+dev_ilist.row_bytes(); } if (_max_host>0) { nbor_host.clear(); @@ -197,7 +198,7 @@ void Neighbor::clear() { host_packed.clear(); host_acc.clear(); - dev_acc.clear(); + dev_ilist.clear(); dev_nbor.clear(); nbor_host.clear(); dev_packed.clear(); @@ -289,7 +290,7 @@ void Neighbor::get_host(const int inum, int *ilist, int *numj, int i=ilist[ii]; host_view[i] = ii; } - ucl_copy(dev_acc,host_view,true); + ucl_copy(dev_ilist,host_view,true); time_nbor.stop(); diff --git a/lib/gpu/lal_neighbor.h b/lib/gpu/lal_neighbor.h index 01f7b027981a3a18036b6eb072f60d9c23de33d4..996deaff6d12bbbfa6789ac12dae0910b7f7f699 100644 --- a/lib/gpu/lal_neighbor.h +++ b/lib/gpu/lal_neighbor.h @@ -200,7 +200,7 @@ class Neighbor { /// Host storage for nbor counts (row 1) & accumulated neighbor counts (row2) UCL_H_Vec<int> host_acc; /// Device storage for accessing atom indices from the neighbor list (3-body) - UCL_D_Vec<int> dev_acc; + UCL_D_Vec<int> dev_ilist; // ----------------- Data for GPU Neighbor Calculation --------------- diff --git a/lib/gpu/lal_sw.cpp b/lib/gpu/lal_sw.cpp index 24984e48785d665c63ad9cae54dc45812846a5f2..46b6382a6097aeaab518a2cc509b6eeb5d92e41b 100644 --- a/lib/gpu/lal_sw.cpp +++ b/lib/gpu/lal_sw.cpp @@ -243,7 +243,7 @@ void SWT::loop(const bool _eflag, const bool _vflag, const int evatom) { this->k_three_end_vatom.run(&this->atom->x, &sw1, &sw2, &sw3, &map, &elem2param, &_nelements, &this->nbor->dev_nbor, &this->_nbor_data->begin(), - &this->nbor->dev_acc, &this->dev_short_nbor, + &this->nbor->dev_ilist, &this->dev_short_nbor, &end_ans->force, &end_ans->engv, &eflag, &vflag, &ainum, &nbor_pitch, &this->_threads_per_atom, &this->_gpu_nbor); @@ -252,7 +252,7 @@ void SWT::loop(const bool _eflag, const bool _vflag, const int evatom) { this->k_three_end.run(&this->atom->x, &sw1, &sw2, &sw3, &map, &elem2param, &_nelements, &this->nbor->dev_nbor, &this->_nbor_data->begin(), - &this->nbor->dev_acc, &this->dev_short_nbor, + &this->nbor->dev_ilist, &this->dev_short_nbor, &end_ans->force, &end_ans->engv, &eflag, &vflag, &ainum, &nbor_pitch, &this->_threads_per_atom, &this->_gpu_nbor); diff --git a/lib/gpu/lal_sw.cu b/lib/gpu/lal_sw.cu index 517de70691cd735cac9c9dab5cc849a257f5d373..3b6de5a683d603e74f53b8c2364942fd0f83f2cf 100644 --- a/lib/gpu/lal_sw.cu +++ b/lib/gpu/lal_sw.cu @@ -544,7 +544,7 @@ __kernel void k_sw_three_end(const __global numtyp4 *restrict x_, const int nelements, const __global int * dev_nbor, const __global int * dev_packed, - const __global int * dev_acc, + const __global int * dev_ilist, const __global int * dev_short_nbor, __global acctyp4 *restrict ans, __global acctyp *restrict engv, @@ -614,13 +614,13 @@ __kernel void k_sw_three_end(const __global numtyp4 *restrict x_, int nbor_k,numk; if (dev_nbor==dev_packed) { if (gpu_nbor) nbor_k=j+nbor_pitch; - else nbor_k=dev_acc[j]+nbor_pitch; + else nbor_k=dev_ilist[j]+nbor_pitch; numk=dev_nbor[nbor_k]; nbor_k+=nbor_pitch+fast_mul(j,t_per_atom-1); k_end=nbor_k+fast_mul(numk/t_per_atom,n_stride)+(numk & (t_per_atom-1)); nbor_k+=offset_k; } else { - nbor_k=dev_acc[j]+nbor_pitch; + nbor_k=dev_ilist[j]+nbor_pitch; numk=dev_nbor[nbor_k]; nbor_k+=nbor_pitch; nbor_k=dev_nbor[nbor_k]; @@ -698,7 +698,7 @@ __kernel void k_sw_three_end_vatom(const __global numtyp4 *restrict x_, const int nelements, const __global int * dev_nbor, const __global int * dev_packed, - const __global int * dev_acc, + const __global int * dev_ilist, const __global int * dev_short_nbor, __global acctyp4 *restrict ans, __global acctyp *restrict engv, @@ -768,13 +768,13 @@ __kernel void k_sw_three_end_vatom(const __global numtyp4 *restrict x_, int nbor_k,numk; if (dev_nbor==dev_packed) { if (gpu_nbor) nbor_k=j+nbor_pitch; - else nbor_k=dev_acc[j]+nbor_pitch; + else nbor_k=dev_ilist[j]+nbor_pitch; numk=dev_nbor[nbor_k]; nbor_k+=nbor_pitch+fast_mul(j,t_per_atom-1); k_end=nbor_k+fast_mul(numk/t_per_atom,n_stride)+(numk & (t_per_atom-1)); nbor_k+=offset_k; } else { - nbor_k=dev_acc[j]+nbor_pitch; + nbor_k=dev_ilist[j]+nbor_pitch; numk=dev_nbor[nbor_k]; nbor_k+=nbor_pitch; nbor_k=dev_nbor[nbor_k]; diff --git a/lib/gpu/lal_tersoff.cpp b/lib/gpu/lal_tersoff.cpp index af1e2865201eb03d1904b90a88b170c4ebaeffc9..ef55b98a2dba21347cc9830858f9663f0ff21b31 100644 --- a/lib/gpu/lal_tersoff.cpp +++ b/lib/gpu/lal_tersoff.cpp @@ -311,7 +311,7 @@ void TersoffT::loop(const bool _eflag, const bool _vflag, const int evatom) { this->k_three_end_vatom.run(&this->atom->x, &ts1, &ts2, &ts4, &cutsq, &map, &elem2param, &_nelements, &_nparams, &_zetaij, &this->nbor->dev_nbor, &this->_nbor_data->begin(), - &this->nbor->dev_acc, &this->dev_short_nbor, + &this->nbor->dev_ilist, &this->dev_short_nbor, &end_ans->force, &end_ans->engv, &eflag, &vflag, &ainum, &nbor_pitch, &this->_threads_per_atom, &this->_gpu_nbor); @@ -320,7 +320,7 @@ void TersoffT::loop(const bool _eflag, const bool _vflag, const int evatom) { this->k_three_end.run(&this->atom->x, &ts1, &ts2, &ts4, &cutsq, &map, &elem2param, &_nelements, &_nparams, &_zetaij, &this->nbor->dev_nbor, &this->_nbor_data->begin(), - &this->nbor->dev_acc, &this->dev_short_nbor, + &this->nbor->dev_ilist, &this->dev_short_nbor, &end_ans->force, &end_ans->engv, &eflag, &vflag, &ainum, &nbor_pitch, &this->_threads_per_atom, &this->_gpu_nbor); } diff --git a/lib/gpu/lal_tersoff.cu b/lib/gpu/lal_tersoff.cu index cec0ccc44341c10f9678e57b8de628d169980262..836f05660d7b066dcaf7398ed11e10e96eec7b07 100644 --- a/lib/gpu/lal_tersoff.cu +++ b/lib/gpu/lal_tersoff.cu @@ -696,7 +696,7 @@ __kernel void k_tersoff_three_end(const __global numtyp4 *restrict x_, const __global acctyp4 *restrict zetaij, const __global int * dev_nbor, const __global int * dev_packed, - const __global int * dev_acc, + const __global int * dev_ilist, const __global int * dev_short_nbor, __global acctyp4 *restrict ans, __global acctyp *restrict engv, @@ -777,13 +777,13 @@ __kernel void k_tersoff_three_end(const __global numtyp4 *restrict x_, int nbor_k,numk; if (dev_nbor==dev_packed) { if (gpu_nbor) nbor_k=j+nbor_pitch; - else nbor_k=dev_acc[j]+nbor_pitch; + else nbor_k=dev_ilist[j]+nbor_pitch; numk=dev_nbor[nbor_k]; nbor_k+=nbor_pitch+fast_mul(j,t_per_atom-1); k_end=nbor_k+fast_mul(numk/t_per_atom,n_stride)+(numk & (t_per_atom-1)); nbor_k+=offset_k; } else { - nbor_k=dev_acc[j]+nbor_pitch; + nbor_k=dev_ilist[j]+nbor_pitch; numk=dev_nbor[nbor_k]; nbor_k+=nbor_pitch; nbor_k=dev_nbor[nbor_k]; @@ -941,7 +941,7 @@ __kernel void k_tersoff_three_end_vatom(const __global numtyp4 *restrict x_, const __global acctyp4 *restrict zetaij, const __global int * dev_nbor, const __global int * dev_packed, - const __global int * dev_acc, + const __global int * dev_ilist, const __global int * dev_short_nbor, __global acctyp4 *restrict ans, __global acctyp *restrict engv, @@ -1022,13 +1022,13 @@ __kernel void k_tersoff_three_end_vatom(const __global numtyp4 *restrict x_, int nbor_k,numk; if (dev_nbor==dev_packed) { if (gpu_nbor) nbor_k=j+nbor_pitch; - else nbor_k=dev_acc[j]+nbor_pitch; + else nbor_k=dev_ilist[j]+nbor_pitch; numk=dev_nbor[nbor_k]; nbor_k+=nbor_pitch+fast_mul(j,t_per_atom-1); k_end=nbor_k+fast_mul(numk/t_per_atom,n_stride)+(numk & (t_per_atom-1)); nbor_k+=offset_k; } else { - nbor_k=dev_acc[j]+nbor_pitch; + nbor_k=dev_ilist[j]+nbor_pitch; numk=dev_nbor[nbor_k]; nbor_k+=nbor_pitch; nbor_k=dev_nbor[nbor_k]; diff --git a/lib/gpu/lal_tersoff_mod.cpp b/lib/gpu/lal_tersoff_mod.cpp index dc25fdadff1fdaa225428ff3b729c45162a349ba..3cbb488cab878c497c0f0cf7eede5f9c5ce9663c 100644 --- a/lib/gpu/lal_tersoff_mod.cpp +++ b/lib/gpu/lal_tersoff_mod.cpp @@ -311,7 +311,7 @@ void TersoffMT::loop(const bool _eflag, const bool _vflag, const int evatom) { this->k_three_end_vatom.run(&this->atom->x, &ts1, &ts2, &ts4, &ts5, &cutsq, &map, &elem2param, &_nelements, &_nparams, &_zetaij, &this->nbor->dev_nbor, &this->_nbor_data->begin(), - &this->nbor->dev_acc, &this->dev_short_nbor, + &this->nbor->dev_ilist, &this->dev_short_nbor, &end_ans->force, &end_ans->engv, &eflag, &vflag, &ainum, &nbor_pitch, &this->_threads_per_atom, &this->_gpu_nbor); @@ -320,7 +320,7 @@ void TersoffMT::loop(const bool _eflag, const bool _vflag, const int evatom) { this->k_three_end.run(&this->atom->x, &ts1, &ts2, &ts4, &ts5, &cutsq, &map, &elem2param, &_nelements, &_nparams, &_zetaij, &this->nbor->dev_nbor, &this->_nbor_data->begin(), - &this->nbor->dev_acc, &this->dev_short_nbor, + &this->nbor->dev_ilist, &this->dev_short_nbor, &end_ans->force, &end_ans->engv, &eflag, &vflag, &ainum, &nbor_pitch, &this->_threads_per_atom, &this->_gpu_nbor); } diff --git a/lib/gpu/lal_tersoff_mod.cu b/lib/gpu/lal_tersoff_mod.cu index 576359b5146246d74078b0deff98a700bbdcb533..dfb94c41451c4a553a244e14c2aa05710cf1ebe6 100644 --- a/lib/gpu/lal_tersoff_mod.cu +++ b/lib/gpu/lal_tersoff_mod.cu @@ -272,7 +272,7 @@ __kernel void k_tersoff_mod_zeta(const __global numtyp4 *restrict x_, if (ii<inum) { int nbor_j, nbor_end, i, numj; - const int* nbor_mem=dev_packed; + const __global int* nbor_mem=dev_packed; int offset_j=offset/t_per_atom; nbor_info(dev_nbor,dev_packed,nbor_pitch,t_per_atom,ii,offset_j,i,numj, n_stride,nbor_end,nbor_j); @@ -432,7 +432,7 @@ __kernel void k_tersoff_mod_repulsive(const __global numtyp4 *restrict x_, if (ii<inum) { int nbor, nbor_end, i, numj; - const int* nbor_mem=dev_packed; + const __global int* nbor_mem=dev_packed; nbor_info(dev_nbor,dev_packed,nbor_pitch,t_per_atom,ii,offset,i,numj, n_stride,nbor_end,nbor); @@ -547,7 +547,7 @@ __kernel void k_tersoff_mod_three_center(const __global numtyp4 *restrict x_, if (ii<inum) { int i, numj, nbor_j, nbor_end; - const int* nbor_mem=dev_packed; + const __global int* nbor_mem=dev_packed; int offset_j=offset/t_per_atom; nbor_info(dev_nbor,dev_packed,nbor_pitch,t_per_atom,ii,offset_j,i,numj, n_stride,nbor_end,nbor_j); @@ -702,7 +702,7 @@ __kernel void k_tersoff_mod_three_end(const __global numtyp4 *restrict x_, const __global acctyp4 *restrict zetaij, const __global int * dev_nbor, const __global int * dev_packed, - const __global int * dev_acc, + const __global int * dev_ilist, const __global int * dev_short_nbor, __global acctyp4 *restrict ans, __global acctyp *restrict engv, @@ -740,7 +740,7 @@ __kernel void k_tersoff_mod_three_end(const __global numtyp4 *restrict x_, if (ii<inum) { int i, numj, nbor_j, nbor_end, k_end; - const int* nbor_mem=dev_packed; + const __global int* nbor_mem=dev_packed; int offset_j=offset/t_per_atom; nbor_info(dev_nbor,dev_packed,nbor_pitch,t_per_atom,ii,offset_j,i,numj, n_stride,nbor_end,nbor_j); @@ -785,13 +785,13 @@ __kernel void k_tersoff_mod_three_end(const __global numtyp4 *restrict x_, int nbor_k,numk; if (dev_nbor==dev_packed) { if (gpu_nbor) nbor_k=j+nbor_pitch; - else nbor_k=dev_acc[j]+nbor_pitch; + else nbor_k=dev_ilist[j]+nbor_pitch; numk=dev_nbor[nbor_k]; nbor_k+=nbor_pitch+fast_mul(j,t_per_atom-1); k_end=nbor_k+fast_mul(numk/t_per_atom,n_stride)+(numk & (t_per_atom-1)); nbor_k+=offset_k; } else { - nbor_k=dev_acc[j]+nbor_pitch; + nbor_k=dev_ilist[j]+nbor_pitch; numk=dev_nbor[nbor_k]; nbor_k+=nbor_pitch; nbor_k=dev_nbor[nbor_k]; @@ -956,7 +956,7 @@ __kernel void k_tersoff_mod_three_end_vatom(const __global numtyp4 *restrict x_, const __global acctyp4 *restrict zetaij, const __global int * dev_nbor, const __global int * dev_packed, - const __global int * dev_acc, + const __global int * dev_ilist, const __global int * dev_short_nbor, __global acctyp4 *restrict ans, __global acctyp *restrict engv, @@ -994,7 +994,7 @@ __kernel void k_tersoff_mod_three_end_vatom(const __global numtyp4 *restrict x_, if (ii<inum) { int i, numj, nbor_j, nbor_end, k_end; - const int* nbor_mem = dev_packed; + const __global int* nbor_mem = dev_packed; int offset_j=offset/t_per_atom; nbor_info(dev_nbor,dev_packed,nbor_pitch,t_per_atom,ii,offset_j,i,numj, n_stride,nbor_end,nbor_j); @@ -1039,13 +1039,13 @@ __kernel void k_tersoff_mod_three_end_vatom(const __global numtyp4 *restrict x_, int nbor_k,numk; if (dev_nbor==dev_packed) { if (gpu_nbor) nbor_k=j+nbor_pitch; - else nbor_k=dev_acc[j]+nbor_pitch; + else nbor_k=dev_ilist[j]+nbor_pitch; numk=dev_nbor[nbor_k]; nbor_k+=nbor_pitch+fast_mul(j,t_per_atom-1); k_end=nbor_k+fast_mul(numk/t_per_atom,n_stride)+(numk & (t_per_atom-1)); nbor_k+=offset_k; } else { - nbor_k=dev_acc[j]+nbor_pitch; + nbor_k=dev_ilist[j]+nbor_pitch; numk=dev_nbor[nbor_k]; nbor_k+=nbor_pitch; nbor_k=dev_nbor[nbor_k]; diff --git a/lib/gpu/lal_tersoff_zbl.cpp b/lib/gpu/lal_tersoff_zbl.cpp index 7cd0d9e6b2048b0a1d78c77ea1f8cdc930fa6184..ebf67285eda47914ed06f3d0ec4abe3d4d4084aa 100644 --- a/lib/gpu/lal_tersoff_zbl.cpp +++ b/lib/gpu/lal_tersoff_zbl.cpp @@ -337,7 +337,7 @@ void TersoffZT::loop(const bool _eflag, const bool _vflag, const int evatom) { this->k_three_end_vatom.run(&this->atom->x, &ts1, &ts2, &ts4, &cutsq, &map, &elem2param, &_nelements, &_nparams, &_zetaij, &this->nbor->dev_nbor, &this->_nbor_data->begin(), - &this->nbor->dev_acc, &this->dev_short_nbor, + &this->nbor->dev_ilist, &this->dev_short_nbor, &end_ans->force, &end_ans->engv, &eflag, &vflag, &ainum, &nbor_pitch, &this->_threads_per_atom, &this->_gpu_nbor); @@ -346,7 +346,7 @@ void TersoffZT::loop(const bool _eflag, const bool _vflag, const int evatom) { this->k_three_end.run(&this->atom->x, &ts1, &ts2, &ts4, &cutsq, &map, &elem2param, &_nelements, &_nparams, &_zetaij, &this->nbor->dev_nbor, &this->_nbor_data->begin(), - &this->nbor->dev_acc, &this->dev_short_nbor, + &this->nbor->dev_ilist, &this->dev_short_nbor, &end_ans->force, &end_ans->engv, &eflag, &vflag, &ainum, &nbor_pitch, &this->_threads_per_atom, &this->_gpu_nbor); } diff --git a/lib/gpu/lal_tersoff_zbl.cu b/lib/gpu/lal_tersoff_zbl.cu index e8bb017f5901a9184b80403d8814aab50794500c..73ff51c70404cfea89fa7c9195e26ac12b6fbbcd 100644 --- a/lib/gpu/lal_tersoff_zbl.cu +++ b/lib/gpu/lal_tersoff_zbl.cu @@ -278,7 +278,7 @@ __kernel void k_tersoff_zbl_zeta(const __global numtyp4 *restrict x_, if (ii<inum) { int nbor_j, nbor_end, i, numj; - const int* nbor_mem=dev_packed; + const __global int* nbor_mem=dev_packed; int offset_j=offset/t_per_atom; nbor_info(dev_nbor,dev_packed,nbor_pitch,t_per_atom,ii,offset_j,i,numj, n_stride,nbor_end,nbor_j); @@ -445,7 +445,7 @@ __kernel void k_tersoff_zbl_repulsive(const __global numtyp4 *restrict x_, if (ii<inum) { int nbor, nbor_end, i, numj; - const int* nbor_mem=dev_packed; + const __global int* nbor_mem=dev_packed; nbor_info(dev_nbor,dev_packed,nbor_pitch,t_per_atom,ii,offset,i,numj, n_stride,nbor_end,nbor); @@ -563,7 +563,7 @@ __kernel void k_tersoff_zbl_three_center(const __global numtyp4 *restrict x_, if (ii<inum) { int i, numj, nbor_j, nbor_end; - const int* nbor_mem=dev_packed; + const __global int* nbor_mem=dev_packed; int offset_j=offset/t_per_atom; nbor_info(dev_nbor,dev_packed,nbor_pitch,t_per_atom,ii,offset_j,i,numj, n_stride,nbor_end,nbor_j); @@ -714,7 +714,7 @@ __kernel void k_tersoff_zbl_three_end(const __global numtyp4 *restrict x_, const __global acctyp4 *restrict zetaij, const __global int * dev_nbor, const __global int * dev_packed, - const __global int * dev_acc, + const __global int * dev_ilist, const __global int * dev_short_nbor, __global acctyp4 *restrict ans, __global acctyp *restrict engv, @@ -750,7 +750,7 @@ __kernel void k_tersoff_zbl_three_end(const __global numtyp4 *restrict x_, if (ii<inum) { int i, numj, nbor_j, nbor_end, k_end; - const int* nbor_mem=dev_packed; + const __global int* nbor_mem=dev_packed; int offset_j=offset/t_per_atom; nbor_info(dev_nbor,dev_packed,nbor_pitch,t_per_atom,ii,offset_j,i,numj, n_stride,nbor_end,nbor_j); @@ -795,13 +795,13 @@ __kernel void k_tersoff_zbl_three_end(const __global numtyp4 *restrict x_, int nbor_k,numk; if (dev_nbor==dev_packed) { if (gpu_nbor) nbor_k=j+nbor_pitch; - else nbor_k=dev_acc[j]+nbor_pitch; + else nbor_k=dev_ilist[j]+nbor_pitch; numk=dev_nbor[nbor_k]; nbor_k+=nbor_pitch+fast_mul(j,t_per_atom-1); k_end=nbor_k+fast_mul(numk/t_per_atom,n_stride)+(numk & (t_per_atom-1)); nbor_k+=offset_k; } else { - nbor_k=dev_acc[j]+nbor_pitch; + nbor_k=dev_ilist[j]+nbor_pitch; numk=dev_nbor[nbor_k]; nbor_k+=nbor_pitch; nbor_k=dev_nbor[nbor_k]; @@ -959,7 +959,7 @@ __kernel void k_tersoff_zbl_three_end_vatom(const __global numtyp4 *restrict x_, const __global acctyp4 *restrict zetaij, const __global int * dev_nbor, const __global int * dev_packed, - const __global int * dev_acc, + const __global int * dev_ilist, const __global int * dev_short_nbor, __global acctyp4 *restrict ans, __global acctyp *restrict engv, @@ -995,7 +995,7 @@ __kernel void k_tersoff_zbl_three_end_vatom(const __global numtyp4 *restrict x_, if (ii<inum) { int i, numj, nbor_j, nbor_end, k_end; - const int* nbor_mem = dev_packed; + const __global int* nbor_mem = dev_packed; int offset_j=offset/t_per_atom; nbor_info(dev_nbor,dev_packed,nbor_pitch,t_per_atom,ii,offset_j,i,numj, n_stride,nbor_end,nbor_j); @@ -1040,13 +1040,13 @@ __kernel void k_tersoff_zbl_three_end_vatom(const __global numtyp4 *restrict x_, int nbor_k,numk; if (dev_nbor==dev_packed) { if (gpu_nbor) nbor_k=j+nbor_pitch; - else nbor_k=dev_acc[j]+nbor_pitch; + else nbor_k=dev_ilist[j]+nbor_pitch; numk=dev_nbor[nbor_k]; nbor_k+=nbor_pitch+fast_mul(j,t_per_atom-1); k_end=nbor_k+fast_mul(numk/t_per_atom,n_stride)+(numk & (t_per_atom-1)); nbor_k+=offset_k; } else { - nbor_k=dev_acc[j]+nbor_pitch; + nbor_k=dev_ilist[j]+nbor_pitch; numk=dev_nbor[nbor_k]; nbor_k+=nbor_pitch; nbor_k=dev_nbor[nbor_k]; diff --git a/lib/gpu/lal_vashishta.cpp b/lib/gpu/lal_vashishta.cpp index d03ac992bd468ea0677a6b537468016f56b7b3e0..5a01a9bd46f185938710ab65cfd75761b3f5fb1e 100644 --- a/lib/gpu/lal_vashishta.cpp +++ b/lib/gpu/lal_vashishta.cpp @@ -278,7 +278,7 @@ void VashishtaT::loop(const bool _eflag, const bool _vflag, const int evatom) { this->k_three_end_vatom.run(&this->atom->x, ¶m1, ¶m2, ¶m3, ¶m4, ¶m5, &map, &elem2param, &_nelements, &this->nbor->dev_nbor, &this->_nbor_data->begin(), - &this->nbor->dev_acc, &this->dev_short_nbor, + &this->nbor->dev_ilist, &this->dev_short_nbor, &end_ans->force, &end_ans->engv, &eflag, &vflag, &ainum, &nbor_pitch, &this->_threads_per_atom, &this->_gpu_nbor); } else { @@ -286,7 +286,7 @@ void VashishtaT::loop(const bool _eflag, const bool _vflag, const int evatom) { this->k_three_end.run(&this->atom->x, ¶m1, ¶m2, ¶m3, ¶m4, ¶m5, &map, &elem2param, &_nelements, &this->nbor->dev_nbor, &this->_nbor_data->begin(), - &this->nbor->dev_acc, &this->dev_short_nbor, + &this->nbor->dev_ilist, &this->dev_short_nbor, &end_ans->force, &end_ans->engv, &eflag, &vflag, &ainum, &nbor_pitch, &this->_threads_per_atom, &this->_gpu_nbor); } diff --git a/lib/gpu/lal_vashishta.cu b/lib/gpu/lal_vashishta.cu index d2e8bb1496abfd7355f88730cbb0d69c1c22339d..0da46c3b534cc29cba600851db0dda05a2cf38da 100644 --- a/lib/gpu/lal_vashishta.cu +++ b/lib/gpu/lal_vashishta.cu @@ -554,7 +554,7 @@ __kernel void k_vashishta_three_end(const __global numtyp4 *restrict x_, const int nelements, const __global int * dev_nbor, const __global int * dev_packed, - const __global int * dev_acc, + const __global int * dev_ilist, const __global int * dev_short_nbor, __global acctyp4 *restrict ans, __global acctyp *restrict engv, @@ -623,13 +623,13 @@ __kernel void k_vashishta_three_end(const __global numtyp4 *restrict x_, int nbor_k,numk; if (dev_nbor==dev_packed) { if (gpu_nbor) nbor_k=j+nbor_pitch; - else nbor_k=dev_acc[j]+nbor_pitch; + else nbor_k=dev_ilist[j]+nbor_pitch; numk=dev_nbor[nbor_k]; nbor_k+=nbor_pitch+fast_mul(j,t_per_atom-1); k_end=nbor_k+fast_mul(numk/t_per_atom,n_stride)+(numk & (t_per_atom-1)); nbor_k+=offset_k; } else { - nbor_k=dev_acc[j]+nbor_pitch; + nbor_k=dev_ilist[j]+nbor_pitch; numk=dev_nbor[nbor_k]; nbor_k+=nbor_pitch; nbor_k=dev_nbor[nbor_k]; @@ -709,7 +709,7 @@ __kernel void k_vashishta_three_end_vatom(const __global numtyp4 *restrict x_, const int nelements, const __global int * dev_nbor, const __global int * dev_packed, - const __global int * dev_acc, + const __global int * dev_ilist, const __global int * dev_short_nbor, __global acctyp4 *restrict ans, __global acctyp *restrict engv, @@ -778,13 +778,13 @@ __kernel void k_vashishta_three_end_vatom(const __global numtyp4 *restrict x_, int nbor_k,numk; if (dev_nbor==dev_packed) { if (gpu_nbor) nbor_k=j+nbor_pitch; - else nbor_k=dev_acc[j]+nbor_pitch; + else nbor_k=dev_ilist[j]+nbor_pitch; numk=dev_nbor[nbor_k]; nbor_k+=nbor_pitch+fast_mul(j,t_per_atom-1); k_end=nbor_k+fast_mul(numk/t_per_atom,n_stride)+(numk & (t_per_atom-1)); nbor_k+=offset_k; } else { - nbor_k=dev_acc[j]+nbor_pitch; + nbor_k=dev_ilist[j]+nbor_pitch; numk=dev_nbor[nbor_k]; nbor_k+=nbor_pitch; nbor_k=dev_nbor[nbor_k];