diff --git a/src/KOKKOS/angle_charmm_kokkos.cpp b/src/KOKKOS/angle_charmm_kokkos.cpp index 346077e07119c4b3f5f45eb535f5ca698083ce30..8dd22022d8f7272d342cf6e86f36700e9086adf6 100644 --- a/src/KOKKOS/angle_charmm_kokkos.cpp +++ b/src/KOKKOS/angle_charmm_kokkos.cpp @@ -111,7 +111,6 @@ void AngleCharmmKokkos<DeviceType>::compute(int eflag_in, int vflag_in) Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagAngleCharmmCompute<0,0> >(0,nanglelist),*this); } } - DeviceType::fence(); if (eflag_global) energy += ev.evdwl; if (vflag_global) { diff --git a/src/KOKKOS/bond_class2_kokkos.cpp b/src/KOKKOS/bond_class2_kokkos.cpp index b01af92b5fddfdecf00ab2d00ec709a14c6a3741..b3c11c9a06c468be08f9ce2b2a29919c949ff475 100644 --- a/src/KOKKOS/bond_class2_kokkos.cpp +++ b/src/KOKKOS/bond_class2_kokkos.cpp @@ -110,7 +110,6 @@ void BondClass2Kokkos<DeviceType>::compute(int eflag_in, int vflag_in) Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagBondClass2Compute<0,0> >(0,nbondlist),*this); } } - //DeviceType::fence(); if (eflag_global) energy += ev.evdwl; if (vflag_global) { diff --git a/src/KOKKOS/bond_fene_kokkos.cpp b/src/KOKKOS/bond_fene_kokkos.cpp index cfc37bfa9f6e26afb65d97e985580d485bb65a91..025838340bbb6945b24e51c8bd185c72b1afa7d1 100644 --- a/src/KOKKOS/bond_fene_kokkos.cpp +++ b/src/KOKKOS/bond_fene_kokkos.cpp @@ -125,7 +125,6 @@ void BondFENEKokkos<DeviceType>::compute(int eflag_in, int vflag_in) Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagBondFENECompute<0,0> >(0,nbondlist),*this); } } - DeviceType::fence(); k_warning_flag.template modify<DeviceType>(); k_warning_flag.template sync<LMPHostType>(); diff --git a/src/KOKKOS/bond_harmonic_kokkos.cpp b/src/KOKKOS/bond_harmonic_kokkos.cpp index 408f59c5632b87c6177f41d7b623dbc852370eee..da45c70d6c7739f08ad0fb559e6d9d7f768437c4 100644 --- a/src/KOKKOS/bond_harmonic_kokkos.cpp +++ b/src/KOKKOS/bond_harmonic_kokkos.cpp @@ -111,7 +111,6 @@ void BondHarmonicKokkos<DeviceType>::compute(int eflag_in, int vflag_in) Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagBondHarmonicCompute<0,0> >(0,nbondlist),*this); } } - //DeviceType::fence(); if (eflag_global) energy += ev.evdwl; if (vflag_global) { diff --git a/src/KOKKOS/compute_temp_kokkos.cpp b/src/KOKKOS/compute_temp_kokkos.cpp index 6a24591d6cbb66842df6cddefeef873f8f78203b..2ea67a1fb135a695a1e37b3c5d3e0de2ae82762e 100644 --- a/src/KOKKOS/compute_temp_kokkos.cpp +++ b/src/KOKKOS/compute_temp_kokkos.cpp @@ -63,7 +63,6 @@ double ComputeTempKokkos<DeviceType>::compute_scalar() Kokkos::parallel_reduce(Kokkos::RangePolicy<DeviceType, TagComputeTempScalar<1> >(0,nlocal),*this,t_kk); else Kokkos::parallel_reduce(Kokkos::RangePolicy<DeviceType, TagComputeTempScalar<0> >(0,nlocal),*this,t_kk); - DeviceType::fence(); copymode = 0; t = t_kk.t0; // could make this more efficient @@ -118,7 +117,6 @@ void ComputeTempKokkos<DeviceType>::compute_vector() Kokkos::parallel_reduce(Kokkos::RangePolicy<DeviceType, TagComputeTempVector<1> >(0,nlocal),*this,t_kk); else Kokkos::parallel_reduce(Kokkos::RangePolicy<DeviceType, TagComputeTempVector<0> >(0,nlocal),*this,t_kk); - DeviceType::fence(); copymode = 0; t[0] = t_kk.t0; diff --git a/src/KOKKOS/dihedral_charmm_kokkos.cpp b/src/KOKKOS/dihedral_charmm_kokkos.cpp index ee9e3d1244b0c4c22bddee191751471803c4a08e..a8a8aade604a001cc8e2499b2be9c06f74f500af 100644 --- a/src/KOKKOS/dihedral_charmm_kokkos.cpp +++ b/src/KOKKOS/dihedral_charmm_kokkos.cpp @@ -132,7 +132,6 @@ void DihedralCharmmKokkos<DeviceType>::compute(int eflag_in, int vflag_in) Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagDihedralCharmmCompute<0,0> >(0,ndihedrallist),*this); } } - DeviceType::fence(); // error check diff --git a/src/KOKKOS/dihedral_class2_kokkos.cpp b/src/KOKKOS/dihedral_class2_kokkos.cpp index edfd1b3395ba909aec568fdd86d5867665efcad1..89e42c6f836a1f3fa9e502875332bffb8bb4cd08 100644 --- a/src/KOKKOS/dihedral_class2_kokkos.cpp +++ b/src/KOKKOS/dihedral_class2_kokkos.cpp @@ -159,7 +159,6 @@ void DihedralClass2Kokkos<DeviceType>::compute(int eflag_in, int vflag_in) Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagDihedralClass2Compute<0,0> >(0,ndihedrallist),*this); } } - DeviceType::fence(); // error check diff --git a/src/KOKKOS/dihedral_opls_kokkos.cpp b/src/KOKKOS/dihedral_opls_kokkos.cpp index 8e222ad86083e762df7e698be81b6e080d42fcbf..e37d4d2ef5636b867e927da9421b06f959d0b093 100644 --- a/src/KOKKOS/dihedral_opls_kokkos.cpp +++ b/src/KOKKOS/dihedral_opls_kokkos.cpp @@ -121,7 +121,6 @@ void DihedralOPLSKokkos<DeviceType>::compute(int eflag_in, int vflag_in) Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagDihedralOPLSCompute<0,0> >(0,ndihedrallist),*this); } } - DeviceType::fence(); // error check diff --git a/src/KOKKOS/fix_langevin_kokkos.cpp b/src/KOKKOS/fix_langevin_kokkos.cpp index 0572dcedbecbfab7634285d2dff7b73ac967e008..fb0f329a91bb398bad243493e9d834ac3ab1aa8d 100644 --- a/src/KOKKOS/fix_langevin_kokkos.cpp +++ b/src/KOKKOS/fix_langevin_kokkos.cpp @@ -506,7 +506,6 @@ void FixLangevinKokkos<DeviceType>::post_force(int vflag) Kokkos::parallel_for(nlocal,post_functor); } - DeviceType::fence(); if(tbiasflag == BIAS){ atomKK->sync(temperature->execution_space,temperature->datamask_read); @@ -531,7 +530,6 @@ void FixLangevinKokkos<DeviceType>::post_force(int vflag) // set total force zero in parallel on the device FixLangevinKokkosZeroForceFunctor<DeviceType> zero_functor(this); Kokkos::parallel_for(nlocal,zero_functor); - DeviceType::fence(); } // f is modified by both post_force and zero_force functors atomKK->modified(execution_space,datamask_modify); @@ -726,7 +724,6 @@ double FixLangevinKokkos<DeviceType>::compute_scalar() k_flangevin.template sync<DeviceType>(); FixLangevinKokkosTallyEnergyFunctor<DeviceType> scalar_functor(this); Kokkos::parallel_reduce(nlocal,scalar_functor,energy_onestep); - DeviceType::fence(); energy = 0.5*energy_onestep*update->dt; } @@ -770,7 +767,6 @@ void FixLangevinKokkos<DeviceType>::end_of_step() k_flangevin.template sync<DeviceType>(); FixLangevinKokkosTallyEnergyFunctor<DeviceType> tally_functor(this); Kokkos::parallel_reduce(nlocal,tally_functor,energy_onestep); - DeviceType::fence(); energy += energy_onestep*update->dt; } diff --git a/src/KOKKOS/fix_nh_kokkos.cpp b/src/KOKKOS/fix_nh_kokkos.cpp index 2b55259365d5100ced33316ea81f272b49bdb0cc..fb03bf68c6308163a2126a0e9f9a2ec82f9139c2 100644 --- a/src/KOKKOS/fix_nh_kokkos.cpp +++ b/src/KOKKOS/fix_nh_kokkos.cpp @@ -495,7 +495,6 @@ void FixNHKokkos<DeviceType>::nh_v_press() Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagFixNH_nh_v_press<1> >(0,nlocal),*this); else Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagFixNH_nh_v_press<0> >(0,nlocal),*this); - DeviceType::fence(); copymode = 0; atomKK->modified(execution_space,V_MASK); @@ -550,7 +549,6 @@ void FixNHKokkos<DeviceType>::nve_v() Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagFixNH_nve_v<1> >(0,nlocal),*this); else Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagFixNH_nve_v<0> >(0,nlocal),*this); - DeviceType::fence(); copymode = 0; } @@ -595,7 +593,6 @@ void FixNHKokkos<DeviceType>::nve_x() copymode = 1; Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagFixNH_nve_x>(0,nlocal),*this); - DeviceType::fence(); copymode = 0; } @@ -631,7 +628,6 @@ void FixNHKokkos<DeviceType>::nh_v_temp() copymode = 1; Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagFixNH_nh_v_temp>(0,nlocal),*this); - DeviceType::fence(); copymode = 0; atomKK->modified(execution_space,V_MASK); diff --git a/src/KOKKOS/fix_nve_kokkos.cpp b/src/KOKKOS/fix_nve_kokkos.cpp index 4c041f85b0794a2f63ff2d803c27ed84db275b61..eb41443bab6c410044ad6a2f32a0cd7c3ee2feb0 100644 --- a/src/KOKKOS/fix_nve_kokkos.cpp +++ b/src/KOKKOS/fix_nve_kokkos.cpp @@ -76,7 +76,6 @@ void FixNVEKokkos<DeviceType>::initial_integrate(int vflag) FixNVEKokkosInitialIntegrateFunctor<DeviceType,0> functor(this); Kokkos::parallel_for(nlocal,functor); } - DeviceType::fence(); } template<class DeviceType> @@ -133,7 +132,6 @@ void FixNVEKokkos<DeviceType>::final_integrate() FixNVEKokkosFinalIntegrateFunctor<DeviceType,0> functor(this); Kokkos::parallel_for(nlocal,functor); } - DeviceType::fence(); // debug //atomKK->sync(Host,datamask_read); diff --git a/src/KOKKOS/fix_qeq_reax_kokkos.cpp b/src/KOKKOS/fix_qeq_reax_kokkos.cpp index 2e46b85fd2e7800b1883d0925f1285f061edc12e..5cafbd2ef3d2b1cf0626c7f736e33f4c02ec2e8f 100644 --- a/src/KOKKOS/fix_qeq_reax_kokkos.cpp +++ b/src/KOKKOS/fix_qeq_reax_kokkos.cpp @@ -234,12 +234,10 @@ void FixQEqReaxKokkos<DeviceType>::pre_force(int vflag) // compute_H FixQEqReaxKokkosComputeHFunctor<DeviceType> computeH_functor(this); Kokkos::parallel_scan(inum,computeH_functor); - DeviceType::fence(); // init_matvec FixQEqReaxKokkosMatVecFunctor<DeviceType> matvec_functor(this); Kokkos::parallel_for(inum,matvec_functor); - DeviceType::fence(); // comm->forward_comm_fix(this); //Dist_vector( s ); pack_flag = 2; @@ -259,15 +257,12 @@ void FixQEqReaxKokkos<DeviceType>::pre_force(int vflag) // 1st cg solve over b_s, s cg_solve1(); - DeviceType::fence(); // 2nd cg solve over b_t, t cg_solve2(); - DeviceType::fence(); // calculate_Q(); calculate_q(); - DeviceType::fence(); copymode = 0; @@ -354,7 +349,6 @@ void FixQEqReaxKokkos<DeviceType>::allocate_array() const int ignum = atom->nlocal + atom->nghost; FixQEqReaxKokkosZeroFunctor<DeviceType> zero_functor(this); Kokkos::parallel_for(ignum,zero_functor); - DeviceType::fence(); } /* ---------------------------------------------------------------------- */ @@ -499,10 +493,8 @@ void FixQEqReaxKokkos<DeviceType>::cg_solve1() // sparse_matvec( &H, x, q ); FixQEqReaxKokkosSparse12Functor<DeviceType> sparse12_functor(this); Kokkos::parallel_for(inum,sparse12_functor); - DeviceType::fence(); if (neighflag != FULL) { Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType,TagZeroQGhosts>(nlocal,nlocal+atom->nghost),*this); - DeviceType::fence(); if (neighflag == HALF) { FixQEqReaxKokkosSparse13Functor<DeviceType,HALF> sparse13_functor(this); Kokkos::parallel_for(inum,sparse13_functor); @@ -513,7 +505,6 @@ void FixQEqReaxKokkos<DeviceType>::cg_solve1() } else { Kokkos::parallel_for(Kokkos::TeamPolicy <DeviceType, TagSparseMatvec1> (inum, teamsize), *this); } - DeviceType::fence(); if (neighflag != FULL) { k_o.template modify<DeviceType>(); @@ -529,21 +520,17 @@ void FixQEqReaxKokkos<DeviceType>::cg_solve1() F_FLOAT my_norm = 0.0; FixQEqReaxKokkosNorm1Functor<DeviceType> norm1_functor(this); Kokkos::parallel_reduce(inum,norm1_functor,my_norm); - DeviceType::fence(); F_FLOAT norm_sqr = 0.0; MPI_Allreduce( &my_norm, &norm_sqr, 1, MPI_DOUBLE, MPI_SUM, world ); b_norm = sqrt(norm_sqr); - DeviceType::fence(); // sig_new = parallel_dot( r, d, nn); F_FLOAT my_dot = 0.0; FixQEqReaxKokkosDot1Functor<DeviceType> dot1_functor(this); Kokkos::parallel_reduce(inum,dot1_functor,my_dot); - DeviceType::fence(); F_FLOAT dot_sqr = 0.0; MPI_Allreduce( &my_dot, &dot_sqr, 1, MPI_DOUBLE, MPI_SUM, world ); F_FLOAT sig_new = dot_sqr; - DeviceType::fence(); int loop; const int loopmax = 200; @@ -560,10 +547,8 @@ void FixQEqReaxKokkos<DeviceType>::cg_solve1() // sparse_matvec( &H, d, q ); FixQEqReaxKokkosSparse22Functor<DeviceType> sparse22_functor(this); Kokkos::parallel_for(inum,sparse22_functor); - DeviceType::fence(); if (neighflag != FULL) { Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType,TagZeroQGhosts>(nlocal,nlocal+atom->nghost),*this); - DeviceType::fence(); if (neighflag == HALF) { FixQEqReaxKokkosSparse23Functor<DeviceType,HALF> sparse23_functor(this); Kokkos::parallel_for(inum,sparse23_functor); @@ -574,7 +559,6 @@ void FixQEqReaxKokkos<DeviceType>::cg_solve1() } else { Kokkos::parallel_for(Kokkos::TeamPolicy <DeviceType, TagSparseMatvec2> (inum, teamsize), *this); } - DeviceType::fence(); if (neighflag != FULL) { @@ -589,7 +573,6 @@ void FixQEqReaxKokkos<DeviceType>::cg_solve1() my_dot = dot_sqr = 0.0; FixQEqReaxKokkosDot2Functor<DeviceType> dot2_functor(this); Kokkos::parallel_reduce(inum,dot2_functor,my_dot); - DeviceType::fence(); MPI_Allreduce( &my_dot, &dot_sqr, 1, MPI_DOUBLE, MPI_SUM, world ); tmp = dot_sqr; @@ -602,12 +585,10 @@ void FixQEqReaxKokkos<DeviceType>::cg_solve1() my_dot = dot_sqr = 0.0; FixQEqReaxKokkosPrecon1Functor<DeviceType> precon1_functor(this); Kokkos::parallel_for(inum,precon1_functor); - DeviceType::fence(); // preconditioning: p[j] = r[j] * Hdia_inv[j]; // sig_new = parallel_dot( r, p, nn); FixQEqReaxKokkosPreconFunctor<DeviceType> precon_functor(this); Kokkos::parallel_reduce(inum,precon_functor,my_dot); - DeviceType::fence(); MPI_Allreduce( &my_dot, &dot_sqr, 1, MPI_DOUBLE, MPI_SUM, world ); sig_new = dot_sqr; @@ -616,7 +597,6 @@ void FixQEqReaxKokkos<DeviceType>::cg_solve1() // vector_sum( d, 1., p, beta, d, nn ); FixQEqReaxKokkosVecSum2Functor<DeviceType> vecsum2_functor(this); Kokkos::parallel_for(inum,vecsum2_functor); - DeviceType::fence(); } if (loop >= loopmax && comm->me == 0) { @@ -644,10 +624,8 @@ void FixQEqReaxKokkos<DeviceType>::cg_solve2() // sparse_matvec( &H, x, q ); FixQEqReaxKokkosSparse32Functor<DeviceType> sparse32_functor(this); Kokkos::parallel_for(inum,sparse32_functor); - DeviceType::fence(); if (neighflag != FULL) { Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType,TagZeroQGhosts>(nlocal,nlocal+atom->nghost),*this); - DeviceType::fence(); if (neighflag == HALF) { FixQEqReaxKokkosSparse33Functor<DeviceType,HALF> sparse33_functor(this); Kokkos::parallel_for(inum,sparse33_functor); @@ -658,7 +636,6 @@ void FixQEqReaxKokkos<DeviceType>::cg_solve2() } else { Kokkos::parallel_for(Kokkos::TeamPolicy <DeviceType, TagSparseMatvec3> (inum, teamsize), *this); } - DeviceType::fence(); if (neighflag != FULL) { k_o.template modify<DeviceType>(); @@ -674,21 +651,17 @@ void FixQEqReaxKokkos<DeviceType>::cg_solve2() F_FLOAT my_norm = 0.0; FixQEqReaxKokkosNorm2Functor<DeviceType> norm2_functor(this); Kokkos::parallel_reduce(inum,norm2_functor,my_norm); - DeviceType::fence(); F_FLOAT norm_sqr = 0.0; MPI_Allreduce( &my_norm, &norm_sqr, 1, MPI_DOUBLE, MPI_SUM, world ); b_norm = sqrt(norm_sqr); - DeviceType::fence(); // sig_new = parallel_dot( r, d, nn); F_FLOAT my_dot = 0.0; FixQEqReaxKokkosDot1Functor<DeviceType> dot1_functor(this); Kokkos::parallel_reduce(inum,dot1_functor,my_dot); - DeviceType::fence(); F_FLOAT dot_sqr = 0.0; MPI_Allreduce( &my_dot, &dot_sqr, 1, MPI_DOUBLE, MPI_SUM, world ); F_FLOAT sig_new = dot_sqr; - DeviceType::fence(); int loop; const int loopmax = 200; @@ -705,10 +678,8 @@ void FixQEqReaxKokkos<DeviceType>::cg_solve2() // sparse_matvec( &H, d, q ); FixQEqReaxKokkosSparse22Functor<DeviceType> sparse22_functor(this); Kokkos::parallel_for(inum,sparse22_functor); - DeviceType::fence(); if (neighflag != FULL) { Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType,TagZeroQGhosts>(nlocal,nlocal+atom->nghost),*this); - DeviceType::fence(); if (neighflag == HALF) { FixQEqReaxKokkosSparse23Functor<DeviceType,HALF> sparse23_functor(this); Kokkos::parallel_for(inum,sparse23_functor); @@ -719,7 +690,6 @@ void FixQEqReaxKokkos<DeviceType>::cg_solve2() } else { Kokkos::parallel_for(Kokkos::TeamPolicy <DeviceType, TagSparseMatvec2> (inum, teamsize), *this); } - DeviceType::fence(); if (neighflag != FULL) { k_o.template modify<DeviceType>(); @@ -733,10 +703,8 @@ void FixQEqReaxKokkos<DeviceType>::cg_solve2() my_dot = dot_sqr = 0.0; FixQEqReaxKokkosDot2Functor<DeviceType> dot2_functor(this); Kokkos::parallel_reduce(inum,dot2_functor,my_dot); - DeviceType::fence(); MPI_Allreduce( &my_dot, &dot_sqr, 1, MPI_DOUBLE, MPI_SUM, world ); tmp = dot_sqr; - DeviceType::fence(); alpha = sig_new / tmp; @@ -747,12 +715,10 @@ void FixQEqReaxKokkos<DeviceType>::cg_solve2() my_dot = dot_sqr = 0.0; FixQEqReaxKokkosPrecon2Functor<DeviceType> precon2_functor(this); Kokkos::parallel_for(inum,precon2_functor); - DeviceType::fence(); // preconditioning: p[j] = r[j] * Hdia_inv[j]; // sig_new = parallel_dot( r, p, nn); FixQEqReaxKokkosPreconFunctor<DeviceType> precon_functor(this); Kokkos::parallel_reduce(inum,precon_functor,my_dot); - DeviceType::fence(); MPI_Allreduce( &my_dot, &dot_sqr, 1, MPI_DOUBLE, MPI_SUM, world ); sig_new = dot_sqr; @@ -761,7 +727,6 @@ void FixQEqReaxKokkos<DeviceType>::cg_solve2() // vector_sum( d, 1., p, beta, d, nn ); FixQEqReaxKokkosVecSum2Functor<DeviceType> vecsum2_functor(this); Kokkos::parallel_for(inum,vecsum2_functor); - DeviceType::fence(); } if (loop >= loopmax && comm->me == 0) { @@ -786,7 +751,6 @@ void FixQEqReaxKokkos<DeviceType>::calculate_q() sum = sum_all = 0.0; FixQEqReaxKokkosVecAcc1Functor<DeviceType> vecacc1_functor(this); Kokkos::parallel_reduce(inum,vecacc1_functor,sum); - DeviceType::fence(); MPI_Allreduce(&sum, &sum_all, 1, MPI_DOUBLE, MPI_SUM, world ); const F_FLOAT s_sum = sum_all; @@ -794,7 +758,6 @@ void FixQEqReaxKokkos<DeviceType>::calculate_q() sum = sum_all = 0.0; FixQEqReaxKokkosVecAcc2Functor<DeviceType> vecacc2_functor(this); Kokkos::parallel_reduce(inum,vecacc2_functor,sum); - DeviceType::fence(); MPI_Allreduce(&sum, &sum_all, 1, MPI_DOUBLE, MPI_SUM, world ); const F_FLOAT t_sum = sum_all; @@ -804,7 +767,6 @@ void FixQEqReaxKokkos<DeviceType>::calculate_q() // q[i] = s[i] - u * t[i]; FixQEqReaxKokkosCalculateQFunctor<DeviceType> calculateQ_functor(this); Kokkos::parallel_for(inum,calculateQ_functor); - DeviceType::fence(); pack_flag = 4; //comm->forward_comm_fix( this ); //Dist_vector( atom->q ); diff --git a/src/KOKKOS/fix_setforce_kokkos.cpp b/src/KOKKOS/fix_setforce_kokkos.cpp index 27f7d100fa57f0a86ddc3c311f6f3b4a98cd6d5d..5e26ef3610535a74b9e24ba2fc9f9c9822589c04 100644 --- a/src/KOKKOS/fix_setforce_kokkos.cpp +++ b/src/KOKKOS/fix_setforce_kokkos.cpp @@ -108,7 +108,6 @@ void FixSetForceKokkos<DeviceType>::post_force(int vflag) if (varflag == CONSTANT) { copymode = 1; Kokkos::parallel_reduce(Kokkos::RangePolicy<DeviceType, TagFixSetForceConstant>(0,nlocal),*this,foriginal_kk); - DeviceType::fence(); copymode = 0; // variable force, wrap with clear/add @@ -138,7 +137,6 @@ void FixSetForceKokkos<DeviceType>::post_force(int vflag) copymode = 1; Kokkos::parallel_reduce(Kokkos::RangePolicy<DeviceType, TagFixSetForceNonConstant>(0,nlocal),*this,foriginal_kk); - DeviceType::fence(); copymode = 0; } diff --git a/src/KOKKOS/fix_wall_reflect_kokkos.cpp b/src/KOKKOS/fix_wall_reflect_kokkos.cpp index 55be7e5cd7c5c86335a0a8208cbf8ca39005c520..cd7a2c59b78ae84c3c0ae58f34c3998705b442d1 100644 --- a/src/KOKKOS/fix_wall_reflect_kokkos.cpp +++ b/src/KOKKOS/fix_wall_reflect_kokkos.cpp @@ -79,7 +79,6 @@ void FixWallReflectKokkos<DeviceType>::post_integrate() copymode = 1; Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagFixWallReflectPostIntegrate>(0,nlocal),*this); - DeviceType::fence(); copymode = 0; } diff --git a/src/KOKKOS/improper_class2_kokkos.cpp b/src/KOKKOS/improper_class2_kokkos.cpp index 25bd2c732f6621278661e3fb891338a263992466..c2cb7dfe2bf55df1f64988abe60a152d42984654 100644 --- a/src/KOKKOS/improper_class2_kokkos.cpp +++ b/src/KOKKOS/improper_class2_kokkos.cpp @@ -140,7 +140,6 @@ void ImproperClass2Kokkos<DeviceType>::compute(int eflag_in, int vflag_in) Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagImproperClass2Compute<0,0> >(0,nimproperlist),*this); } } - DeviceType::fence(); if (eflag_global) energy += ev.evdwl; // error check @@ -165,7 +164,6 @@ void ImproperClass2Kokkos<DeviceType>::compute(int eflag_in, int vflag_in) Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagImproperClass2AngleAngle<0,0> >(0,nimproperlist),*this); } } - DeviceType::fence(); if (eflag_global) energy += ev.evdwl; if (vflag_global) { diff --git a/src/KOKKOS/improper_harmonic_kokkos.cpp b/src/KOKKOS/improper_harmonic_kokkos.cpp index 9c99b35bd9ec374953985eeec173dcea58d40c76..1e58e18c5189766dad20958c535f0eb123169950 100644 --- a/src/KOKKOS/improper_harmonic_kokkos.cpp +++ b/src/KOKKOS/improper_harmonic_kokkos.cpp @@ -128,7 +128,6 @@ void ImproperHarmonicKokkos<DeviceType>::compute(int eflag_in, int vflag_in) Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagImproperHarmonicCompute<0,0> >(0,nimproperlist),*this); } } - //DeviceType::fence(); // error check diff --git a/src/KOKKOS/pair_eam_alloy_kokkos.cpp b/src/KOKKOS/pair_eam_alloy_kokkos.cpp index 45c320bc516fcfd13816a6dfe809ec7296d035c5..76c701213d623303f354c3e8536ef85e121e951b 100644 --- a/src/KOKKOS/pair_eam_alloy_kokkos.cpp +++ b/src/KOKKOS/pair_eam_alloy_kokkos.cpp @@ -409,7 +409,6 @@ int PairEAMAlloyKokkos<DeviceType>::pack_forward_comm_kokkos(int n, DAT::tdual_i iswap = iswap_in; v_buf = buf.view<DeviceType>(); Kokkos::parallel_for(Kokkos::RangePolicy<LMPDeviceType, TagPairEAMAlloyPackForwardComm>(0,n),*this); - DeviceType::fence(); return n; } @@ -428,7 +427,6 @@ void PairEAMAlloyKokkos<DeviceType>::unpack_forward_comm_kokkos(int n, int first first = first_in; v_buf = buf.view<DeviceType>(); Kokkos::parallel_for(Kokkos::RangePolicy<LMPDeviceType, TagPairEAMAlloyUnpackForwardComm>(0,n),*this); - DeviceType::fence(); } template<class DeviceType> diff --git a/src/KOKKOS/pair_eam_fs_kokkos.cpp b/src/KOKKOS/pair_eam_fs_kokkos.cpp index b9fa82740aeb0d59bba5e1c5d5d9f6e762292941..9b565f8edee1b7df915f220b78d2e932d353417c 100644 --- a/src/KOKKOS/pair_eam_fs_kokkos.cpp +++ b/src/KOKKOS/pair_eam_fs_kokkos.cpp @@ -133,7 +133,6 @@ void PairEAMFSKokkos<DeviceType>::compute(int eflag_in, int vflag_in) Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagPairEAMFSInitialize>(0,nall),*this); else Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagPairEAMFSInitialize>(0,nlocal),*this); - DeviceType::fence(); // loop over neighbors of my atoms @@ -156,7 +155,6 @@ void PairEAMFSKokkos<DeviceType>::compute(int eflag_in, int vflag_in) Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagPairEAMFSKernelA<HALFTHREAD,0> >(0,inum),*this); } } - DeviceType::fence(); // communicate and sum densities (on the host) @@ -174,7 +172,6 @@ void PairEAMFSKokkos<DeviceType>::compute(int eflag_in, int vflag_in) Kokkos::parallel_reduce(Kokkos::RangePolicy<DeviceType, TagPairEAMFSKernelB<1> >(0,inum),*this,ev); else Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagPairEAMFSKernelB<0> >(0,inum),*this); - DeviceType::fence(); } else if (neighflag == FULL) { @@ -184,7 +181,6 @@ void PairEAMFSKokkos<DeviceType>::compute(int eflag_in, int vflag_in) Kokkos::parallel_reduce(Kokkos::RangePolicy<DeviceType, TagPairEAMFSKernelAB<1> >(0,inum),*this,ev); else Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagPairEAMFSKernelAB<0> >(0,inum),*this); - DeviceType::fence(); } if (eflag) { @@ -239,7 +235,6 @@ void PairEAMFSKokkos<DeviceType>::compute(int eflag_in, int vflag_in) } } } - DeviceType::fence(); if (eflag_global) eng_vdwl += ev.evdwl; if (vflag_global) { @@ -414,7 +409,6 @@ int PairEAMFSKokkos<DeviceType>::pack_forward_comm_kokkos(int n, DAT::tdual_int_ iswap = iswap_in; v_buf = buf.view<DeviceType>(); Kokkos::parallel_for(Kokkos::RangePolicy<LMPDeviceType, TagPairEAMFSPackForwardComm>(0,n),*this); - DeviceType::fence(); return n; } @@ -433,7 +427,6 @@ void PairEAMFSKokkos<DeviceType>::unpack_forward_comm_kokkos(int n, int first_in first = first_in; v_buf = buf.view<DeviceType>(); Kokkos::parallel_for(Kokkos::RangePolicy<LMPDeviceType, TagPairEAMFSUnpackForwardComm>(0,n),*this); - DeviceType::fence(); } template<class DeviceType> diff --git a/src/KOKKOS/pair_eam_kokkos.cpp b/src/KOKKOS/pair_eam_kokkos.cpp index e4128de72265b609e9d86723d8acc9100e446346..7be8e54605fc0443d92eb64f32598b50ef2bf38e 100644 --- a/src/KOKKOS/pair_eam_kokkos.cpp +++ b/src/KOKKOS/pair_eam_kokkos.cpp @@ -409,7 +409,6 @@ int PairEAMKokkos<DeviceType>::pack_forward_comm_kokkos(int n, DAT::tdual_int_2d iswap = iswap_in; v_buf = buf.view<DeviceType>(); Kokkos::parallel_for(Kokkos::RangePolicy<LMPDeviceType, TagPairEAMPackForwardComm>(0,n),*this); - DeviceType::fence(); return n; } @@ -428,7 +427,6 @@ void PairEAMKokkos<DeviceType>::unpack_forward_comm_kokkos(int n, int first_in, first = first_in; v_buf = buf.view<DeviceType>(); Kokkos::parallel_for(Kokkos::RangePolicy<LMPDeviceType, TagPairEAMUnpackForwardComm>(0,n),*this); - DeviceType::fence(); } template<class DeviceType> diff --git a/src/KOKKOS/pair_reaxc_kokkos.cpp b/src/KOKKOS/pair_reaxc_kokkos.cpp index 59369b5e082c3b383012670da52697f98e7a0163..6082c932874f314183370fbb9023b3d091b506e6 100644 --- a/src/KOKKOS/pair_reaxc_kokkos.cpp +++ b/src/KOKKOS/pair_reaxc_kokkos.cpp @@ -731,7 +731,6 @@ void PairReaxCKokkos<DeviceType>::compute(int eflag_in, int vflag_in) else Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, PairReaxComputePolar<HALFTHREAD,0> >(0,inum),*this); } - DeviceType::fence(); ev_all += ev; pvector[13] = ev.ecoul; @@ -771,7 +770,6 @@ void PairReaxCKokkos<DeviceType>::compute(int eflag_in, int vflag_in) Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, PairReaxComputeLJCoulomb<FULL,0> >(0,inum),*this); } } - DeviceType::fence(); ev_all += ev; pvector[10] = ev.evdwl; pvector[11] = ev.ecoul; @@ -800,7 +798,6 @@ void PairReaxCKokkos<DeviceType>::compute(int eflag_in, int vflag_in) // zero Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, PairReaxZero>(0,nmax),*this); - DeviceType::fence(); if (neighflag == HALF) Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, PairReaxBuildListsHalf<HALF> >(0,ignum),*this); @@ -808,7 +805,6 @@ void PairReaxCKokkos<DeviceType>::compute(int eflag_in, int vflag_in) Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, PairReaxBuildListsHalf_LessAtomics<HALFTHREAD> >(0,ignum),*this); else //(neighflag == FULL) Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, PairReaxBuildListsFull>(0,ignum),*this); - DeviceType::fence(); k_resize_bo.modify<DeviceType>(); k_resize_bo.sync<LMPHostType>(); @@ -827,15 +823,11 @@ void PairReaxCKokkos<DeviceType>::compute(int eflag_in, int vflag_in) // Bond order if (neighflag == HALF) { Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, PairReaxBondOrder1>(0,ignum),*this); - DeviceType::fence(); } else if (neighflag == HALFTHREAD) { Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, PairReaxBondOrder1_LessAtomics>(0,ignum),*this); - DeviceType::fence(); } Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, PairReaxBondOrder2>(0,ignum),*this); - DeviceType::fence(); Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, PairReaxBondOrder3>(0,ignum),*this); - DeviceType::fence(); // Bond energy if (neighflag == HALF) { @@ -843,7 +835,6 @@ void PairReaxCKokkos<DeviceType>::compute(int eflag_in, int vflag_in) Kokkos::parallel_reduce(Kokkos::RangePolicy<DeviceType, PairReaxComputeBond1<HALF,1> >(0,inum),*this,ev); else Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, PairReaxComputeBond1<HALF,0> >(0,inum),*this); - DeviceType::fence(); ev_all += ev; pvector[0] = ev.evdwl; } else { //if (neighflag == HALFTHREAD) { @@ -851,7 +842,6 @@ void PairReaxCKokkos<DeviceType>::compute(int eflag_in, int vflag_in) Kokkos::parallel_reduce(Kokkos::RangePolicy<DeviceType, PairReaxComputeBond1<HALFTHREAD,1> >(0,inum),*this,ev); else Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, PairReaxComputeBond1<HALFTHREAD,0> >(0,inum),*this); - DeviceType::fence(); ev_all += ev; pvector[0] = ev.evdwl; } @@ -859,21 +849,17 @@ void PairReaxCKokkos<DeviceType>::compute(int eflag_in, int vflag_in) // Multi-body corrections if (neighflag == HALF) { Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, PairReaxComputeMulti1<HALF,0> >(0,inum),*this); - DeviceType::fence(); if (evflag) Kokkos::parallel_reduce(Kokkos::RangePolicy<DeviceType, PairReaxComputeMulti2<HALF,1> >(0,inum),*this,ev); else Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, PairReaxComputeMulti2<HALF,0> >(0,inum),*this); - DeviceType::fence(); ev_all += ev; } else { //if (neighflag == HALFTHREAD) { Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, PairReaxComputeMulti1<HALFTHREAD,0> >(0,inum),*this); - DeviceType::fence(); if (evflag) Kokkos::parallel_reduce(Kokkos::RangePolicy<DeviceType, PairReaxComputeMulti2<HALFTHREAD,1> >(0,inum),*this,ev); else Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, PairReaxComputeMulti2<HALFTHREAD,0> >(0,inum),*this); - DeviceType::fence(); ev_all += ev; } pvector[2] = ev.ereax[0]; @@ -887,14 +873,12 @@ void PairReaxCKokkos<DeviceType>::compute(int eflag_in, int vflag_in) Kokkos::parallel_reduce(Kokkos::RangePolicy<DeviceType, PairReaxComputeAngular<HALF,1> >(0,inum),*this,ev); else Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, PairReaxComputeAngular<HALF,0> >(0,inum),*this); - DeviceType::fence(); ev_all += ev; } else { //if (neighflag == HALFTHREAD) { if (evflag) Kokkos::parallel_reduce(Kokkos::RangePolicy<DeviceType, PairReaxComputeAngular<HALFTHREAD,1> >(0,inum),*this,ev); else Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, PairReaxComputeAngular<HALFTHREAD,0> >(0,inum),*this); - DeviceType::fence(); ev_all += ev; } pvector[4] = ev.ereax[3]; @@ -908,14 +892,12 @@ void PairReaxCKokkos<DeviceType>::compute(int eflag_in, int vflag_in) Kokkos::parallel_reduce(Kokkos::RangePolicy<DeviceType, PairReaxComputeTorsion<HALF,1> >(0,inum),*this,ev); else Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, PairReaxComputeTorsion<HALF,0> >(0,inum),*this); - DeviceType::fence(); ev_all += ev; } else { //if (neighflag == HALFTHREAD) { if (evflag) Kokkos::parallel_reduce(Kokkos::RangePolicy<DeviceType, PairReaxComputeTorsion<HALFTHREAD,1> >(0,inum),*this,ev); else Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, PairReaxComputeTorsion<HALFTHREAD,0> >(0,inum),*this); - DeviceType::fence(); ev_all += ev; } pvector[8] = ev.ereax[6]; @@ -929,14 +911,12 @@ void PairReaxCKokkos<DeviceType>::compute(int eflag_in, int vflag_in) Kokkos::parallel_reduce(Kokkos::RangePolicy<DeviceType, PairReaxComputeHydrogen<HALF,1> >(0,inum),*this,ev); else Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, PairReaxComputeHydrogen<HALF,0> >(0,inum),*this); - DeviceType::fence(); ev_all += ev; } else { //if (neighflag == HALFTHREAD) { if (evflag) Kokkos::parallel_reduce(Kokkos::RangePolicy<DeviceType, PairReaxComputeHydrogen<HALFTHREAD,1> >(0,inum),*this,ev); else Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, PairReaxComputeHydrogen<HALFTHREAD,0> >(0,inum),*this); - DeviceType::fence(); ev_all += ev; } } @@ -946,22 +926,18 @@ void PairReaxCKokkos<DeviceType>::compute(int eflag_in, int vflag_in) // Bond force if (neighflag == HALF) { Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, PairReaxUpdateBond<HALF> >(0,ignum),*this); - DeviceType::fence(); if (evflag) Kokkos::parallel_reduce(Kokkos::RangePolicy<DeviceType, PairReaxComputeBond2<HALF,1> >(0,ignum),*this,ev); else Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, PairReaxComputeBond2<HALF,0> >(0,ignum),*this); - DeviceType::fence(); ev_all += ev; pvector[0] += ev.evdwl; } else { //if (neighflag == HALFTHREAD) { Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, PairReaxUpdateBond<HALFTHREAD> >(0,ignum),*this); - DeviceType::fence(); if (evflag) Kokkos::parallel_reduce(Kokkos::RangePolicy<DeviceType, PairReaxComputeBond2<HALFTHREAD,1> >(0,ignum),*this,ev); else Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, PairReaxComputeBond2<HALFTHREAD,0> >(0,ignum),*this); - DeviceType::fence(); ev_all += ev; pvector[0] += ev.evdwl; } @@ -3945,11 +3921,9 @@ void PairReaxCKokkos<DeviceType>::ev_setup(int eflag, int vflag) if (vflag_global) for (i = 0; i < 6; i++) virial[i] = 0.0; if (eflag_atom) { Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, PairReaxZeroEAtom>(0,maxeatom),*this); - DeviceType::fence(); } if (vflag_atom) { Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, PairReaxZeroVAtom>(0,maxvatom),*this); - DeviceType::fence(); } // if vflag_global = 2 and pair::compute() calls virial_fdotr_compute() @@ -4002,7 +3976,6 @@ void PairReaxCKokkos<DeviceType>::FindBond(int &numbonds) { copymode = 1; Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, PairReaxFindBondZero>(0,nmax),*this); - DeviceType::fence(); bo_cut_bond = control->bg_cut; @@ -4017,7 +3990,6 @@ void PairReaxCKokkos<DeviceType>::FindBond(int &numbonds) numbonds = 0; PairReaxCKokkosFindBondFunctor<DeviceType> find_bond_functor(this); Kokkos::parallel_reduce(inum,find_bond_functor,numbonds); - DeviceType::fence(); copymode = 0; } @@ -4076,7 +4048,6 @@ void PairReaxCKokkos<DeviceType>::PackBondBuffer(DAT::tdual_ffloat_1d k_buf, int nlocal = atomKK->nlocal; PairReaxCKokkosPackBondBufferFunctor<DeviceType> pack_bond_buffer_functor(this); Kokkos::parallel_scan(nlocal,pack_bond_buffer_functor); - DeviceType::fence(); copymode = 0; k_buf.modify<DeviceType>(); @@ -4135,11 +4106,9 @@ void PairReaxCKokkos<DeviceType>::FindBondSpecies() { copymode = 1; Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, PairReaxFindBondSpeciesZero>(0,nmax),*this); - DeviceType::fence(); nlocal = atomKK->nlocal; Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, PairReaxFindBondSpecies>(0,nlocal),*this); - DeviceType::fence(); copymode = 0; // NOTE: Could improve performance if a Kokkos version of ComputeSpecAtom is added diff --git a/src/KOKKOS/pppm_kokkos.cpp b/src/KOKKOS/pppm_kokkos.cpp index 3ad7334d2f55c982e54e410357c7f3deb445d534..bd3ed3644f53373305a995382144a6d5c27a5432 100644 --- a/src/KOKKOS/pppm_kokkos.cpp +++ b/src/KOKKOS/pppm_kokkos.cpp @@ -403,17 +403,14 @@ void PPPMKokkos<DeviceType>::setup() copymode = 1; Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagPPPM_setup1>(nxlo_fft,nxhi_fft+1),*this); - DeviceType::fence(); copymode = 0; copymode = 1; Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagPPPM_setup2>(nylo_fft,nyhi_fft+1),*this); - DeviceType::fence(); copymode = 0; copymode = 1; Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagPPPM_setup3>(nzlo_fft,nzhi_fft+1),*this); - DeviceType::fence(); copymode = 0; // merge three outer loops into one for better threading @@ -425,7 +422,6 @@ void PPPMKokkos<DeviceType>::setup() copymode = 1; Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagPPPM_setup4>(0,inum_fft),*this); - DeviceType::fence(); copymode = 0; compute_gf_ik(); @@ -753,7 +749,6 @@ void PPPMKokkos<DeviceType>::compute(int eflag, int vflag) if (eflag_atom) { copymode = 1; Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagPPPM_self1>(0,nlocal),*this); - DeviceType::fence(); copymode = 0; //for (i = nlocal; i < ntotal; i++) d_eatom[i] *= 0.5*qscale; } @@ -761,7 +756,6 @@ void PPPMKokkos<DeviceType>::compute(int eflag, int vflag) if (vflag_atom) { copymode = 1; Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagPPPM_self2>(0,ntotal),*this); - DeviceType::fence(); copymode = 0; } } @@ -1415,7 +1409,6 @@ void PPPMKokkos<DeviceType>::compute_gf_ik() copymode = 1; Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagPPPM_compute_gf_ik>(0,inum_fft),*this); - DeviceType::fence(); copymode = 0; } @@ -1495,7 +1488,6 @@ void PPPMKokkos<DeviceType>::compute_gf_ik_triclinic() copymode = 1; Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagPPPM_compute_gf_ik_triclinic>(nzlo_fft,nzhi_fft+1),*this); - DeviceType::fence(); copymode = 0; } @@ -1588,7 +1580,6 @@ void PPPMKokkos<DeviceType>::particle_map() copymode = 1; Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagPPPM_particle_map>(0,nlocal),*this); - DeviceType::fence(); copymode = 0; k_flag.template modify<DeviceType>(); @@ -1641,7 +1632,6 @@ void PPPMKokkos<DeviceType>::make_rho() copymode = 1; Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagPPPM_make_rho_zero>(0,inum_out),*this); - DeviceType::fence(); copymode = 0; // loop over my charges, add their contribution to nearby grid points @@ -1654,7 +1644,6 @@ void PPPMKokkos<DeviceType>::make_rho() #ifdef KOKKOS_HAVE_CUDA copymode = 1; Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagPPPM_make_rho_atomic>(0,nlocal),*this); - DeviceType::fence(); copymode = 0; #else ix = nxhi_out-nxlo_out + 1; @@ -1663,7 +1652,6 @@ void PPPMKokkos<DeviceType>::make_rho() copymode = 1; Kokkos::TeamPolicy<DeviceType, TagPPPM_make_rho> config(lmp->kokkos->num_threads,1); Kokkos::parallel_for(config,*this); - DeviceType::fence(); copymode = 0; #endif } @@ -1794,7 +1782,6 @@ void PPPMKokkos<DeviceType>::brick2fft() copymode = 1; Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagPPPM_brick2fft>(0,inum_inout),*this); - DeviceType::fence(); copymode = 0; k_density_fft.template modify<DeviceType>(); @@ -1842,7 +1829,6 @@ void PPPMKokkos<DeviceType>::poisson_ik() copymode = 1; Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagPPPM_poisson_ik1>(0,nfft),*this); - DeviceType::fence(); copymode = 0; k_work1.template modify<DeviceType>(); @@ -1862,14 +1848,12 @@ void PPPMKokkos<DeviceType>::poisson_ik() if (vflag_global) { copymode = 1; Kokkos::parallel_reduce(Kokkos::RangePolicy<DeviceType, TagPPPM_poisson_ik2>(0,nfft),*this,ev); - DeviceType::fence(); copymode = 0; for (j = 0; j < 6; j++) virial[j] += ev.v[j]; energy += ev.ecoul; } else { copymode = 1; Kokkos::parallel_reduce(Kokkos::RangePolicy<DeviceType, TagPPPM_poisson_ik3>(0,nfft),*this,ev); - DeviceType::fence(); copymode = 0; energy += ev.ecoul; } @@ -1880,7 +1864,6 @@ void PPPMKokkos<DeviceType>::poisson_ik() copymode = 1; Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagPPPM_poisson_ik4>(0,nfft),*this); - DeviceType::fence(); copymode = 0; // extra FFTs for per-atomKK energy/virial @@ -1914,7 +1897,6 @@ void PPPMKokkos<DeviceType>::poisson_ik() copymode = 1; Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagPPPM_poisson_ik5>(0,inum_fft),*this); - DeviceType::fence(); copymode = 0; k_work2.template modify<DeviceType>(); @@ -1926,7 +1908,6 @@ void PPPMKokkos<DeviceType>::poisson_ik() copymode = 1; Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagPPPM_poisson_ik6>(0,inum_inout),*this); - DeviceType::fence(); copymode = 0; @@ -1934,7 +1915,6 @@ void PPPMKokkos<DeviceType>::poisson_ik() copymode = 1; Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagPPPM_poisson_ik7>(0,inum_fft),*this); - DeviceType::fence(); copymode = 0; k_work2.template modify<DeviceType>(); @@ -1946,14 +1926,12 @@ void PPPMKokkos<DeviceType>::poisson_ik() copymode = 1; Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagPPPM_poisson_ik8>(0,inum_inout),*this); - DeviceType::fence(); copymode = 0; // z direction gradient copymode = 1; Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagPPPM_poisson_ik9>(0,inum_fft),*this); - DeviceType::fence(); copymode = 0; k_work2.template modify<DeviceType>(); @@ -1965,7 +1943,6 @@ void PPPMKokkos<DeviceType>::poisson_ik() copymode = 1; Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagPPPM_poisson_ik10>(0,inum_inout),*this); - DeviceType::fence(); copymode = 0; } @@ -2215,7 +2192,6 @@ void PPPMKokkos<DeviceType>::poisson_peratom() if (eflag_atom) { copymode = 1; Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagPPPM_poisson_peratom1>(0,nfft),*this); - DeviceType::fence(); copymode = 0; k_work2.template modify<DeviceType>(); @@ -2227,7 +2203,6 @@ void PPPMKokkos<DeviceType>::poisson_peratom() copymode = 1; Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagPPPM_poisson_peratom2>(0,inum_inout),*this); - DeviceType::fence(); copymode = 0; } @@ -2238,7 +2213,6 @@ void PPPMKokkos<DeviceType>::poisson_peratom() copymode = 1; Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagPPPM_poisson_peratom3>(0,nfft),*this); - DeviceType::fence(); copymode = 0; k_work2.template modify<DeviceType>(); @@ -2250,13 +2224,11 @@ void PPPMKokkos<DeviceType>::poisson_peratom() copymode = 1; Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagPPPM_poisson_peratom4>(0,inum_inout),*this); - DeviceType::fence(); copymode = 0; copymode = 1; Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagPPPM_poisson_peratom5>(0,nfft),*this); - DeviceType::fence(); copymode = 0; k_work2.template modify<DeviceType>(); @@ -2268,13 +2240,11 @@ void PPPMKokkos<DeviceType>::poisson_peratom() copymode = 1; Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagPPPM_poisson_peratom6>(0,inum_inout),*this); - DeviceType::fence(); copymode = 0; copymode = 1; Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagPPPM_poisson_peratom7>(0,nfft),*this); - DeviceType::fence(); copymode = 0; k_work2.template modify<DeviceType>(); @@ -2286,12 +2256,10 @@ void PPPMKokkos<DeviceType>::poisson_peratom() copymode = 1; Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagPPPM_poisson_peratom8>(0,inum_inout),*this); - DeviceType::fence(); copymode = 0; copymode = 1; Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagPPPM_poisson_peratom9>(0,nfft),*this); - DeviceType::fence(); copymode = 0; k_work2.template modify<DeviceType>(); @@ -2303,13 +2271,11 @@ void PPPMKokkos<DeviceType>::poisson_peratom() copymode = 1; Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagPPPM_poisson_peratom10>(0,inum_inout),*this); - DeviceType::fence(); copymode = 0; copymode = 1; Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagPPPM_poisson_peratom11>(0,nfft),*this); - DeviceType::fence(); copymode = 0; k_work2.template modify<DeviceType>(); @@ -2321,13 +2287,11 @@ void PPPMKokkos<DeviceType>::poisson_peratom() copymode = 1; Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagPPPM_poisson_peratom12>(0,inum_inout),*this); - DeviceType::fence(); copymode = 0; copymode = 1; Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagPPPM_poisson_peratom13>(0,nfft),*this); - DeviceType::fence(); copymode = 0; k_work2.template modify<DeviceType>(); @@ -2339,7 +2303,6 @@ void PPPMKokkos<DeviceType>::poisson_peratom() copymode = 1; Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagPPPM_poisson_peratom14>(0,inum_inout),*this); - DeviceType::fence(); copymode = 0; } @@ -2545,7 +2508,6 @@ void PPPMKokkos<DeviceType>::fieldforce_ik() copymode = 1; Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagPPPM_fieldforce_ik>(0,nlocal),*this); - DeviceType::fence(); copymode = 0; } @@ -2606,7 +2568,6 @@ void PPPMKokkos<DeviceType>::fieldforce_peratom() copymode = 1; Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagPPPM_fieldforce_peratom>(0,nlocal),*this); - DeviceType::fence(); copymode = 0; } @@ -2682,12 +2643,10 @@ void PPPMKokkos<DeviceType>::pack_forward_kokkos(int flag, Kokkos::DualView<FFT_ if (flag == FORWARD_IK) { copymode = 1; Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagPPPM_pack_forward1>(0,nlist),*this); - DeviceType::fence(); copymode = 0; } else if (flag == FORWARD_IK_PERATOM) { copymode = 1; Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagPPPM_pack_forward2>(0,nlist),*this); - DeviceType::fence(); copymode = 0; } } @@ -2740,12 +2699,10 @@ void PPPMKokkos<DeviceType>::unpack_forward_kokkos(int flag, Kokkos::DualView<FF if (flag == FORWARD_IK) { copymode = 1; Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagPPPM_unpack_forward1>(0,nlist),*this); - DeviceType::fence(); copymode = 0; } else if (flag == FORWARD_IK_PERATOM) { copymode = 1; Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagPPPM_unpack_forward2>(0,nlist),*this); - DeviceType::fence(); copymode = 0; } } @@ -2798,7 +2755,6 @@ void PPPMKokkos<DeviceType>::pack_reverse_kokkos(int flag, Kokkos::DualView<FFT_ copymode = 1; Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagPPPM_pack_reverse>(0,nlist),*this); - DeviceType::fence(); copymode = 0; } @@ -2829,7 +2785,6 @@ void PPPMKokkos<DeviceType>::unpack_reverse_kokkos(int flag, Kokkos::DualView<FF copymode = 1; Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagPPPM_unpack_reverse>(0,nlist),*this); - DeviceType::fence(); copymode = 0; } @@ -2989,7 +2944,6 @@ void PPPMKokkos<DeviceType>::slabcorr() double dipole = 0.0; copymode = 1; Kokkos::parallel_reduce(Kokkos::RangePolicy<DeviceType, TagPPPM_slabcorr1>(0,nlocal),*this,dipole); - DeviceType::fence(); copymode = 0; // sum local contributions to get global dipole moment @@ -3003,7 +2957,6 @@ void PPPMKokkos<DeviceType>::slabcorr() if (eflag_atom || fabs(qsum) > SMALL) { copymode = 1; Kokkos::parallel_reduce(Kokkos::RangePolicy<DeviceType, TagPPPM_slabcorr2>(0,nlocal),*this,dipole_r2); - DeviceType::fence(); copymode = 0; // sum local contributions @@ -3027,7 +2980,6 @@ void PPPMKokkos<DeviceType>::slabcorr() efact = qscale * MY_2PI/volume; copymode = 1; Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagPPPM_slabcorr3>(0,nlocal),*this); - DeviceType::fence(); copymode = 0; } @@ -3037,7 +2989,6 @@ void PPPMKokkos<DeviceType>::slabcorr() copymode = 1; Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagPPPM_slabcorr4>(0,nlocal),*this); - DeviceType::fence(); copymode = 0; } @@ -3081,7 +3032,6 @@ int PPPMKokkos<DeviceType>::timing_1d(int n, double &time1d) copymode = 1; Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagPPPM_timing_zero>(0,2*nfft_both),*this); - DeviceType::fence(); copymode = 0; MPI_Barrier(world); @@ -3119,7 +3069,6 @@ int PPPMKokkos<DeviceType>::timing_3d(int n, double &time3d) copymode = 1; Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagPPPM_timing_zero>(0,2*nfft_both),*this); - DeviceType::fence(); copymode = 0; MPI_Barrier(world);