diff --git a/src/KOKKOS/kokkos.cpp b/src/KOKKOS/kokkos.cpp index 35b99ce19b0351bf730258467528faab7659dbaf..3bbff6be7e77c93930b23b1bf937c28c1a2afd04 100644 --- a/src/KOKKOS/kokkos.cpp +++ b/src/KOKKOS/kokkos.cpp @@ -113,14 +113,15 @@ KokkosLMP::KokkosLMP(LAMMPS *lmp, int narg, char **arg) : Pointers(lmp) #ifdef KOKKOS_HAVE_CUDA if (ngpu <= 0) error->all(FLERR,"Kokkos has been compiled for CUDA but no GPUs are requested"); - - Kokkos::HostSpace::execution_space::initialize(num_threads,numa); - Kokkos::Cuda::SelectDevice select_device(device); - Kokkos::Cuda::initialize(select_device); -#else - LMPHostType::initialize(num_threads,numa); #endif + Kokkos::InitArguments args; + args.num_threads = num_threads; + args.num_numa = numa; + args.device_id = device; + + Kokkos::initialize(args); + // default settings for package kokkos command neighflag = FULL; @@ -144,12 +145,7 @@ KokkosLMP::~KokkosLMP() { // finalize Kokkos -#ifdef KOKKOS_HAVE_CUDA - Kokkos::Cuda::finalize(); - Kokkos::HostSpace::execution_space::finalize(); -#else - LMPHostType::finalize(); -#endif + Kokkos::finalize(); } /* ---------------------------------------------------------------------- diff --git a/src/KOKKOS/pair_dpd_fdt_energy_kokkos.cpp b/src/KOKKOS/pair_dpd_fdt_energy_kokkos.cpp index 2f08d7525365d3c2cc03239dd44116f2c7e25257..7ff536f8dd792c77515ec0e0f55a569d1f4763ef 100644 --- a/src/KOKKOS/pair_dpd_fdt_energy_kokkos.cpp +++ b/src/KOKKOS/pair_dpd_fdt_energy_kokkos.cpp @@ -111,7 +111,10 @@ void PairDPDfdtEnergyKokkos<DeviceType>::init_style() #ifdef DPD_USE_RAN_MARS rand_pool.init(random,seed); #else - rand_pool.init(seed + comm->me,DeviceType::max_hardware_threads()); + typedef Kokkos::Experimental::UniqueToken< + DeviceType, Kokkos::Experimental::UniqueTokenScope::Global> unique_token_type; + unique_token_type unique_token; + rand_pool.init(seed + comm->me,unique_token.size()); #endif } diff --git a/src/KOKKOS/pair_exp6_rx_kokkos.cpp b/src/KOKKOS/pair_exp6_rx_kokkos.cpp index 160128ad83e06ce84a56e0e7a1851ad00f52c694..810173d2195ad39c8715dcd6761691535bc85d72 100644 --- a/src/KOKKOS/pair_exp6_rx_kokkos.cpp +++ b/src/KOKKOS/pair_exp6_rx_kokkos.cpp @@ -815,7 +815,10 @@ void PairExp6rxKokkos<DeviceType>::operator()(TagPairExp6rxComputeNoAtomics<NEIG int tid = 0; #ifndef KOKKOS_HAVE_CUDA - tid = DeviceType::hardware_thread_id(); + typedef Kokkos::Experimental::UniqueToken< + DeviceType, Kokkos::Experimental::UniqueTokenScope::Global> unique_token_type; + unique_token_type unique_token; + tid = unique_token.acquire(); #endif int i,jj,jnum,itype,jtype; @@ -1152,6 +1155,10 @@ void PairExp6rxKokkos<DeviceType>::operator()(TagPairExp6rxComputeNoAtomics<NEIG t_f(tid,i,2) += fz_i; t_uCG(tid,i) += uCG_i; t_uCGnew(tid,i) += uCGnew_i; + +#ifndef KOKKOS_HAVE_CUDA + unique_token.release(tid); +#endif } // Experimental thread-safe approach using duplicated data instead of atomics and @@ -1183,7 +1190,10 @@ void PairExp6rxKokkos<DeviceType>::vectorized_operator(const int &ii, EV_FLOAT& int tid = 0; #ifndef KOKKOS_HAVE_CUDA - tid = DeviceType::hardware_thread_id(); + typedef Kokkos::Experimental::UniqueToken< + DeviceType, Kokkos::Experimental::UniqueTokenScope::Global> unique_token_type; + unique_token_type unique_token; + tid = unique_token.acquire(); #endif const int nRep = 12; @@ -1612,6 +1622,10 @@ void PairExp6rxKokkos<DeviceType>::vectorized_operator(const int &ii, EV_FLOAT& t_uCG(tid,i) += uCG_i; t_uCGnew(tid,i) += uCGnew_i; } + +#ifndef KOKKOS_HAVE_CUDA + unique_token.release(tid); +#endif } template<class DeviceType> diff --git a/src/KOKKOS/rand_pool_wrap_kokkos.h b/src/KOKKOS/rand_pool_wrap_kokkos.h index 975ce0c89a41d9d4c866d673ddcba6a81c23851d..12255a8a62fc9f8eaabd520f29c9fe0ce1103b1e 100644 --- a/src/KOKKOS/rand_pool_wrap_kokkos.h +++ b/src/KOKKOS/rand_pool_wrap_kokkos.h @@ -55,11 +55,15 @@ class RandPoolWrap : protected Pointers { #endif RandWrap rand_wrap; - int tid = 0; -#ifndef KOKKOS_HAVE_CUDA - tid = LMPDeviceType::hardware_thread_id(); -#endif + + typedef Kokkos::Experimental::UniqueToken< + LMPHostType, Kokkos::Experimental::UniqueTokenScope::Global> unique_token_type; + + unique_token_type unique_token; + int tid = (int) unique_token.acquire(); rand_wrap.rng = random_thr[tid]; + unique_token.release(tid); + return rand_wrap; }