diff --git a/doc/src/package.txt b/doc/src/package.txt index 55bd111b50ad4b37100af9704cb18458b6a71d2a..3d25a64d3174aacc9218ed4d26e96cae5355ddef 100644 --- a/doc/src/package.txt +++ b/doc/src/package.txt @@ -512,9 +512,12 @@ this keyword is set to {on}, buffers in GPU memory are passed directly through MPI send/receive calls. This reduces overhead of first copying the data to the host CPU. However GPU-direct is not supported on all systems, which can lead to segmentation faults and would require -using a value of {off}. When the {gpu/direct} keyword is set to {off} -while any of the {comm} keywords are set to {device}, the value for the -{comm} keywords will be automatically changed to {host}. +using a value of {off}. If LAMMPS can safely detect that GPU-direct is +not available (currently only possible with OpenMPI v2.0.0 or later), +then the {gpu/direct} keyword is automatically set to {off} by default. +When the {gpu/direct} keyword is set to {off} while any of the {comm} +keywords are set to {device}, the value for these {comm} keywords will +be automatically changed to {host}. :line @@ -624,6 +627,8 @@ switch"_Section_start.html#start_6. For the KOKKOS package, the option defaults neigh = full, neigh/qeq = full, newton = off, binsize = 0.0, and comm = device, gpu/direct = on. +When LAMMPS can safely detect, that GPU-direct is not available, the +default value of gpu/direct becomes "off". These settings are made automatically by the required "-k on" "command-line switch"_Section_start.html#start_6. You can change them by using the package kokkos command in your input script or via the "-pk diff --git a/src/KOKKOS/comm_kokkos.cpp b/src/KOKKOS/comm_kokkos.cpp index 6190d71b259235078566e69b08bfc9e384b4d4bb..21840b7c3ee8fd396b90e9983cd57c2d49ef64aa 100644 --- a/src/KOKKOS/comm_kokkos.cpp +++ b/src/KOKKOS/comm_kokkos.cpp @@ -406,7 +406,7 @@ void CommKokkos::forward_comm_pair_device(Pair *pair) if (sendproc[iswap] != me) { double* buf_send_pair; double* buf_recv_pair; - if (lmp->kokkos->gpu_direct) { + if (lmp->kokkos->gpu_direct_flag) { buf_send_pair = k_buf_send_pair.view<DeviceType>().data(); buf_recv_pair = k_buf_recv_pair.view<DeviceType>().data(); } else { @@ -424,7 +424,7 @@ void CommKokkos::forward_comm_pair_device(Pair *pair) MPI_Send(buf_send_pair,n,MPI_DOUBLE,sendproc[iswap],0,world); if (recvnum[iswap]) MPI_Wait(&request,MPI_STATUS_IGNORE); - if (!lmp->kokkos->gpu_direct) { + if (!lmp->kokkos->gpu_direct_flag) { k_buf_recv_pair.modify<LMPHostType>(); k_buf_recv_pair.sync<DeviceType>(); } diff --git a/src/KOKKOS/gridcomm_kokkos.cpp b/src/KOKKOS/gridcomm_kokkos.cpp index 44795b569de1b0f0babfa6d9d769c7c8fec68a12..64a9d6992f0f56a64e5cbbb3d6fb674929f5ca41 100644 --- a/src/KOKKOS/gridcomm_kokkos.cpp +++ b/src/KOKKOS/gridcomm_kokkos.cpp @@ -529,7 +529,7 @@ void GridCommKokkos<DeviceType>::forward_comm(KSpace *kspace, int which) if (swap[m].sendproc != me) { FFT_SCALAR* buf1; FFT_SCALAR* buf2; - if (lmp->kokkos->gpu_direct) { + if (lmp->kokkos->gpu_direct_flag) { buf1 = k_buf1.view<DeviceType>().data(); buf2 = k_buf2.view<DeviceType>().data(); } else { @@ -545,7 +545,7 @@ void GridCommKokkos<DeviceType>::forward_comm(KSpace *kspace, int which) swap[m].sendproc,0,gridcomm); MPI_Wait(&request,MPI_STATUS_IGNORE); - if (!lmp->kokkos->gpu_direct) { + if (!lmp->kokkos->gpu_direct_flag) { k_buf2.modify<LMPHostType>(); k_buf2.sync<DeviceType>(); } @@ -579,7 +579,7 @@ void GridCommKokkos<DeviceType>::reverse_comm(KSpace *kspace, int which) if (swap[m].recvproc != me) { FFT_SCALAR* buf1; FFT_SCALAR* buf2; - if (lmp->kokkos->gpu_direct) { + if (lmp->kokkos->gpu_direct_flag) { buf1 = k_buf1.view<DeviceType>().data(); buf2 = k_buf2.view<DeviceType>().data(); } else { @@ -595,7 +595,7 @@ void GridCommKokkos<DeviceType>::reverse_comm(KSpace *kspace, int which) swap[m].recvproc,0,gridcomm); MPI_Wait(&request,MPI_STATUS_IGNORE); - if (!lmp->kokkos->gpu_direct) { + if (!lmp->kokkos->gpu_direct_flag) { k_buf2.modify<LMPHostType>(); k_buf2.sync<DeviceType>(); } diff --git a/src/KOKKOS/kokkos.cpp b/src/KOKKOS/kokkos.cpp index 050b1420d32ffb34fd2b774166518d67ec07111c..fb6b8d8d45aa5d4e1e582c9b745ec53906aa999b 100644 --- a/src/KOKKOS/kokkos.cpp +++ b/src/KOKKOS/kokkos.cpp @@ -158,14 +158,12 @@ KokkosLMP::KokkosLMP(LAMMPS *lmp, int narg, char **arg) : Pointers(lmp) " but cannot determine if this is the case\n try" " '-pk kokkos gpu/direct off' when getting segmentation faults"); } else if ( 0 == have_gpu_direct() ) { - error->warning(FLERR,"GPU-direct is NOT available, but some parts of " - "Kokkos with CUDA require it by default\n try" - " '-pk kokkos gpu/direct off' when getting segmentation faults"); + error->warning(FLERR,"GPU-direct is NOT available, " + "using '-pk kokkos gpu/direct off' by default"); } else { ; // should never get here } } - #endif Kokkos::InitArguments args; @@ -186,7 +184,12 @@ KokkosLMP::KokkosLMP(LAMMPS *lmp, int narg, char **arg) : Pointers(lmp) exchange_comm_on_host = 0; forward_comm_on_host = 0; reverse_comm_on_host = 0; - gpu_direct = 1; + gpu_direct_flag = 1; + +#if KOKKOS_USE_CUDA + // only if we can safely detect, that GPU-direct is not available, change default + if (0 == have_gpu_direct()) gpu_direct_flag = 0; +#endif #ifdef KILL_KOKKOS_ON_SIGSEGV signal(SIGSEGV, my_signal_handler); @@ -217,7 +220,7 @@ void KokkosLMP::accelerator(int narg, char **arg) double binsize = 0.0; exchange_comm_classic = forward_comm_classic = reverse_comm_classic = 0; exchange_comm_on_host = forward_comm_on_host = reverse_comm_on_host = 0; - gpu_direct = 1; + gpu_direct_flag = 1; int iarg = 0; while (iarg < narg) { @@ -303,8 +306,8 @@ void KokkosLMP::accelerator(int narg, char **arg) iarg += 2; } else if (strcmp(arg[iarg],"gpu/direct") == 0) { if (iarg+2 > narg) error->all(FLERR,"Illegal package kokkos command"); - if (strcmp(arg[iarg+1],"off") == 0) gpu_direct = 0; - else if (strcmp(arg[iarg+1],"on") == 0) gpu_direct = 1; + if (strcmp(arg[iarg+1],"off") == 0) gpu_direct_flag = 0; + else if (strcmp(arg[iarg+1],"on") == 0) gpu_direct_flag = 1; else error->all(FLERR,"Illegal package kokkos command"); iarg += 2; } else error->all(FLERR,"Illegal package kokkos command"); @@ -312,7 +315,7 @@ void KokkosLMP::accelerator(int narg, char **arg) // if "gpu/direct off" and "comm device", change to "comm host" - if (!gpu_direct) { + if (!gpu_direct_flag) { if (exchange_comm_classic == 0 && exchange_comm_on_host == 0) exchange_comm_on_host = 1; if (forward_comm_classic == 0 && forward_comm_on_host == 0) diff --git a/src/KOKKOS/kokkos.h b/src/KOKKOS/kokkos.h index c3c5d0d6e1f142c4b426b11f2b8f47e002c455c4..cf209c0adb97e10a7bf5b45daa4502b3ad15d46a 100644 --- a/src/KOKKOS/kokkos.h +++ b/src/KOKKOS/kokkos.h @@ -34,7 +34,7 @@ class KokkosLMP : protected Pointers { int num_threads,ngpu; int numa; int auto_sync; - int gpu_direct; + int gpu_direct_flag; KokkosLMP(class LAMMPS *, int, char **); ~KokkosLMP();