From cbc5b8bdae1d97960116bf73c82115b7f52666dd Mon Sep 17 00:00:00 2001 From: sjplimp <sjplimp@f3b2605a-c512-4ea7-a41b-209d697bcdaa> Date: Tue, 31 May 2016 15:30:30 +0000 Subject: [PATCH] git-svn-id: svn://svn.icms.temple.edu/lammps-ro/trunk@15076 f3b2605a-c512-4ea7-a41b-209d697bcdaa --- src/Makefile | 2 +- src/USER-CUDA/Install.sh | 202 -- src/USER-CUDA/README | 21 - src/USER-CUDA/atom_vec_angle_cuda.cpp | 467 --- src/USER-CUDA/atom_vec_angle_cuda.h | 69 - src/USER-CUDA/atom_vec_atomic_cuda.cpp | 394 --- src/USER-CUDA/atom_vec_atomic_cuda.h | 81 - src/USER-CUDA/atom_vec_charge_cuda.cpp | 394 --- src/USER-CUDA/atom_vec_charge_cuda.h | 69 - src/USER-CUDA/atom_vec_full_cuda.cpp | 508 --- src/USER-CUDA/atom_vec_full_cuda.h | 69 - src/USER-CUDA/comm_cuda.cpp | 1375 -------- src/USER-CUDA/comm_cuda.h | 69 - src/USER-CUDA/compute_pe_cuda.cpp | 61 - src/USER-CUDA/compute_pe_cuda.h | 59 - src/USER-CUDA/compute_pressure_cuda.cpp | 97 - src/USER-CUDA/compute_pressure_cuda.h | 63 - src/USER-CUDA/compute_temp_cuda.cpp | 215 -- src/USER-CUDA/compute_temp_cuda.h | 76 - src/USER-CUDA/compute_temp_partial_cuda.cpp | 360 -- src/USER-CUDA/compute_temp_partial_cuda.h | 84 - src/USER-CUDA/cuda.cpp | 1067 ------ src/USER-CUDA/cuda_data.h | 796 ----- src/USER-CUDA/cuda_modify_flags.h | 45 - src/USER-CUDA/cuda_neigh_list.cpp | 184 -- src/USER-CUDA/cuda_neigh_list.h | 83 - src/USER-CUDA/domain_cuda.cpp | 345 -- src/USER-CUDA/domain_cuda.h | 41 - src/USER-CUDA/fft3d_cuda.cpp | 609 ---- src/USER-CUDA/fft3d_cuda.h | 148 - src/USER-CUDA/fft3d_wrap_cuda.cpp | 111 - src/USER-CUDA/fft3d_wrap_cuda.h | 68 - src/USER-CUDA/fix_addforce_cuda.cpp | 193 -- src/USER-CUDA/fix_addforce_cuda.h | 64 - src/USER-CUDA/fix_aveforce_cuda.cpp | 262 -- src/USER-CUDA/fix_aveforce_cuda.h | 68 - src/USER-CUDA/fix_enforce2d_cuda.cpp | 171 - src/USER-CUDA/fix_enforce2d_cuda.h | 55 - src/USER-CUDA/fix_freeze_cuda.cpp | 137 - src/USER-CUDA/fix_freeze_cuda.h | 57 - src/USER-CUDA/fix_gravity_cuda.cpp | 180 - 
src/USER-CUDA/fix_gravity_cuda.h | 60 - src/USER-CUDA/fix_nh_cuda.cpp | 2072 ------------ src/USER-CUDA/fix_nh_cuda.h | 126 - src/USER-CUDA/fix_npt_cuda.cpp | 75 - src/USER-CUDA/fix_npt_cuda.h | 36 - src/USER-CUDA/fix_nve_cuda.cpp | 157 - src/USER-CUDA/fix_nve_cuda.h | 63 - src/USER-CUDA/fix_nvt_cuda.cpp | 52 - src/USER-CUDA/fix_nvt_cuda.h | 36 - src/USER-CUDA/fix_set_force_cuda.cpp | 184 -- src/USER-CUDA/fix_set_force_cuda.h | 63 - src/USER-CUDA/fix_shake_cuda.cpp | 2885 ----------------- src/USER-CUDA/fix_shake_cuda.h | 130 - src/USER-CUDA/fix_temp_berendsen_cuda.cpp | 219 -- src/USER-CUDA/fix_temp_berendsen_cuda.h | 58 - src/USER-CUDA/fix_temp_rescale_cuda.cpp | 224 -- src/USER-CUDA/fix_temp_rescale_cuda.h | 61 - src/USER-CUDA/fix_temp_rescale_limit_cuda.cpp | 237 -- src/USER-CUDA/fix_temp_rescale_limit_cuda.h | 61 - src/USER-CUDA/fix_viscous_cuda.cpp | 105 - src/USER-CUDA/fix_viscous_cuda.h | 55 - src/USER-CUDA/modify_cuda.cpp | 437 --- src/USER-CUDA/modify_cuda.h | 83 - src/USER-CUDA/neigh_full_cuda.cpp | 307 -- src/USER-CUDA/neighbor_cuda.cpp | 240 -- src/USER-CUDA/neighbor_cuda.h | 39 - src/USER-CUDA/pair_born_coul_long_cuda.cpp | 183 -- src/USER-CUDA/pair_born_coul_long_cuda.h | 57 - src/USER-CUDA/pair_buck_coul_cut_cuda.cpp | 170 - src/USER-CUDA/pair_buck_coul_cut_cuda.h | 57 - src/USER-CUDA/pair_buck_coul_long_cuda.cpp | 181 -- src/USER-CUDA/pair_buck_coul_long_cuda.h | 57 - src/USER-CUDA/pair_buck_cuda.cpp | 166 - src/USER-CUDA/pair_buck_cuda.h | 57 - src/USER-CUDA/pair_eam_alloy_cuda.cpp | 326 -- src/USER-CUDA/pair_eam_alloy_cuda.h | 44 - src/USER-CUDA/pair_eam_cuda.cpp | 265 -- src/USER-CUDA/pair_eam_cuda.h | 80 - src/USER-CUDA/pair_eam_fs_cuda.cpp | 335 -- src/USER-CUDA/pair_eam_fs_cuda.h | 44 - src/USER-CUDA/pair_gran_hooke_cuda.cpp | 250 -- src/USER-CUDA/pair_gran_hooke_cuda.h | 57 - src/USER-CUDA/pair_lj96_cut_cuda.cpp | 179 - src/USER-CUDA/pair_lj96_cut_cuda.h | 57 - .../pair_lj_charmm_coul_charmm_cuda.cpp | 188 -- 
.../pair_lj_charmm_coul_charmm_cuda.h | 63 - ...ir_lj_charmm_coul_charmm_implicit_cuda.cpp | 183 -- ...pair_lj_charmm_coul_charmm_implicit_cuda.h | 62 - .../pair_lj_charmm_coul_long_cuda.cpp | 196 -- src/USER-CUDA/pair_lj_charmm_coul_long_cuda.h | 62 - .../pair_lj_class2_coul_cut_cuda.cpp | 162 - src/USER-CUDA/pair_lj_class2_coul_cut_cuda.h | 57 - .../pair_lj_class2_coul_long_cuda.cpp | 175 - src/USER-CUDA/pair_lj_class2_coul_long_cuda.h | 57 - src/USER-CUDA/pair_lj_class2_cuda.cpp | 167 - src/USER-CUDA/pair_lj_class2_cuda.h | 57 - src/USER-CUDA/pair_lj_cut_coul_cut_cuda.cpp | 162 - src/USER-CUDA/pair_lj_cut_coul_cut_cuda.h | 57 - src/USER-CUDA/pair_lj_cut_coul_debye_cuda.cpp | 163 - src/USER-CUDA/pair_lj_cut_coul_debye_cuda.h | 57 - src/USER-CUDA/pair_lj_cut_coul_long_cuda.cpp | 216 -- src/USER-CUDA/pair_lj_cut_coul_long_cuda.h | 57 - src/USER-CUDA/pair_lj_cut_cuda.cpp | 179 - src/USER-CUDA/pair_lj_cut_cuda.h | 57 - .../pair_lj_cut_experimental_cuda.cpp | 178 - src/USER-CUDA/pair_lj_cut_experimental_cuda.h | 57 - src/USER-CUDA/pair_lj_expand_cuda.cpp | 180 - src/USER-CUDA/pair_lj_expand_cuda.h | 57 - .../pair_lj_gromacs_coul_gromacs_cuda.cpp | 194 -- .../pair_lj_gromacs_coul_gromacs_cuda.h | 68 - src/USER-CUDA/pair_lj_gromacs_cuda.cpp | 177 - src/USER-CUDA/pair_lj_gromacs_cuda.h | 68 - src/USER-CUDA/pair_lj_sdk_coul_long_cuda.cpp | 193 -- src/USER-CUDA/pair_lj_sdk_coul_long_cuda.h | 59 - src/USER-CUDA/pair_lj_sdk_cuda.cpp | 184 -- src/USER-CUDA/pair_lj_sdk_cuda.h | 65 - src/USER-CUDA/pair_lj_smooth_cuda.cpp | 177 - src/USER-CUDA/pair_lj_smooth_cuda.h | 68 - src/USER-CUDA/pair_morse_cuda.cpp | 177 - src/USER-CUDA/pair_morse_cuda.h | 57 - src/USER-CUDA/pair_sw_cuda.cpp | 207 -- src/USER-CUDA/pair_sw_cuda.h | 66 - src/USER-CUDA/pair_tersoff_cuda.cpp | 204 -- src/USER-CUDA/pair_tersoff_cuda.h | 66 - src/USER-CUDA/pair_tersoff_zbl_cuda.cpp | 220 -- src/USER-CUDA/pair_tersoff_zbl_cuda.h | 53 - src/USER-CUDA/pppm_cuda.cpp | 1420 -------- src/USER-CUDA/pppm_cuda.h | 113 - 
src/USER-CUDA/pppm_old.cpp | 2839 ---------------- src/USER-CUDA/pppm_old.h | 271 -- src/USER-CUDA/user_cuda.h | 159 - src/USER-CUDA/verlet_cuda.cpp | 1230 ------- src/USER-CUDA/verlet_cuda.h | 63 - 134 files changed, 1 insertion(+), 31295 deletions(-) delete mode 100755 src/USER-CUDA/Install.sh delete mode 100644 src/USER-CUDA/README delete mode 100644 src/USER-CUDA/atom_vec_angle_cuda.cpp delete mode 100644 src/USER-CUDA/atom_vec_angle_cuda.h delete mode 100644 src/USER-CUDA/atom_vec_atomic_cuda.cpp delete mode 100644 src/USER-CUDA/atom_vec_atomic_cuda.h delete mode 100644 src/USER-CUDA/atom_vec_charge_cuda.cpp delete mode 100644 src/USER-CUDA/atom_vec_charge_cuda.h delete mode 100644 src/USER-CUDA/atom_vec_full_cuda.cpp delete mode 100644 src/USER-CUDA/atom_vec_full_cuda.h delete mode 100644 src/USER-CUDA/comm_cuda.cpp delete mode 100644 src/USER-CUDA/comm_cuda.h delete mode 100644 src/USER-CUDA/compute_pe_cuda.cpp delete mode 100644 src/USER-CUDA/compute_pe_cuda.h delete mode 100644 src/USER-CUDA/compute_pressure_cuda.cpp delete mode 100644 src/USER-CUDA/compute_pressure_cuda.h delete mode 100644 src/USER-CUDA/compute_temp_cuda.cpp delete mode 100644 src/USER-CUDA/compute_temp_cuda.h delete mode 100644 src/USER-CUDA/compute_temp_partial_cuda.cpp delete mode 100644 src/USER-CUDA/compute_temp_partial_cuda.h delete mode 100644 src/USER-CUDA/cuda.cpp delete mode 100644 src/USER-CUDA/cuda_data.h delete mode 100644 src/USER-CUDA/cuda_modify_flags.h delete mode 100644 src/USER-CUDA/cuda_neigh_list.cpp delete mode 100644 src/USER-CUDA/cuda_neigh_list.h delete mode 100644 src/USER-CUDA/domain_cuda.cpp delete mode 100644 src/USER-CUDA/domain_cuda.h delete mode 100644 src/USER-CUDA/fft3d_cuda.cpp delete mode 100644 src/USER-CUDA/fft3d_cuda.h delete mode 100644 src/USER-CUDA/fft3d_wrap_cuda.cpp delete mode 100644 src/USER-CUDA/fft3d_wrap_cuda.h delete mode 100644 src/USER-CUDA/fix_addforce_cuda.cpp delete mode 100644 src/USER-CUDA/fix_addforce_cuda.h delete mode 100644 
src/USER-CUDA/fix_aveforce_cuda.cpp delete mode 100644 src/USER-CUDA/fix_aveforce_cuda.h delete mode 100644 src/USER-CUDA/fix_enforce2d_cuda.cpp delete mode 100644 src/USER-CUDA/fix_enforce2d_cuda.h delete mode 100644 src/USER-CUDA/fix_freeze_cuda.cpp delete mode 100644 src/USER-CUDA/fix_freeze_cuda.h delete mode 100644 src/USER-CUDA/fix_gravity_cuda.cpp delete mode 100644 src/USER-CUDA/fix_gravity_cuda.h delete mode 100644 src/USER-CUDA/fix_nh_cuda.cpp delete mode 100644 src/USER-CUDA/fix_nh_cuda.h delete mode 100644 src/USER-CUDA/fix_npt_cuda.cpp delete mode 100644 src/USER-CUDA/fix_npt_cuda.h delete mode 100644 src/USER-CUDA/fix_nve_cuda.cpp delete mode 100644 src/USER-CUDA/fix_nve_cuda.h delete mode 100644 src/USER-CUDA/fix_nvt_cuda.cpp delete mode 100644 src/USER-CUDA/fix_nvt_cuda.h delete mode 100644 src/USER-CUDA/fix_set_force_cuda.cpp delete mode 100644 src/USER-CUDA/fix_set_force_cuda.h delete mode 100644 src/USER-CUDA/fix_shake_cuda.cpp delete mode 100644 src/USER-CUDA/fix_shake_cuda.h delete mode 100644 src/USER-CUDA/fix_temp_berendsen_cuda.cpp delete mode 100644 src/USER-CUDA/fix_temp_berendsen_cuda.h delete mode 100644 src/USER-CUDA/fix_temp_rescale_cuda.cpp delete mode 100644 src/USER-CUDA/fix_temp_rescale_cuda.h delete mode 100644 src/USER-CUDA/fix_temp_rescale_limit_cuda.cpp delete mode 100644 src/USER-CUDA/fix_temp_rescale_limit_cuda.h delete mode 100644 src/USER-CUDA/fix_viscous_cuda.cpp delete mode 100644 src/USER-CUDA/fix_viscous_cuda.h delete mode 100644 src/USER-CUDA/modify_cuda.cpp delete mode 100644 src/USER-CUDA/modify_cuda.h delete mode 100644 src/USER-CUDA/neigh_full_cuda.cpp delete mode 100644 src/USER-CUDA/neighbor_cuda.cpp delete mode 100644 src/USER-CUDA/neighbor_cuda.h delete mode 100644 src/USER-CUDA/pair_born_coul_long_cuda.cpp delete mode 100644 src/USER-CUDA/pair_born_coul_long_cuda.h delete mode 100644 src/USER-CUDA/pair_buck_coul_cut_cuda.cpp delete mode 100644 src/USER-CUDA/pair_buck_coul_cut_cuda.h delete mode 100644 
src/USER-CUDA/pair_buck_coul_long_cuda.cpp delete mode 100644 src/USER-CUDA/pair_buck_coul_long_cuda.h delete mode 100644 src/USER-CUDA/pair_buck_cuda.cpp delete mode 100644 src/USER-CUDA/pair_buck_cuda.h delete mode 100644 src/USER-CUDA/pair_eam_alloy_cuda.cpp delete mode 100644 src/USER-CUDA/pair_eam_alloy_cuda.h delete mode 100644 src/USER-CUDA/pair_eam_cuda.cpp delete mode 100644 src/USER-CUDA/pair_eam_cuda.h delete mode 100644 src/USER-CUDA/pair_eam_fs_cuda.cpp delete mode 100644 src/USER-CUDA/pair_eam_fs_cuda.h delete mode 100644 src/USER-CUDA/pair_gran_hooke_cuda.cpp delete mode 100644 src/USER-CUDA/pair_gran_hooke_cuda.h delete mode 100644 src/USER-CUDA/pair_lj96_cut_cuda.cpp delete mode 100644 src/USER-CUDA/pair_lj96_cut_cuda.h delete mode 100644 src/USER-CUDA/pair_lj_charmm_coul_charmm_cuda.cpp delete mode 100644 src/USER-CUDA/pair_lj_charmm_coul_charmm_cuda.h delete mode 100644 src/USER-CUDA/pair_lj_charmm_coul_charmm_implicit_cuda.cpp delete mode 100644 src/USER-CUDA/pair_lj_charmm_coul_charmm_implicit_cuda.h delete mode 100644 src/USER-CUDA/pair_lj_charmm_coul_long_cuda.cpp delete mode 100644 src/USER-CUDA/pair_lj_charmm_coul_long_cuda.h delete mode 100644 src/USER-CUDA/pair_lj_class2_coul_cut_cuda.cpp delete mode 100644 src/USER-CUDA/pair_lj_class2_coul_cut_cuda.h delete mode 100644 src/USER-CUDA/pair_lj_class2_coul_long_cuda.cpp delete mode 100644 src/USER-CUDA/pair_lj_class2_coul_long_cuda.h delete mode 100644 src/USER-CUDA/pair_lj_class2_cuda.cpp delete mode 100644 src/USER-CUDA/pair_lj_class2_cuda.h delete mode 100644 src/USER-CUDA/pair_lj_cut_coul_cut_cuda.cpp delete mode 100644 src/USER-CUDA/pair_lj_cut_coul_cut_cuda.h delete mode 100644 src/USER-CUDA/pair_lj_cut_coul_debye_cuda.cpp delete mode 100644 src/USER-CUDA/pair_lj_cut_coul_debye_cuda.h delete mode 100644 src/USER-CUDA/pair_lj_cut_coul_long_cuda.cpp delete mode 100644 src/USER-CUDA/pair_lj_cut_coul_long_cuda.h delete mode 100644 src/USER-CUDA/pair_lj_cut_cuda.cpp delete mode 100644 
src/USER-CUDA/pair_lj_cut_cuda.h delete mode 100644 src/USER-CUDA/pair_lj_cut_experimental_cuda.cpp delete mode 100644 src/USER-CUDA/pair_lj_cut_experimental_cuda.h delete mode 100644 src/USER-CUDA/pair_lj_expand_cuda.cpp delete mode 100644 src/USER-CUDA/pair_lj_expand_cuda.h delete mode 100644 src/USER-CUDA/pair_lj_gromacs_coul_gromacs_cuda.cpp delete mode 100644 src/USER-CUDA/pair_lj_gromacs_coul_gromacs_cuda.h delete mode 100644 src/USER-CUDA/pair_lj_gromacs_cuda.cpp delete mode 100644 src/USER-CUDA/pair_lj_gromacs_cuda.h delete mode 100644 src/USER-CUDA/pair_lj_sdk_coul_long_cuda.cpp delete mode 100644 src/USER-CUDA/pair_lj_sdk_coul_long_cuda.h delete mode 100644 src/USER-CUDA/pair_lj_sdk_cuda.cpp delete mode 100644 src/USER-CUDA/pair_lj_sdk_cuda.h delete mode 100644 src/USER-CUDA/pair_lj_smooth_cuda.cpp delete mode 100644 src/USER-CUDA/pair_lj_smooth_cuda.h delete mode 100644 src/USER-CUDA/pair_morse_cuda.cpp delete mode 100644 src/USER-CUDA/pair_morse_cuda.h delete mode 100644 src/USER-CUDA/pair_sw_cuda.cpp delete mode 100644 src/USER-CUDA/pair_sw_cuda.h delete mode 100644 src/USER-CUDA/pair_tersoff_cuda.cpp delete mode 100644 src/USER-CUDA/pair_tersoff_cuda.h delete mode 100644 src/USER-CUDA/pair_tersoff_zbl_cuda.cpp delete mode 100644 src/USER-CUDA/pair_tersoff_zbl_cuda.h delete mode 100644 src/USER-CUDA/pppm_cuda.cpp delete mode 100644 src/USER-CUDA/pppm_cuda.h delete mode 100755 src/USER-CUDA/pppm_old.cpp delete mode 100644 src/USER-CUDA/pppm_old.h delete mode 100644 src/USER-CUDA/user_cuda.h delete mode 100644 src/USER-CUDA/verlet_cuda.cpp delete mode 100644 src/USER-CUDA/verlet_cuda.h diff --git a/src/Makefile b/src/Makefile index 16d3e5326c..85b212d539 100755 --- a/src/Makefile +++ b/src/Makefile @@ -46,7 +46,7 @@ PACKAGE = asphere body class2 colloid compress coreshell dipole gpu \ kokkos kspace manybody mc meam misc molecule mpiio opt peri poems \ python qeq reax replica rigid shock snap srd voronoi -PACKUSER = user-atc user-awpmd user-cg-cmm 
user-colvars user-cuda \ +PACKUSER = user-atc user-awpmd user-cg-cmm user-colvars \ user-diffraction user-dpd user-drude user-eff user-fep user-h5md \ user-intel user-lb user-manifold user-mgpt \ user-misc user-molfile user-omp user-phonon user-qmmm user-qtb \ diff --git a/src/USER-CUDA/Install.sh b/src/USER-CUDA/Install.sh deleted file mode 100755 index 96345160a6..0000000000 --- a/src/USER-CUDA/Install.sh +++ /dev/null @@ -1,202 +0,0 @@ -# Install/unInstall package files in LAMMPS -# mode = 0/1/2 for uninstall/install/update - -mode=$1 - -# arg1 = file, arg2 = file it depends on - -action () { - if (test $mode = 0) then - rm -f ../$1 - elif (! cmp -s $1 ../$1) then - if (test -z "$2" || test -e ../$2) then - cp $1 .. - if (test $mode = 2) then - echo " updating src/$1" - fi - fi - elif (test -n "$2") then - if (test ! -e ../$2) then - rm -f ../$1 - fi - fi -} - -# force rebuild of files with LMP_USER_CUDA switch - -touch ../accelerator_cuda.h - -# list of files with optional dependencies - -action atom_vec_angle_cuda.cpp atom_vec_angle.cpp -action atom_vec_angle_cuda.h atom_vec_angle.cpp -action atom_vec_atomic_cuda.cpp -action atom_vec_atomic_cuda.h -action atom_vec_charge_cuda.cpp -action atom_vec_charge_cuda.h -action atom_vec_full_cuda.cpp atom_vec_full.cpp -action atom_vec_full_cuda.h atom_vec_full.cpp -action comm_cuda.cpp -action comm_cuda.h -action compute_pe_cuda.cpp -action compute_pe_cuda.h -action compute_pressure_cuda.cpp -action compute_pressure_cuda.h -action compute_temp_cuda.cpp -action compute_temp_cuda.h -action compute_temp_partial_cuda.cpp -action compute_temp_partial_cuda.h -action cuda.cpp -action cuda_data.h -action cuda_modify_flags.h -action cuda_neigh_list.cpp -action cuda_neigh_list.h -action domain_cuda.cpp -action domain_cuda.h -action fft3d_cuda.cpp pppm.cpp -action fft3d_cuda.h pppm.cpp -action fft3d_wrap_cuda.cpp pppm.cpp -action fft3d_wrap_cuda.h pppm.cpp -action fix_addforce_cuda.cpp -action fix_addforce_cuda.h -action 
fix_aveforce_cuda.cpp -action fix_aveforce_cuda.h -action fix_enforce2d_cuda.cpp -action fix_enforce2d_cuda.h -action fix_freeze_cuda.cpp fix_freeze.cpp -action fix_freeze_cuda.h fix_freeze.cpp -action fix_gravity_cuda.cpp -action fix_gravity_cuda.h -action fix_nh_cuda.cpp -action fix_nh_cuda.h -action fix_npt_cuda.cpp -action fix_npt_cuda.h -action fix_nve_cuda.cpp -action fix_nve_cuda.h -action fix_nvt_cuda.cpp -action fix_nvt_cuda.h -action fix_set_force_cuda.cpp -action fix_set_force_cuda.h -action fix_shake_cuda.cpp -action fix_shake_cuda.h -action fix_temp_berendsen_cuda.cpp -action fix_temp_berendsen_cuda.h -action fix_temp_rescale_cuda.cpp -action fix_temp_rescale_cuda.h -action fix_temp_rescale_limit_cuda.cpp -action fix_temp_rescale_limit_cuda.h -action fix_viscous_cuda.cpp -action fix_viscous_cuda.h -action modify_cuda.cpp -action modify_cuda.h -action neigh_full_cuda.cpp -action neighbor_cuda.cpp -action neighbor_cuda.h -action pair_born_coul_long_cuda.cpp pair_born_coul_long.cpp -action pair_born_coul_long_cuda.h pair_born_coul_long.cpp -action pair_buck_coul_cut_cuda.cpp -action pair_buck_coul_cut_cuda.h -action pair_buck_coul_long_cuda.cpp pair_buck_coul_long.cpp -action pair_buck_coul_long_cuda.h pair_buck_coul_long.cpp -action pair_buck_cuda.cpp -action pair_buck_cuda.h -action pair_eam_alloy_cuda.cpp pair_eam_alloy.cpp -action pair_eam_alloy_cuda.h pair_eam_alloy.cpp -action pair_eam_cuda.cpp pair_eam.cpp -action pair_eam_cuda.h pair_eam.cpp -action pair_eam_fs_cuda.cpp pair_eam_fs.cpp -action pair_eam_fs_cuda.h pair_eam_fs.cpp -action pair_gran_hooke_cuda.cpp pair_gran_hooke.cpp -action pair_gran_hooke_cuda.h pair_gran_hooke.cpp -action pair_lj96_cut_cuda.cpp -action pair_lj96_cut_cuda.h -action pair_lj_charmm_coul_charmm_cuda.cpp pair_lj_charmm_coul_charmm.cpp -action pair_lj_charmm_coul_charmm_cuda.h pair_lj_charmm_coul_charmm.cpp -action pair_lj_charmm_coul_charmm_implicit_cuda.cpp pair_lj_charmm_coul_charmm_implicit.cpp -action 
pair_lj_charmm_coul_charmm_implicit_cuda.h pair_lj_charmm_coul_charmm_implicit.cpp -action pair_lj_charmm_coul_long_cuda.cpp pair_lj_charmm_coul_long.cpp -action pair_lj_charmm_coul_long_cuda.h pair_lj_charmm_coul_long.cpp -action pair_lj_class2_coul_cut_cuda.cpp pair_lj_class2_coul_cut.cpp -action pair_lj_class2_coul_cut_cuda.h pair_lj_class2_coul_cut.cpp -action pair_lj_class2_coul_long_cuda.cpp pair_lj_class2_coul_long.cpp -action pair_lj_class2_coul_long_cuda.h pair_lj_class2_coul_long.cpp -action pair_lj_class2_cuda.cpp pair_lj_class2.cpp -action pair_lj_class2_cuda.h pair_lj_class2.cpp -action pair_lj_cut_coul_cut_cuda.cpp -action pair_lj_cut_coul_cut_cuda.h -action pair_lj_cut_coul_debye_cuda.cpp -action pair_lj_cut_coul_debye_cuda.h -action pair_lj_cut_coul_long_cuda.cpp pair_lj_cut_coul_long.cpp -action pair_lj_cut_coul_long_cuda.h pair_lj_cut_coul_long.cpp -action pair_lj_cut_cuda.cpp -action pair_lj_cut_cuda.h -action pair_lj_cut_experimental_cuda.cpp -action pair_lj_cut_experimental_cuda.h -action pair_lj_expand_cuda.cpp -action pair_lj_expand_cuda.h -action pair_lj_gromacs_coul_gromacs_cuda.cpp -action pair_lj_gromacs_coul_gromacs_cuda.h -action pair_lj_gromacs_cuda.cpp -action pair_lj_gromacs_cuda.h -action pair_lj_sdk_coul_long_cuda.cpp pair_lj_sdk_coul_long.cpp -action pair_lj_sdk_coul_long_cuda.h pair_lj_sdk_coul_long.cpp -action pair_lj_sdk_cuda.cpp pair_lj_sdk.cpp -action pair_lj_sdk_cuda.h pair_lj_sdk.cpp -action pair_lj_smooth_cuda.cpp -action pair_lj_smooth_cuda.h -action pair_morse_cuda.cpp -action pair_morse_cuda.h -action pair_sw_cuda.cpp pair_sw.cpp -action pair_sw_cuda.h pair_sw.cpp -action pair_tersoff_cuda.cpp pair_tersoff.cpp -action pair_tersoff_cuda.h pair_tersoff.cpp -action pair_tersoff_zbl_cuda.cpp pair_tersoff_zbl.cpp -action pair_tersoff_zbl_cuda.h pair_tersoff_zbl.cpp -action pppm_cuda.cpp pppm.cpp -action pppm_cuda.h pppm.cpp -action pppm_old.cpp pppm.cpp -action pppm_old.h pppm.cpp -action user_cuda.h -action verlet_cuda.cpp 
-action verlet_cuda.h - -# edit 2 Makefile.package files to include/exclude package info - -if (test $1 = 1) then - - if (test -e ../Makefile.package) then - sed -i -e 's/[^ \t]*cuda[^ \t]* //g' ../Makefile.package - sed -i -e 's/[^ \t]*CUDA[^ \t]* //g' ../Makefile.package - sed -i -e 's|^PKG_INC =[ \t]*|&-I..\/..\/lib\/cuda -DLMP_USER_CUDA |' ../Makefile.package - sed -i -e 's|^PKG_PATH =[ \t]*|&-L..\/..\/lib\/cuda |' ../Makefile.package - sed -i -e 's|^PKG_LIB =[ \t]*|&-llammpscuda |' ../Makefile.package - sed -i -e 's|^PKG_SYSINC =[ \t]*|&$(user-cuda_SYSINC) |' ../Makefile.package - sed -i -e 's|^PKG_SYSLIB =[ \t]*|&$(user-cuda_SYSLIB) |' ../Makefile.package - sed -i -e 's|^PKG_SYSPATH =[ \t]*|&$(user-cuda_SYSPATH) |' ../Makefile.package - fi - - if (test -e ../Makefile.package.settings) then - sed -i -e '/^include.*cuda.*$/d' ../Makefile.package.settings - # multiline form needed for BSD sed on Macs - sed -i -e '4 i \ -include ..\/..\/lib\/cuda\/Makefile.lammps -' ../Makefile.package.settings - - fi - -elif (test $1 = 0) then - # need to delete a bunch of depenency files because they indirectly depend on user_cuda.h - for f in input.d output.d pair.d fix_omp.d - do \ - rm -f ../Obj_*/$f - done - if (test -e ../Makefile.package) then - sed -i -e 's/[^ \t]*cuda[^ \t]* //g' ../Makefile.package - sed -i -e 's/[^ \t]*CUDA[^ \t]* //g' ../Makefile.package - fi - - if (test -e ../Makefile.package.settings) then - sed -i -e '/^include.*cuda.*$/d' ../Makefile.package.settings - fi - -fi diff --git a/src/USER-CUDA/README b/src/USER-CUDA/README deleted file mode 100644 index b9d2c07f8a..0000000000 --- a/src/USER-CUDA/README +++ /dev/null @@ -1,21 +0,0 @@ -This package provides acceleration of various LAMMPS pair styles, fix -styles, compute styles, and long-range Coulombics via PPPM for NVIDIA -GPUs. - -See this section of the manual to get started: - -doc/Section_accelerate.html, sub-section 5.4 - -There are example scripts for using this package in -examples/USER/cuda. 
- -This package uses an external library in lib/cuda which must be -compiled before making LAMMPS. See the lib/cuda/README file and the -LAMMPS manual for information on building LAMMPS with external -libraries. The settings in the Makefile.lammps file in that directory -must be correct for LAMMPS to build correctly with this package -installed. - -The person who created this package is Christian Trott at the -University of Technology Ilmenau, Germany (christian.trott at -tu-ilmenau.de). Contact him directly if you have questions. diff --git a/src/USER-CUDA/atom_vec_angle_cuda.cpp b/src/USER-CUDA/atom_vec_angle_cuda.cpp deleted file mode 100644 index c393d58824..0000000000 --- a/src/USER-CUDA/atom_vec_angle_cuda.cpp +++ /dev/null @@ -1,467 +0,0 @@ -/* ---------------------------------------------------------------------- - LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator - - Original Version: - http://lammps.sandia.gov, Sandia National Laboratories - Steve Plimpton, sjplimp@sandia.gov - - See the README file in the top-level LAMMPS directory. - - ----------------------------------------------------------------------- - - USER-CUDA Package and associated modifications: - https://sourceforge.net/projects/lammpscuda/ - - Christian Trott, christian.trott@tu-ilmenau.de - Lars Winterfeld, lars.winterfeld@tu-ilmenau.de - Theoretical Physics II, University of Technology Ilmenau, Germany - - See the README file in the USER-CUDA directory. - - This software is distributed under the GNU General Public License. -------------------------------------------------------------------------- */ - -/* ---------------------------------------------------------------------- - LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator - http://lammps.sandia.gov, Sandia National Laboratories - Steve Plimpton, sjplimp@sandia.gov - - Copyright (2003) Sandia Corporation. Under the terms of Contract - DE-AC04-94AL85000 with Sandia Corporation, the U.S. 
Government retains - certain rights in this software. This software is distributed under - the GNU General Public License. - - See the README file in the top-level LAMMPS directory. -------------------------------------------------------------------------- */ - -#include <cstdlib> -#include <cstdio> -#include <cstring> -#include "atom_vec_angle_cuda.h" -#include "comm_cuda_cu.h" -#include "atom_vec_angle_cuda_cu.h" -#include "atom.h" -#include "domain.h" -#include "modify.h" -#include "fix.h" -#include "memory.h" -#include "error.h" -#include "user_cuda.h" -#include "universe.h" -#include "comm.h" - -using namespace LAMMPS_NS; - -#define BUFFACTOR 1.5 -#define BUFEXTRA 1000 -#define NCUDAEXCHANGE 12 //nextra x y z vx vy vz tag type mask image molecule - -#define BUF_CFLOAT double -/* ---------------------------------------------------------------------- */ - -AtomVecAngleCuda::AtomVecAngleCuda(LAMMPS *lmp) : AtomVecAngle(lmp) -{ - cuda = lmp->cuda; - if(cuda == NULL) - error->all(FLERR,"You cannot use a /cuda class, without activating 'cuda' acceleration. 
Provide '-c on' as command-line argument to LAMMPS.."); - - maxsend=0; - cudable=true; - cuda_init_done=false; - max_nsend=0; - cu_copylist=NULL; - copylist=NULL; - copylist2=NULL; -} - -void AtomVecAngleCuda::grow_copylist(int new_max_nsend) -{ - max_nsend=new_max_nsend; - delete cu_copylist; - delete [] copylist2; - if(copylist) CudaWrapper_FreePinnedHostData((void*) copylist); - copylist = (int*) CudaWrapper_AllocPinnedHostData(max_nsend*sizeof(int),false); - copylist2 = new int[max_nsend]; - cu_copylist = new cCudaData<int, int, xx > (copylist, max_nsend); -} - -void AtomVecAngleCuda::grow_send(int n,double** buf_send,int flag) //need to be able to grow the comm send_buffer since the array sahll be copied from the gpu in whole -{ - int old_maxsend=*maxsend+BUFEXTRA; - *maxsend = static_cast<int> (BUFFACTOR * n); - if (flag) - { - if(cuda->pinned) - { - double* tmp = new double[old_maxsend]; - memcpy((void*) tmp,(void*) *buf_send,old_maxsend*sizeof(double)); - if(*buf_send) CudaWrapper_FreePinnedHostData((void*) (*buf_send)); - *buf_send = (double*) CudaWrapper_AllocPinnedHostData((*maxsend+BUFEXTRA)*sizeof(double),false); - memcpy(*buf_send,tmp,old_maxsend*sizeof(double)); - delete [] tmp; - } - else - { - *buf_send = (double *) - memory->srealloc(*buf_send,(*maxsend+BUFEXTRA)*sizeof(double), - "comm:buf_send"); - } - } - else { - if(cuda->pinned) - { - if(*buf_send) CudaWrapper_FreePinnedHostData((void*) (*buf_send)); - *buf_send = (double*) CudaWrapper_AllocPinnedHostData((*maxsend+BUFEXTRA)*sizeof(double),false); - } - else - { - memory->sfree(*buf_send); - *buf_send = (double *) memory->smalloc((*maxsend+BUFEXTRA)*sizeof(double), - "comm:buf_send"); - } - } -} - -void AtomVecAngleCuda::grow_both(int n) -{ - if(cuda->finished_setup) - { - cuda->cu_special->upload(); - cuda->cu_nspecial->upload(); - cuda->downloadAll(); - } - AtomVecAngle::grow(n); - if(cuda->finished_setup) - { - cuda->checkResize(); - cuda->uploadAll(); - } -} - -int 
AtomVecAngleCuda::pack_comm(int n, int* iswap, double *buf, - int pbc_flag, int *pbc) //usually this should not be called since comm->communicate handles the communication if only positions are exchanged -{ - if(not cuda->finished_setup || cuda->oncpu) - return AtomVecAngle::pack_comm(n,iswap,buf,pbc_flag,pbc); - - int m = Cuda_CommCuda_PackComm(&cuda->shared_data,n,*iswap,(void*) buf,pbc,pbc_flag); - if((sizeof(X_CFLOAT)!=sizeof(double)) && m) - m=(m+1)*sizeof(X_CFLOAT)/sizeof(double); - return m; -} - -int AtomVecAngleCuda::pack_comm_vel(int n, int* iswap, double *buf, - int pbc_flag, int *pbc) //usually this should not be called since comm->communicate handles the communication if only positions are exchanged -{ - if(not cuda->finished_setup || cuda->oncpu) - return AtomVecAngle::pack_comm_vel(n,iswap,buf,pbc_flag,pbc); - - int m = Cuda_CommCuda_PackCommVel(&cuda->shared_data,n,*iswap,(void*) buf,pbc,pbc_flag); - if((sizeof(X_CFLOAT)!=sizeof(double)) && m) - m=(m+1)*sizeof(X_CFLOAT)/sizeof(double); - return m; -} - -/* ---------------------------------------------------------------------- */ - -void AtomVecAngleCuda::unpack_comm(int n, int first, double *buf) //usually this should not be called since comm->communicate handles the communication if only positions are exchanged -{ - if(not cuda->finished_setup || cuda->oncpu) - {AtomVecAngle::unpack_comm(n,first,buf); return;} - - Cuda_CommCuda_UnpackComm(&cuda->shared_data,n,first,(void*)buf); -} - -void AtomVecAngleCuda::unpack_comm_vel(int n, int first, double *buf) //usually this should not be called since comm->communicate handles the communication if only positions are exchanged -{ - if(not cuda->finished_setup || cuda->oncpu) - {AtomVecAngle::unpack_comm_vel(n,first,buf); return;} - - Cuda_CommCuda_UnpackCommVel(&cuda->shared_data,n,first,(void*)buf); -} - -/* ---------------------------------------------------------------------- */ - -int AtomVecAngleCuda::pack_reverse(int n, int first, double *buf) 
//usually this should not be called since comm->communicate handles the communication if only forces are exchanged -{ - if(not cuda->finished_setup || cuda->oncpu) - return AtomVecAngle::pack_reverse(n,first,buf); - - int i,m,last; - cuda->cu_f->download(); - m = 0; - last = first + n; - for (i = first; i < last; i++) { - buf[m++] = f[i][0]; - buf[m++] = f[i][1]; - buf[m++] = f[i][2]; - } - cuda->cu_f->upload(); - return m; -} - -/* ---------------------------------------------------------------------- */ - -void AtomVecAngleCuda::unpack_reverse(int n, int *list, double *buf)//usually this should not be called since comm->communicate handles the communication if only forces are exchanged -{ - if(not cuda->finished_setup || cuda->oncpu) - {AtomVecAngle::unpack_reverse(n,list,buf); return;} - - int i,j,m; - - m = 0; - cuda->cu_f->download(); - for (i = 0; i < n; i++) { - j = list[i]; - f[j][0] += buf[m++]; - f[j][1] += buf[m++]; - f[j][2] += buf[m++]; - } - cuda->cu_f->upload(); -} - -/* ---------------------------------------------------------------------- */ - -int AtomVecAngleCuda::pack_border(int n, int *iswap, double *buf, - int pbc_flag, int *pbc) -{ - if(not cuda->finished_setup || cuda->oncpu) - return AtomVecAngle::pack_border(n,iswap,buf,pbc_flag,pbc); - - int m = Cuda_AtomVecAngleCuda_PackBorder(&cuda->shared_data,n,*iswap,(void*) buf,pbc,pbc_flag); - return m; -} - -int AtomVecAngleCuda::pack_border_vel(int n, int *iswap, double *buf, - int pbc_flag, int *pbc) -{ - if(not cuda->finished_setup || cuda->oncpu) - return AtomVecAngle::pack_border_vel(n,iswap,buf,pbc_flag,pbc); - - int m = Cuda_AtomVecAngleCuda_PackBorderVel(&cuda->shared_data,n,*iswap,(void*) buf,pbc,pbc_flag); - - return m; -} - -/* ---------------------------------------------------------------------- */ - -void AtomVecAngleCuda::unpack_border(int n, int first, double *buf) -{ - if(not cuda->finished_setup || cuda->oncpu) - {AtomVecAngle::unpack_border(n,first,buf); return;} - 
while(atom->nghost+atom->nlocal+n>=cuda->shared_data.atom.nmax) //ensure there is enough space on device to unpack data - { - grow_both(0); - } - int flag=Cuda_AtomVecAngleCuda_UnpackBorder(&cuda->shared_data,n,first,(void*)buf); - if(flag) {printf(" # CUDA: Error: Failed to unpack Border atoms (This might be a bug).\n");} -} - -void AtomVecAngleCuda::unpack_border_vel(int n, int first, double *buf) -{ - if(not cuda->finished_setup || cuda->oncpu) - {AtomVecAngle::unpack_border_vel(n,first,buf); return;} - while(atom->nghost+atom->nlocal+n>=cuda->shared_data.atom.nmax) //ensure there is enough space on device to unpack data - { - grow_both(0); - } - int flag=Cuda_AtomVecAngleCuda_UnpackBorderVel(&cuda->shared_data,n,first,(void*)buf); - if(flag) {printf(" # CUDA: Error: Failed to unpack Border atoms (This might be a bug).\n");} -} - -/* ---------------------------------------------------------------------- - pack data for atom I for sending to another proc - xyz must be 1st 3 values, so comm::exchange() can test on them -------------------------------------------------------------------------- */ - - -int AtomVecAngleCuda::pack_exchange(int dim, double *buf) -{ - if(cuda->oncpu) - return AtomVecAngle::pack_exchange(dim,buf); - - if(not cuda_init_done||domain->box_change) - { - Cuda_AtomVecAngleCuda_Init(&cuda->shared_data); - cuda_init_done=true; - } - double** buf_pointer=(double**) buf; - if(*maxsend<atom->nghost || *buf_pointer==NULL) - { - grow_send(atom->nghost>*maxsend?atom->nghost:*maxsend,buf_pointer,0); - *maxsend=atom->nghost>*maxsend?atom->nghost:*maxsend; - } - - if(max_nsend==0) grow_copylist(200); - - int nsend_atoms = Cuda_AtomVecAngleCuda_PackExchangeList(&cuda->shared_data,*maxsend,dim,*buf_pointer); - - if(nsend_atoms>max_nsend) grow_copylist(nsend_atoms+100); - if(nsend_atoms*NCUDAEXCHANGE>*maxsend) - { - grow_send((int) (nsend_atoms+100)*NCUDAEXCHANGE,buf_pointer,0); - 
Cuda_AtomVecAngleCuda_PackExchangeList(&cuda->shared_data,*maxsend,dim,*buf_pointer); - } - - int nlocal=atom->nlocal-nsend_atoms; - - for(int i=0;i<nsend_atoms;i++) copylist2[i]=1; - for(int j=1;j<nsend_atoms+1;j++) - { - int i = static_cast <int> ((*buf_pointer)[j]); - if(i>=nlocal) copylist2[i-nlocal]=-1; - } - - int actpos=0; - for(int j=1;j<nsend_atoms+1;j++) - { - int i = static_cast <int> ((*buf_pointer)[j]); - if(i<nlocal) - { - while(copylist2[actpos]==-1) actpos++; - copylist[j-1]=nlocal+actpos; - actpos++; - } - } - cu_copylist->upload(); - - cuda->shared_data.atom.nlocal=nlocal; - - int m = Cuda_AtomVecAngleCuda_PackExchange(&cuda->shared_data,nsend_atoms,*buf_pointer,cu_copylist->dev_data()); - - my_times time1,time2; - my_gettime(CLOCK_REALTIME,&time1); - - double* buf_p=*buf_pointer; - for(int j=0;j<nsend_atoms;j++) - { - int i=static_cast <int> (buf_p[j+1]); - int nextra=0; - int k; - buf_p[m++] = num_bond[i]; - for (k = 0; k < num_bond[i]; k++) { - buf_p[m++] = bond_type[i][k]; - buf_p[m++] = bond_atom[i][k]; - } - nextra+=2*num_bond[i]+1; - if(m>*maxsend) {grow_send(m,buf_pointer,1); buf_p=*buf_pointer;} - - buf_p[m++] = num_angle[i]; - for (k = 0; k < num_angle[i]; k++) { - buf_p[m++] = angle_type[i][k]; - buf_p[m++] = angle_atom1[i][k]; - buf_p[m++] = angle_atom2[i][k]; - buf_p[m++] = angle_atom3[i][k]; - } - nextra+=4*num_angle[i]+1; - if(m>*maxsend) {grow_send(m,buf_pointer,1); buf_p=*buf_pointer;} - - buf_p[m++] = nspecial[i][0]; - buf_p[m++] = nspecial[i][1]; - buf_p[m++] = nspecial[i][2]; - for (k = 0; k < nspecial[i][2]; k++) buf_p[m++] = special[i][k]; - nextra+=nspecial[i][2]+3; - if(m>*maxsend) {grow_send(m,buf_pointer,1); buf_p=*buf_pointer;} - - if (atom->nextra_grow) - for (int iextra = 0; iextra < atom->nextra_grow; iextra++) - { - int dm= modify->fix[atom->extra_grow[iextra]]->pack_exchange(i,&buf_p[m]); - m+=dm; - nextra+=dm; - if(i<nlocal)modify->fix[atom->extra_grow[iextra]]->copy_arrays(copylist[j],i,1); - if(m>*maxsend) 
{grow_send(m,buf_pointer,1); buf_p=*buf_pointer;} - } - - if(i<nlocal)AtomVecAngle::copy(copylist[j],i,1); - (*buf_pointer)[j+1] = nextra; - } - - my_gettime(CLOCK_REALTIME,&time2); - cuda->shared_data.cuda_timings.comm_exchange_cpu_pack+= - time2.tv_sec-time1.tv_sec+1.0*(time2.tv_nsec-time1.tv_nsec)/1000000000; - - (*buf_pointer)[0] = nsend_atoms; - atom->nlocal-=nsend_atoms; - cuda->shared_data.atom.update_nlocal=2; - //printf("End Pack Exchange\n"); - if(m==1) return 0; - return m; -} - -/* ---------------------------------------------------------------------- */ - -int AtomVecAngleCuda::unpack_exchange(double *buf) -{ -// printf("Begin UnPack Exchange\n"); - if(cuda->oncpu) - return AtomVecAngle::unpack_exchange(buf); - - int dim=cuda->shared_data.exchange_dim; - if(domain->box_change) - Cuda_AtomVecAngleCuda_Init(&cuda->shared_data); - - int mfirst=0; - for(int pi=0;pi<(comm->procgrid[dim]>2?2:1);pi++) - { - int nlocal = atom->nlocal; - int nsend_atoms=static_cast<int> (buf[0]); - if(nsend_atoms>max_nsend) grow_copylist(nsend_atoms+100); - - if (nlocal+nsend_atoms+atom->nghost>=atom->nmax) grow_both(nlocal+nsend_atoms*2+atom->nghost); //ensure there is enough space on device to unpack data - int naccept = Cuda_AtomVecAngleCuda_UnpackExchange(&cuda->shared_data,nsend_atoms,buf,cu_copylist->dev_data()); - cu_copylist->download(); - int m = nsend_atoms*NCUDAEXCHANGE + 1; - nlocal+=naccept; - - my_times time1,time2; - my_gettime(CLOCK_REALTIME,&time1); - - for(int j=0;j<nsend_atoms;j++) - { - if(copylist[j]>-1) - { - int k; - int i=copylist[j]; - num_bond[i] = static_cast<int> (buf[m++]); - for (k = 0; k < num_bond[i]; k++) { - bond_type[i][k] = static_cast<int> (buf[m++]); - bond_atom[i][k] = static_cast<int> (buf[m++]); - } - - num_angle[i] = static_cast<int> (buf[m++]); - for (k = 0; k < num_angle[i]; k++) { - angle_type[i][k] = static_cast<int> (buf[m++]); - angle_atom1[i][k] = static_cast<int> (buf[m++]); - angle_atom2[i][k] = static_cast<int> (buf[m++]); - 
angle_atom3[i][k] = static_cast<int> (buf[m++]); - } - - nspecial[i][0] = static_cast<int> (buf[m++]); - nspecial[i][1] = static_cast<int> (buf[m++]); - nspecial[i][2] = static_cast<int> (buf[m++]); - for (k = 0; k < nspecial[i][2]; k++) - special[i][k] = static_cast<int> (buf[m++]); - - if (atom->nextra_grow) - for (int iextra = 0; iextra < atom->nextra_grow; iextra++) - m += modify->fix[atom->extra_grow[iextra]]-> - unpack_exchange(i,&buf[m]); - - } - else - m+=static_cast <int> (buf[j+1]); - } - - my_gettime(CLOCK_REALTIME,&time2); - cuda->shared_data.cuda_timings.comm_exchange_cpu_pack+= - time2.tv_sec-time1.tv_sec+1.0*(time2.tv_nsec-time1.tv_nsec)/1000000000; - - cuda->shared_data.atom.nlocal=nlocal; - cuda->shared_data.atom.update_nlocal=2; - atom->nlocal=nlocal; - mfirst+=m; - buf=&buf[m]; - } - return mfirst; -} diff --git a/src/USER-CUDA/atom_vec_angle_cuda.h b/src/USER-CUDA/atom_vec_angle_cuda.h deleted file mode 100644 index 13913da1c0..0000000000 --- a/src/USER-CUDA/atom_vec_angle_cuda.h +++ /dev/null @@ -1,69 +0,0 @@ -/* -*- c++ -*- ---------------------------------------------------------- - LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator - - Original Version: - http://lammps.sandia.gov, Sandia National Laboratories - Steve Plimpton, sjplimp@sandia.gov - - See the README file in the top-level LAMMPS directory. - - ----------------------------------------------------------------------- - - USER-CUDA Package and associated modifications: - https://sourceforge.net/projects/lammpscuda/ - - Christian Trott, christian.trott@tu-ilmenau.de - Lars Winterfeld, lars.winterfeld@tu-ilmenau.de - Theoretical Physics II, University of Technology Ilmenau, Germany - - See the README file in the USER-CUDA directory. - - This software is distributed under the GNU General Public License. 
-------------------------------------------------------------------------- */ - -#ifdef ATOM_CLASS - -AtomStyle(angle/cuda,AtomVecAngleCuda) - -#else - -#ifndef LMP_ATOM_VEC_ANGLE_CUDA_H -#define LMP_ATOM_VEC_ANGLE_CUDA_H - -#include "atom_vec_angle.h" -#include "cuda_data.h" - -namespace LAMMPS_NS { - -class AtomVecAngleCuda : public AtomVecAngle { - public: - AtomVecAngleCuda(class LAMMPS *); - virtual ~AtomVecAngleCuda() {} - void grow_copylist(int n); - void grow_send(int n,double** buf_send,int flag); - void grow_both(int n); - int pack_comm(int, int *, double *, int, int *); - int pack_comm_vel(int, int *, double *, int, int *); - void unpack_comm(int, int, double *); - void unpack_comm_vel(int, int, double *); - int pack_reverse(int, int, double *); - void unpack_reverse(int, int *, double *); - int pack_border(int, int *, double *, int, int *); - int pack_border_vel(int, int *, double *, int, int *); - void unpack_border(int, int, double *); - void unpack_border_vel(int, int, double *); - int pack_exchange(int, double *); - int unpack_exchange(double *); - private: - class Cuda *cuda; - bool cuda_init_done; - int* copylist; - int* copylist2; - cCudaData<int, int, xx >* cu_copylist; - int max_nsend; -}; - -} - -#endif -#endif diff --git a/src/USER-CUDA/atom_vec_atomic_cuda.cpp b/src/USER-CUDA/atom_vec_atomic_cuda.cpp deleted file mode 100644 index c54f7d3127..0000000000 --- a/src/USER-CUDA/atom_vec_atomic_cuda.cpp +++ /dev/null @@ -1,394 +0,0 @@ -/* ---------------------------------------------------------------------- - LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator - - Original Version: - http://lammps.sandia.gov, Sandia National Laboratories - Steve Plimpton, sjplimp@sandia.gov - - See the README file in the top-level LAMMPS directory. 
- - ----------------------------------------------------------------------- - - USER-CUDA Package and associated modifications: - https://sourceforge.net/projects/lammpscuda/ - - Christian Trott, christian.trott@tu-ilmenau.de - Lars Winterfeld, lars.winterfeld@tu-ilmenau.de - Theoretical Physics II, University of Technology Ilmenau, Germany - - See the README file in the USER-CUDA directory. - - This software is distributed under the GNU General Public License. -------------------------------------------------------------------------- */ - -/* ---------------------------------------------------------------------- - LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator - http://lammps.sandia.gov, Sandia National Laboratories - Steve Plimpton, sjplimp@sandia.gov - - Copyright (2003) Sandia Corporation. Under the terms of Contract - DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains - certain rights in this software. This software is distributed under - the GNU General Public License. - - See the README file in the top-level LAMMPS directory. -------------------------------------------------------------------------- */ - -#include <cstdlib> -#include <cstdio> -#include <cstring> -#include "atom_vec_atomic_cuda.h" -#include "comm_cuda_cu.h" -#include "atom_vec_atomic_cuda_cu.h" -#include "atom.h" -#include "domain.h" -#include "modify.h" -#include "fix.h" -#include "memory.h" -#include "error.h" -#include "user_cuda.h" -#include "comm.h" - -using namespace LAMMPS_NS; - -#define BUFFACTOR 1.5 -#define BUFEXTRA 1000 -#define NCUDAEXCHANGE 11 //nextra x y z vx vy vz tag type mask image - - -#define BUF_CFLOAT double -/* ---------------------------------------------------------------------- */ - -AtomVecAtomicCuda::AtomVecAtomicCuda(LAMMPS *lmp) : AtomVecAtomic(lmp) -{ - cuda = lmp->cuda; - if(cuda == NULL) - error->all(FLERR,"You cannot use a /cuda class, without activating 'cuda' acceleration. 
Provide '-c on' as command-line argument to LAMMPS.."); - - maxsend=0; - cudable=true; - cuda_init_done=false; - max_nsend=0; - cu_copylist=NULL; - copylist=NULL; - copylist2=NULL; -} - -void AtomVecAtomicCuda::grow_copylist(int new_max_nsend) -{ - max_nsend=new_max_nsend; - delete cu_copylist; - delete [] copylist2; - if(copylist) CudaWrapper_FreePinnedHostData((void*) copylist); - copylist = (int*) CudaWrapper_AllocPinnedHostData(max_nsend*sizeof(int),false); - copylist2 = new int[max_nsend]; - cu_copylist = new cCudaData<int, int, xx > (copylist, max_nsend); -} - -void AtomVecAtomicCuda::grow_send(int n,double** buf_send,int flag) -{ - int old_maxsend=*maxsend+BUFEXTRA; - *maxsend = static_cast<int> (BUFFACTOR * n); - if (flag) - { - if(cuda->pinned) - { - double* tmp = new double[old_maxsend]; - memcpy((void*) tmp,(void*) *buf_send,old_maxsend*sizeof(double)); - if(*buf_send) CudaWrapper_FreePinnedHostData((void*) (*buf_send)); - *buf_send = (double*) CudaWrapper_AllocPinnedHostData((*maxsend+BUFEXTRA)*sizeof(double),false); - memcpy(*buf_send,tmp,old_maxsend*sizeof(double)); - delete [] tmp; - } - else - { - *buf_send = (double *) - memory->srealloc(*buf_send,(*maxsend+BUFEXTRA)*sizeof(double), - "comm:buf_send"); - } - } - else { - if(cuda->pinned) - { - if(*buf_send) CudaWrapper_FreePinnedHostData((void*) (*buf_send)); - *buf_send = (double*) CudaWrapper_AllocPinnedHostData((*maxsend+BUFEXTRA)*sizeof(double),false); - } - else - { - memory->sfree(*buf_send); - *buf_send = (double *) memory->smalloc((*maxsend+BUFEXTRA)*sizeof(double), - "comm:buf_send"); - } - } -} - -void AtomVecAtomicCuda::grow_both(int n) -{ - if(cuda->finished_setup) - cuda->downloadAll(); - AtomVecAtomic::grow(n); - if(cuda->finished_setup) - { - cuda->checkResize(); - cuda->uploadAll(); - } -} - -int AtomVecAtomicCuda::pack_comm(int n, int* iswap, double *buf, - int pbc_flag, int *pbc) -{ - if(not cuda->finished_setup || cuda->oncpu) - return 
AtomVecAtomic::pack_comm(n,iswap,buf,pbc_flag,pbc); - - int m = Cuda_CommCuda_PackComm(&cuda->shared_data,n,*iswap,(void*) buf,pbc,pbc_flag); - if((sizeof(X_CFLOAT)!=sizeof(double)) && m) - m=(m+1)*sizeof(X_CFLOAT)/sizeof(double); - return m; -} - -int AtomVecAtomicCuda::pack_comm_vel(int n, int* iswap, double *buf, - int pbc_flag, int *pbc) -{ - if(not cuda->finished_setup || cuda->oncpu) - return AtomVecAtomic::pack_comm_vel(n,iswap,buf,pbc_flag,pbc); - - int m = Cuda_CommCuda_PackCommVel(&cuda->shared_data,n,*iswap,(void*) buf,pbc,pbc_flag); - if((sizeof(X_CFLOAT)!=sizeof(double)) && m) - m=(m+1)*sizeof(X_CFLOAT)/sizeof(double); - return m; -} -/* ---------------------------------------------------------------------- */ - -void AtomVecAtomicCuda::unpack_comm(int n, int first, double *buf) -{ - if(not cuda->finished_setup || cuda->oncpu) - {AtomVecAtomic::unpack_comm(n,first,buf); return;} - - Cuda_CommCuda_UnpackComm(&cuda->shared_data,n,first,(void*)buf); -} - -void AtomVecAtomicCuda::unpack_comm_vel(int n, int first, double *buf) -{ - if(not cuda->finished_setup || cuda->oncpu) - {AtomVecAtomic::unpack_comm_vel(n,first,buf); return;} - - Cuda_CommCuda_UnpackCommVel(&cuda->shared_data,n,first,(void*)buf); -} -/* ---------------------------------------------------------------------- */ - -int AtomVecAtomicCuda::pack_reverse(int n, int first, double *buf) -{ - if(not cuda->finished_setup || cuda->oncpu) - return AtomVecAtomic::pack_reverse(n,first,buf); - - int i,m,last; - - m = 0; - last = first + n; - for (i = first; i < last; i++) { - buf[m++] = f[i][0]; - buf[m++] = f[i][1]; - buf[m++] = f[i][2]; - } - return m; -} - -/* ---------------------------------------------------------------------- */ - -void AtomVecAtomicCuda::unpack_reverse(int n, int *list, double *buf) -{ - if(not cuda->finished_setup || cuda->oncpu) - {AtomVecAtomic::unpack_reverse(n,list,buf); return;} - - int i,j,m; - - m = 0; - for (i = 0; i < n; i++) { - j = list[i]; - f[j][0] += buf[m++]; - 
f[j][1] += buf[m++]; - f[j][2] += buf[m++]; - } -} - -/* ---------------------------------------------------------------------- */ - -int AtomVecAtomicCuda::pack_border(int n, int *iswap, double *buf, - int pbc_flag, int *pbc) -{ - if(not cuda->finished_setup || cuda->oncpu) - return AtomVecAtomic::pack_border(n,iswap,buf,pbc_flag,pbc); - - int m = Cuda_AtomVecAtomicCuda_PackBorder(&cuda->shared_data,n,*iswap,(void*) buf,pbc,pbc_flag); - - return m; -} - -int AtomVecAtomicCuda::pack_border_vel(int n, int *iswap, double *buf, - int pbc_flag, int *pbc) -{ - if(not cuda->finished_setup || cuda->oncpu) - return AtomVecAtomic::pack_border_vel(n,iswap,buf,pbc_flag,pbc); - - int m = Cuda_AtomVecAtomicCuda_PackBorderVel(&cuda->shared_data,n,*iswap,(void*) buf,pbc,pbc_flag); - - return m; -} -/* ---------------------------------------------------------------------- */ - -void AtomVecAtomicCuda::unpack_border(int n, int first, double *buf) -{ - if(not cuda->finished_setup || cuda->oncpu) - {AtomVecAtomic::unpack_border(n,first,buf); return;} - while(atom->nghost+atom->nlocal+n>=cuda->shared_data.atom.nmax) - { - grow_both(0); - } - int flag=Cuda_AtomVecAtomicCuda_UnpackBorder(&cuda->shared_data,n,first,(void*)buf); - if(flag) {printf(" # CUDA: Error: Failed to unpack Border atoms (This might be a bug).\n");} - -} - -void AtomVecAtomicCuda::unpack_border_vel(int n, int first, double *buf) -{ - if(not cuda->finished_setup || cuda->oncpu) - {AtomVecAtomic::unpack_border_vel(n,first,buf); return;} - while(atom->nghost+atom->nlocal+n>=cuda->shared_data.atom.nmax) - { - grow_both(0); - } - int flag=Cuda_AtomVecAtomicCuda_UnpackBorderVel(&cuda->shared_data,n,first,(void*)buf); - if(flag) {printf(" # CUDA: Error: Failed to unpack Border atoms (This might be a bug).\n");} -} -/* ---------------------------------------------------------------------- - pack data for atom I for sending to another proc - xyz must be 1st 3 values, so comm::exchange() can test on them 
-------------------------------------------------------------------------- */ - - -int AtomVecAtomicCuda::pack_exchange(int dim, double *buf) -{ - if(cuda->oncpu) - return AtomVecAtomic::pack_exchange(dim,buf); - - if(not cuda_init_done||domain->box_change) - { - Cuda_AtomVecAtomicCuda_Init(&cuda->shared_data); - cuda_init_done=true; - } - double** buf_pointer=(double**) buf; - if(*maxsend<atom->nghost || *buf_pointer==NULL) - { - grow_send(atom->nghost>*maxsend?atom->nghost:*maxsend,buf_pointer,0); - *maxsend=atom->nghost>*maxsend?atom->nghost:*maxsend; - } - - if(max_nsend==0) grow_copylist(200); - - int nsend_atoms = Cuda_AtomVecAtomicCuda_PackExchangeList(&cuda->shared_data,*maxsend,dim,*buf_pointer); - - if(nsend_atoms>max_nsend) {grow_copylist(nsend_atoms+100);} - if(nsend_atoms*NCUDAEXCHANGE>*maxsend) - { - grow_send((int) (nsend_atoms+100)*NCUDAEXCHANGE,buf_pointer,0); - Cuda_AtomVecAtomicCuda_PackExchangeList(&cuda->shared_data,*maxsend,dim,*buf_pointer); - } - - int nlocal=atom->nlocal-nsend_atoms; - - for(int i=0;i<nsend_atoms;i++) copylist2[i]=1; - for(int j=1;j<nsend_atoms+1;j++) - { - int i = static_cast <int> ((*buf_pointer)[j]); - if(i>=nlocal) copylist2[i-nlocal]=-1; - } - - int actpos=0; - for(int j=1;j<nsend_atoms+1;j++) - { - int i = static_cast <int> ((*buf_pointer)[j]); - if(i<nlocal) - { - while(copylist2[actpos]==-1) actpos++; - copylist[j-1]=nlocal+actpos; - actpos++; - } - } - cu_copylist->upload(); - - cuda->shared_data.atom.nlocal=nlocal; - - int m = Cuda_AtomVecAtomicCuda_PackExchange(&cuda->shared_data,nsend_atoms,*buf_pointer,cu_copylist->dev_data()); - if (atom->nextra_grow) - for(int j=0;j<nsend_atoms;j++) - { - int i=static_cast <int> ((*buf_pointer)[j+1]); - int nextra=0; - for (int iextra = 0; iextra < atom->nextra_grow; iextra++) { - - int dm = modify->fix[atom->extra_grow[iextra]]->pack_exchange(i,&((*buf_pointer)[m])); - m+=dm; - nextra+=dm; - if(i<nlocal)modify->fix[atom->extra_grow[iextra]]->copy_arrays(copylist[j],i,1); - 
if(m>*maxsend) grow_send(m,buf_pointer,1); - } - (*buf_pointer)[j+1] = nextra; - - } - - (*buf_pointer)[0] = nsend_atoms; - atom->nlocal-=nsend_atoms; - cuda->shared_data.atom.update_nlocal=2; - - if(m==1) return 0;//m is at least 1 in cuda since buf[0] contains number of atoms - return m; -} - -/* ---------------------------------------------------------------------- */ - -int AtomVecAtomicCuda::unpack_exchange(double *buf) -{ - //printf("Unpack Begin\n"); - if(cuda->oncpu) - return AtomVecAtomic::unpack_exchange(buf); - - int dim=cuda->shared_data.exchange_dim; - if(domain->box_change) - Cuda_AtomVecAtomicCuda_Init(&cuda->shared_data); - - int mfirst=0; - for(int pi=0;pi<(comm->procgrid[dim]>2?2:1);pi++) - { - int nlocal = atom->nlocal; - - int nsend_atoms=static_cast<int> (buf[0]); - if(nsend_atoms>max_nsend) grow_copylist(nsend_atoms+100); - - if (nlocal+nsend_atoms+atom->nghost>=atom->nmax) grow_both(nlocal+nsend_atoms*2+atom->nghost); - int naccept = Cuda_AtomVecAtomicCuda_UnpackExchange(&cuda->shared_data,nsend_atoms,buf,cu_copylist->dev_data()); - cu_copylist->download(); - int m = nsend_atoms*NCUDAEXCHANGE + 1; - nlocal+=naccept; - if (atom->nextra_grow) - for(int j=0;j<nsend_atoms;j++) - { - if(copylist[j]>-1) - { - for (int iextra = 0; iextra < atom->nextra_grow; iextra++) - m += modify->fix[atom->extra_grow[iextra]]-> - unpack_exchange(copylist[j],&buf[m]); - } - else - { - m+=static_cast <int> (buf[j+1]); - } - } - cuda->shared_data.atom.nlocal=nlocal; - if(atom->nlocal!=nlocal) - cuda->shared_data.atom.update_nlocal=2; - atom->nlocal=nlocal; - mfirst+=m; - buf=&buf[m]; - } - return mfirst; -} diff --git a/src/USER-CUDA/atom_vec_atomic_cuda.h b/src/USER-CUDA/atom_vec_atomic_cuda.h deleted file mode 100644 index dabbd9a215..0000000000 --- a/src/USER-CUDA/atom_vec_atomic_cuda.h +++ /dev/null @@ -1,81 +0,0 @@ -/* -*- c++ -*- ---------------------------------------------------------- - LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator - - 
Original Version: - http://lammps.sandia.gov, Sandia National Laboratories - Steve Plimpton, sjplimp@sandia.gov - - See the README file in the top-level LAMMPS directory. - - ----------------------------------------------------------------------- - - USER-CUDA Package and associated modifications: - https://sourceforge.net/projects/lammpscuda/ - - Christian Trott, christian.trott@tu-ilmenau.de - Lars Winterfeld, lars.winterfeld@tu-ilmenau.de - Theoretical Physics II, University of Technology Ilmenau, Germany - - See the README file in the USER-CUDA directory. - - This software is distributed under the GNU General Public License. -------------------------------------------------------------------------- */ - -/* ---------------------------------------------------------------------- - LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator - http://lammps.sandia.gov, Sandia National Laboratories - Steve Plimpton, sjplimp@sandia.gov - - Copyright (2003) Sandia Corporation. Under the terms of Contract - DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains - certain rights in this software. This software is distributed under - the GNU General Public License. - - See the README file in the top-level LAMMPS directory. 
-------------------------------------------------------------------------- */ -#ifdef ATOM_CLASS - -AtomStyle(atomic/cuda,AtomVecAtomicCuda) - -#else - -#ifndef LMP_ATOM_VEC_ATOMIC_CUDA_H -#define LMP_ATOM_VEC_ATOMIC_CUDA_H - -#include "atom_vec_atomic.h" -#include "cuda_data.h" - -namespace LAMMPS_NS { - -class AtomVecAtomicCuda : public AtomVecAtomic { - public: - AtomVecAtomicCuda(class LAMMPS *); - virtual ~AtomVecAtomicCuda() {} - void grow_copylist(int n); - void grow_send(int n,double** buf_send,int flag); - void grow_both(int n); - int pack_comm(int, int *, double *, int, int *); - int pack_comm_vel(int, int *, double *, int, int *); - void unpack_comm(int, int, double *); - void unpack_comm_vel(int, int, double *); - int pack_reverse(int, int, double *); - void unpack_reverse(int, int *, double *); - int pack_border(int, int *, double *, int, int *); - int pack_border_vel(int, int *, double *, int, int *); - void unpack_border(int, int, double *); - void unpack_border_vel(int, int, double *); - int pack_exchange(int, double *); - int unpack_exchange(double *); - private: - class Cuda *cuda; - bool cuda_init_done; - int* copylist; - int* copylist2; - cCudaData<int, int, xx >* cu_copylist; - int max_nsend; -}; - -} - -#endif -#endif diff --git a/src/USER-CUDA/atom_vec_charge_cuda.cpp b/src/USER-CUDA/atom_vec_charge_cuda.cpp deleted file mode 100644 index 07140f3e84..0000000000 --- a/src/USER-CUDA/atom_vec_charge_cuda.cpp +++ /dev/null @@ -1,394 +0,0 @@ -/* ---------------------------------------------------------------------- - LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator - - Original Version: - http://lammps.sandia.gov, Sandia National Laboratories - Steve Plimpton, sjplimp@sandia.gov - - See the README file in the top-level LAMMPS directory. 
- - ----------------------------------------------------------------------- - - USER-CUDA Package and associated modifications: - https://sourceforge.net/projects/lammpscuda/ - - Christian Trott, christian.trott@tu-ilmenau.de - Lars Winterfeld, lars.winterfeld@tu-ilmenau.de - Theoretical Physics II, University of Technology Ilmenau, Germany - - See the README file in the USER-CUDA directory. - - This software is distributed under the GNU General Public License. -------------------------------------------------------------------------- */ - -/* ---------------------------------------------------------------------- - LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator - http://lammps.sandia.gov, Sandia National Laboratories - Steve Plimpton, sjplimp@sandia.gov - - Copyright (2003) Sandia Corporation. Under the terms of Contract - DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains - certain rights in this software. This software is distributed under - the GNU General Public License. - - See the README file in the top-level LAMMPS directory. -------------------------------------------------------------------------- */ - -#include <cstdlib> -#include <cstdio> -#include <cstring> -#include "atom_vec_charge_cuda.h" -#include "comm_cuda_cu.h" -#include "atom_vec_charge_cuda_cu.h" -#include "atom.h" -#include "domain.h" -#include "modify.h" -#include "fix.h" -#include "memory.h" -#include "error.h" -#include "user_cuda.h" -#include "comm.h" - -using namespace LAMMPS_NS; - -#define BUFFACTOR 1.5 -#define BUFEXTRA 1000 -#define NCUDAEXCHANGE 12 //nextra x y z vx vy vz tag type mask image q - -#define BUF_CFLOAT double -/* ---------------------------------------------------------------------- */ - -AtomVecChargeCuda::AtomVecChargeCuda(LAMMPS *lmp) : AtomVecCharge(lmp) -{ - cuda = lmp->cuda; - if(cuda == NULL) - error->all(FLERR,"You cannot use a /cuda class, without activating 'cuda' acceleration. 
Provide '-c on' as command-line argument to LAMMPS.."); - - maxsend=0; - cudable=true; - cuda_init_done=false; - max_nsend=0; - cu_copylist=NULL; - copylist=NULL; - copylist2=NULL; -} - -void AtomVecChargeCuda::grow_copylist(int new_max_nsend) -{ - max_nsend=new_max_nsend; - delete cu_copylist; - delete [] copylist2; - if(copylist) CudaWrapper_FreePinnedHostData((void*) copylist); - copylist = (int*) CudaWrapper_AllocPinnedHostData(max_nsend*sizeof(int),false); - copylist2 = new int[max_nsend]; - cu_copylist = new cCudaData<int, int, xx > (copylist, max_nsend); -} - -void AtomVecChargeCuda::grow_send(int n,double** buf_send,int flag) //need to be able to grow the comm send_buffer since the array sahll be copied from the gpu in whole -{ - int old_maxsend=*maxsend+BUFEXTRA; - *maxsend = static_cast<int> (BUFFACTOR * n); - if (flag) - { - if(cuda->pinned) - { - double* tmp = new double[old_maxsend]; - memcpy((void*) tmp,(void*) *buf_send,old_maxsend*sizeof(double)); - if(*buf_send) CudaWrapper_FreePinnedHostData((void*) (*buf_send)); - *buf_send = (double*) CudaWrapper_AllocPinnedHostData((*maxsend+BUFEXTRA)*sizeof(double),false); - memcpy(*buf_send,tmp,old_maxsend*sizeof(double)); - delete [] tmp; - } - else - { - *buf_send = (double *) - memory->srealloc(*buf_send,(*maxsend+BUFEXTRA)*sizeof(double), - "comm:buf_send"); - } - } - else { - if(cuda->pinned) - { - if(*buf_send) CudaWrapper_FreePinnedHostData((void*) (*buf_send)); - *buf_send = (double*) CudaWrapper_AllocPinnedHostData((*maxsend+BUFEXTRA)*sizeof(double),false); - } - else - { - memory->sfree(*buf_send); - *buf_send = (double *) memory->smalloc((*maxsend+BUFEXTRA)*sizeof(double), - "comm:buf_send"); - } - } -} - -void AtomVecChargeCuda::grow_both(int n) -{ - if(cuda->finished_setup) - cuda->downloadAll(); - AtomVecCharge::grow(n); - if(cuda->finished_setup) - { - cuda->checkResize(); - cuda->uploadAll(); - } -} - -int AtomVecChargeCuda::pack_comm(int n, int* iswap, double *buf, - int pbc_flag, int *pbc) 
//usually this should not be called since comm->communicate handles the communication if only positions are exchanged -{ - if(not cuda->finished_setup || cuda->oncpu) - return AtomVecCharge::pack_comm(n,iswap,buf,pbc_flag,pbc); - - int m = Cuda_CommCuda_PackComm(&cuda->shared_data,n,*iswap,(void*) buf,pbc,pbc_flag); - if((sizeof(X_CFLOAT)!=sizeof(double)) && m) - m=(m+1)*sizeof(X_CFLOAT)/sizeof(double); - return m; -} - -int AtomVecChargeCuda::pack_comm_vel(int n, int* iswap, double *buf, - int pbc_flag, int *pbc) //usually this should not be called since comm->communicate handles the communication if only positions are exchanged -{ - if(not cuda->finished_setup || cuda->oncpu) - return AtomVecCharge::pack_comm_vel(n,iswap,buf,pbc_flag,pbc); - - int m = Cuda_CommCuda_PackCommVel(&cuda->shared_data,n,*iswap,(void*) buf,pbc,pbc_flag); - if((sizeof(X_CFLOAT)!=sizeof(double)) && m) - m=(m+1)*sizeof(X_CFLOAT)/sizeof(double); - return m; -} - -/* ---------------------------------------------------------------------- */ - -void AtomVecChargeCuda::unpack_comm(int n, int first, double *buf) //usually this should not be called since comm->communicate handles the communication if only positions are exchanged -{ - if(not cuda->finished_setup || cuda->oncpu) - {AtomVecCharge::unpack_comm(n,first,buf); return;} - - Cuda_CommCuda_UnpackComm(&cuda->shared_data,n,first,(void*)buf); -} - -void AtomVecChargeCuda::unpack_comm_vel(int n, int first, double *buf) //usually this should not be called since comm->communicate handles the communication if only positions are exchanged -{ - if(not cuda->finished_setup || cuda->oncpu) - {AtomVecCharge::unpack_comm_vel(n,first,buf); return;} - - Cuda_CommCuda_UnpackCommVel(&cuda->shared_data,n,first,(void*)buf); -} - -/* ---------------------------------------------------------------------- */ - -int AtomVecChargeCuda::pack_reverse(int n, int first, double *buf) //usually this should not be called since comm->communicate handles the communication 
if only forces are exchanged -{ - if(not cuda->finished_setup || cuda->oncpu) - return AtomVecCharge::pack_reverse(n,first,buf); - - int i,m,last; - cuda->cu_f->download(); - m = 0; - last = first + n; - for (i = first; i < last; i++) { - buf[m++] = f[i][0]; - buf[m++] = f[i][1]; - buf[m++] = f[i][2]; - } - cuda->cu_f->upload(); - return m; -} - -/* ---------------------------------------------------------------------- */ - -void AtomVecChargeCuda::unpack_reverse(int n, int *list, double *buf)//usually this should not be called since comm->communicate handles the communication if only forces are exchanged -{ - if(not cuda->finished_setup || cuda->oncpu) - {AtomVecCharge::unpack_reverse(n,list,buf); return;} - - int i,j,m; - - m = 0; - cuda->cu_f->download(); - for (i = 0; i < n; i++) { - j = list[i]; - f[j][0] += buf[m++]; - f[j][1] += buf[m++]; - f[j][2] += buf[m++]; - } - cuda->cu_f->upload(); -} - -/* ---------------------------------------------------------------------- */ - -int AtomVecChargeCuda::pack_border(int n, int *iswap, double *buf, - int pbc_flag, int *pbc) -{ - if(not cuda->finished_setup || cuda->oncpu) - return AtomVecCharge::pack_border(n,iswap,buf,pbc_flag,pbc); - - int m = Cuda_AtomVecChargeCuda_PackBorder(&cuda->shared_data,n,*iswap,(void*) buf,pbc,pbc_flag); - - return m; -} - -int AtomVecChargeCuda::pack_border_vel(int n, int *iswap, double *buf, - int pbc_flag, int *pbc) -{ - if(not cuda->finished_setup || cuda->oncpu) - return AtomVecCharge::pack_border_vel(n,iswap,buf,pbc_flag,pbc); - - int m = Cuda_AtomVecChargeCuda_PackBorderVel(&cuda->shared_data,n,*iswap,(void*) buf,pbc,pbc_flag); - - return m; -} - -/* ---------------------------------------------------------------------- */ - -void AtomVecChargeCuda::unpack_border(int n, int first, double *buf) -{ - if(not cuda->finished_setup || cuda->oncpu) - {AtomVecCharge::unpack_border(n,first,buf); return;} - while(atom->nghost+atom->nlocal+n>=cuda->shared_data.atom.nmax) //ensure there is 
enough space on device to unpack data - { - grow_both(0); - } - int flag=Cuda_AtomVecChargeCuda_UnpackBorder(&cuda->shared_data,n,first,(void*)buf); - if(flag) {printf(" # CUDA: Error: Failed to unpack Border atoms (This might be a bug).\n");} -} - -void AtomVecChargeCuda::unpack_border_vel(int n, int first, double *buf) -{ - if(not cuda->finished_setup || cuda->oncpu) - {AtomVecCharge::unpack_border_vel(n,first,buf); return;} - while(atom->nghost+atom->nlocal+n>=cuda->shared_data.atom.nmax) //ensure there is enough space on device to unpack data - { - grow_both(0); - } - int flag=Cuda_AtomVecChargeCuda_UnpackBorderVel(&cuda->shared_data,n,first,(void*)buf); - if(flag) {printf(" # CUDA: Error: Failed to unpack Border atoms (This might be a bug).\n");} -} - -/* ---------------------------------------------------------------------- - pack data for atom I for sending to another proc - xyz must be 1st 3 values, so comm::exchange() can test on them -------------------------------------------------------------------------- */ - - -int AtomVecChargeCuda::pack_exchange(int dim, double *buf) -{ - if(cuda->oncpu) - return AtomVecCharge::pack_exchange(dim,buf); - - if(not cuda_init_done||domain->box_change) - { - Cuda_AtomVecChargeCuda_Init(&cuda->shared_data); - cuda_init_done=true; - } - double** buf_pointer=(double**) buf; - if(*maxsend<atom->nghost || *buf_pointer==NULL) - { - grow_send(atom->nghost>*maxsend?atom->nghost:*maxsend,buf_pointer,0); - *maxsend=atom->nghost>*maxsend?atom->nghost:*maxsend; - } - - if(max_nsend==0) grow_copylist(200); - - int nsend_atoms = Cuda_AtomVecChargeCuda_PackExchangeList(&cuda->shared_data,*maxsend,dim,*buf_pointer); - - if(nsend_atoms>max_nsend) grow_copylist(nsend_atoms+100); - if(nsend_atoms*NCUDAEXCHANGE>*maxsend) - { - grow_send((int) (nsend_atoms+100)*NCUDAEXCHANGE,buf_pointer,0); - Cuda_AtomVecChargeCuda_PackExchangeList(&cuda->shared_data,*maxsend,dim,*buf_pointer); - } - - int nlocal=atom->nlocal-nsend_atoms; - - for(int 
i=0;i<nsend_atoms;i++) copylist2[i]=1; - for(int j=1;j<nsend_atoms+1;j++) - { - int i = static_cast <int> ((*buf_pointer)[j]); - if(i>=nlocal) copylist2[i-nlocal]=-1; - } - - int actpos=0; - for(int j=1;j<nsend_atoms+1;j++) - { - int i = static_cast <int> ((*buf_pointer)[j]); - if(i<nlocal) - { - while(copylist2[actpos]==-1) actpos++; - copylist[j-1]=nlocal+actpos; - actpos++; - } - } - cu_copylist->upload(); - - cuda->shared_data.atom.nlocal=nlocal; - - int m = Cuda_AtomVecChargeCuda_PackExchange(&cuda->shared_data,nsend_atoms,*buf_pointer,cu_copylist->dev_data()); - - if (atom->nextra_grow) - for(int j=0;j<nsend_atoms;j++) - { - int i=static_cast <int> ((*buf_pointer)[j+1]); - int nextra=0; - for (int iextra = 0; iextra < atom->nextra_grow; iextra++) { - - int dm = modify->fix[atom->extra_grow[iextra]]->pack_exchange(i,&((*buf_pointer)[m])); - m+=dm; - nextra+=dm; - if(i<nlocal)modify->fix[atom->extra_grow[iextra]]->copy_arrays(copylist[j],i,1); - if(m>*maxsend) grow_send(m,buf_pointer,1); - } - (*buf_pointer)[j+1] = nextra; - } - - (*buf_pointer)[0] = nsend_atoms; - atom->nlocal-=nsend_atoms; - cuda->shared_data.atom.update_nlocal=2; - - if(m==1) return 0;//m is at least 1 in cuda since buf[0] contains number of atoms - return m; -} - -/* ---------------------------------------------------------------------- */ - -int AtomVecChargeCuda::unpack_exchange(double *buf) -{ - if(cuda->oncpu) - return AtomVecCharge::unpack_exchange(buf); - - int dim=cuda->shared_data.exchange_dim; - if(domain->box_change) - Cuda_AtomVecChargeCuda_Init(&cuda->shared_data); - - int mfirst=0; - for(int pi=0;pi<(comm->procgrid[dim]>2?2:1);pi++) - { - int nlocal = atom->nlocal; - int nsend_atoms=static_cast<int> (buf[0]); - if(nsend_atoms>max_nsend) grow_copylist(nsend_atoms+100); - - if (nlocal+nsend_atoms+atom->nghost>=atom->nmax) grow_both(nlocal+nsend_atoms*2+atom->nghost); - int naccept = Cuda_AtomVecChargeCuda_UnpackExchange(&cuda->shared_data,nsend_atoms,buf,cu_copylist->dev_data()); 
- cu_copylist->download(); - int m = nsend_atoms*NCUDAEXCHANGE + 1; - nlocal+=naccept; - if (atom->nextra_grow) - for(int j=0;j<nsend_atoms;j++) - { - if(copylist[j]>-1) - { - for (int iextra = 0; iextra < atom->nextra_grow; iextra++) - m += modify->fix[atom->extra_grow[iextra]]-> - unpack_exchange(copylist[j],&buf[m]); - } - else - m+=static_cast <int> (buf[j+1]); - } - cuda->shared_data.atom.nlocal=nlocal; - cuda->shared_data.atom.update_nlocal=2; - atom->nlocal=nlocal; - mfirst+=m; - buf=&buf[m]; - } - return mfirst; -} diff --git a/src/USER-CUDA/atom_vec_charge_cuda.h b/src/USER-CUDA/atom_vec_charge_cuda.h deleted file mode 100644 index 25d431c917..0000000000 --- a/src/USER-CUDA/atom_vec_charge_cuda.h +++ /dev/null @@ -1,69 +0,0 @@ -/* -*- c++ -*- ---------------------------------------------------------- - LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator - - Original Version: - http://lammps.sandia.gov, Sandia National Laboratories - Steve Plimpton, sjplimp@sandia.gov - - See the README file in the top-level LAMMPS directory. - - ----------------------------------------------------------------------- - - USER-CUDA Package and associated modifications: - https://sourceforge.net/projects/lammpscuda/ - - Christian Trott, christian.trott@tu-ilmenau.de - Lars Winterfeld, lars.winterfeld@tu-ilmenau.de - Theoretical Physics II, University of Technology Ilmenau, Germany - - See the README file in the USER-CUDA directory. - - This software is distributed under the GNU General Public License. 
-------------------------------------------------------------------------- */ - -#ifdef ATOM_CLASS - -AtomStyle(charge/cuda,AtomVecChargeCuda) - -#else - -#ifndef LMP_ATOM_VEC_CHARGE_CUDA_H -#define LMP_ATOM_VEC_CHARGE_CUDA_H - -#include "atom_vec_charge.h" -#include "cuda_data.h" - -namespace LAMMPS_NS { - -class AtomVecChargeCuda : public AtomVecCharge { - public: - AtomVecChargeCuda(class LAMMPS *); - virtual ~AtomVecChargeCuda() {} - void grow_copylist(int n); - void grow_send(int n,double** buf_send,int flag); - void grow_both(int n); - int pack_comm(int, int *, double *, int, int *); - int pack_comm_vel(int, int *, double *, int, int *); - void unpack_comm(int, int, double *); - void unpack_comm_vel(int, int, double *); - int pack_reverse(int, int, double *); - void unpack_reverse(int, int *, double *); - int pack_border(int, int *, double *, int, int *); - int pack_border_vel(int, int *, double *, int, int *); - void unpack_border(int, int, double *); - void unpack_border_vel(int, int, double *); - int pack_exchange(int, double *); - int unpack_exchange(double *); - private: - class Cuda *cuda; - bool cuda_init_done; - int* copylist; - int* copylist2; - cCudaData<int, int, xx >* cu_copylist; - int max_nsend; -}; - -} - -#endif -#endif diff --git a/src/USER-CUDA/atom_vec_full_cuda.cpp b/src/USER-CUDA/atom_vec_full_cuda.cpp deleted file mode 100644 index dda12603db..0000000000 --- a/src/USER-CUDA/atom_vec_full_cuda.cpp +++ /dev/null @@ -1,508 +0,0 @@ -/* ---------------------------------------------------------------------- - LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator - - Original Version: - http://lammps.sandia.gov, Sandia National Laboratories - Steve Plimpton, sjplimp@sandia.gov - - See the README file in the top-level LAMMPS directory. 
- - ----------------------------------------------------------------------- - - USER-CUDA Package and associated modifications: - https://sourceforge.net/projects/lammpscuda/ - - Christian Trott, christian.trott@tu-ilmenau.de - Lars Winterfeld, lars.winterfeld@tu-ilmenau.de - Theoretical Physics II, University of Technology Ilmenau, Germany - - See the README file in the USER-CUDA directory. - - This software is distributed under the GNU General Public License. -------------------------------------------------------------------------- */ - -/* ---------------------------------------------------------------------- - LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator - http://lammps.sandia.gov, Sandia National Laboratories - Steve Plimpton, sjplimp@sandia.gov - - Copyright (2003) Sandia Corporation. Under the terms of Contract - DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains - certain rights in this software. This software is distributed under - the GNU General Public License. - - See the README file in the top-level LAMMPS directory. -------------------------------------------------------------------------- */ - -#include <cstdlib> -#include <cstdio> -#include <cstring> -#include "atom_vec_full_cuda.h" -#include "comm_cuda_cu.h" -#include "atom_vec_full_cuda_cu.h" -#include "atom.h" -#include "domain.h" -#include "modify.h" -#include "fix.h" -#include "memory.h" -#include "error.h" -#include "user_cuda.h" -#include "universe.h" -#include "comm.h" - -using namespace LAMMPS_NS; - -#define BUFFACTOR 1.5 -#define BUFEXTRA 1000 -#define NCUDAEXCHANGE 13 //nextra x y z vx vy vz tag type mask image q molecule - -#define BUF_CFLOAT double -/* ---------------------------------------------------------------------- */ - -AtomVecFullCuda::AtomVecFullCuda(LAMMPS *lmp) : - AtomVecFull(lmp) -{ - cuda = lmp->cuda; - if(cuda == NULL) - error->all(FLERR,"You cannot use a /cuda class, without activating 'cuda' acceleration. 
Provide '-c on' as command-line argument to LAMMPS.."); - - maxsend=0; - cudable=true; - cuda_init_done=false; - max_nsend=0; - cu_copylist=NULL; - copylist=NULL; - copylist2=NULL; -} - -void AtomVecFullCuda::grow_copylist(int new_max_nsend) -{ - max_nsend=new_max_nsend; - delete cu_copylist; - delete [] copylist2; - if(copylist) CudaWrapper_FreePinnedHostData((void*) copylist); - copylist = (int*) CudaWrapper_AllocPinnedHostData(max_nsend*sizeof(int),false); - copylist2 = new int[max_nsend]; - cu_copylist = new cCudaData<int, int, xx > (copylist, max_nsend); -} - -void AtomVecFullCuda::grow_send(int n,double** buf_send,int flag) //need to be able to grow the comm send_buffer since the array sahll be copied from the gpu in whole -{ - int old_maxsend=*maxsend+BUFEXTRA; - *maxsend = static_cast<int> (BUFFACTOR * n); - if (flag) - { - if(cuda->pinned) - { - double* tmp = new double[old_maxsend]; - memcpy((void*) tmp,(void*) *buf_send,old_maxsend*sizeof(double)); - if(*buf_send) CudaWrapper_FreePinnedHostData((void*) (*buf_send)); - *buf_send = (double*) CudaWrapper_AllocPinnedHostData((*maxsend+BUFEXTRA)*sizeof(double),false); - memcpy(*buf_send,tmp,old_maxsend*sizeof(double)); - delete [] tmp; - } - else - { - *buf_send = (double *) - memory->srealloc(*buf_send,(*maxsend+BUFEXTRA)*sizeof(double), - "comm:buf_send"); - } - } - else { - if(cuda->pinned) - { - if(*buf_send) CudaWrapper_FreePinnedHostData((void*) (*buf_send)); - *buf_send = (double*) CudaWrapper_AllocPinnedHostData((*maxsend+BUFEXTRA)*sizeof(double),false); - } - else - { - memory->sfree(*buf_send); - *buf_send = (double *) memory->smalloc((*maxsend+BUFEXTRA)*sizeof(double), - "comm:buf_send"); - } - } -} - -void AtomVecFullCuda::grow_both(int n) -{ - if(cuda->finished_setup) - { - cuda->cu_special->upload(); - cuda->cu_nspecial->upload(); - cuda->downloadAll(); - } - AtomVecFull::grow(n); - if(cuda->finished_setup) - { - cuda->checkResize(); - cuda->uploadAll(); - } -} - -int 
AtomVecFullCuda::pack_comm(int n, int* iswap, double *buf, - int pbc_flag, int *pbc) //usually this should not be called since comm->communicate handles the communication if only positions are exchanged -{ - if(not cuda->finished_setup || cuda->oncpu) - return AtomVecFull::pack_comm(n,iswap,buf,pbc_flag,pbc); - - int m = Cuda_CommCuda_PackComm(&cuda->shared_data,n,*iswap,(void*) buf,pbc,pbc_flag); - if((sizeof(X_CFLOAT)!=sizeof(double)) && m) - m=(m+1)*sizeof(X_CFLOAT)/sizeof(double); - return m; -} - -int AtomVecFullCuda::pack_comm_vel(int n, int* iswap, double *buf, - int pbc_flag, int *pbc) //usually this should not be called since comm->communicate handles the communication if only positions are exchanged -{ - if(not cuda->finished_setup || cuda->oncpu) - return AtomVecFull::pack_comm_vel(n,iswap,buf,pbc_flag,pbc); - - int m = Cuda_CommCuda_PackCommVel(&cuda->shared_data,n,*iswap,(void*) buf,pbc,pbc_flag); - if((sizeof(X_CFLOAT)!=sizeof(double)) && m) - m=(m+1)*sizeof(X_CFLOAT)/sizeof(double); - return m; -} - -/* ---------------------------------------------------------------------- */ - -void AtomVecFullCuda::unpack_comm(int n, int first, double *buf) //usually this should not be called since comm->communicate handles the communication if only positions are exchanged -{ - if(not cuda->finished_setup || cuda->oncpu) - {AtomVecFull::unpack_comm(n,first,buf); return;} - - Cuda_CommCuda_UnpackComm(&cuda->shared_data,n,first,(void*)buf); -} - -void AtomVecFullCuda::unpack_comm_vel(int n, int first, double *buf) //usually this should not be called since comm->communicate handles the communication if only positions are exchanged -{ - if(not cuda->finished_setup || cuda->oncpu) - {AtomVecFull::unpack_comm_vel(n,first,buf); return;} - - Cuda_CommCuda_UnpackCommVel(&cuda->shared_data,n,first,(void*)buf); -} - -/* ---------------------------------------------------------------------- */ - -int AtomVecFullCuda::pack_reverse(int n, int first, double *buf) //usually this 
should not be called since comm->communicate handles the communication if only forces are exchanged -{ - if(not cuda->finished_setup || cuda->oncpu) - return AtomVecFull::pack_reverse(n,first,buf); - - int i,m,last; - cuda->cu_f->download(); - m = 0; - last = first + n; - for (i = first; i < last; i++) { - buf[m++] = f[i][0]; - buf[m++] = f[i][1]; - buf[m++] = f[i][2]; - } - cuda->cu_f->upload(); - return m; -} - -/* ---------------------------------------------------------------------- */ - -void AtomVecFullCuda::unpack_reverse(int n, int *list, double *buf)//usually this should not be called since comm->communicate handles the communication if only forces are exchanged -{ - if(not cuda->finished_setup || cuda->oncpu) - {AtomVecFull::unpack_reverse(n,list,buf); return;} - - int i,j,m; - - m = 0; - cuda->cu_f->download(); - for (i = 0; i < n; i++) { - j = list[i]; - f[j][0] += buf[m++]; - f[j][1] += buf[m++]; - f[j][2] += buf[m++]; - } - cuda->cu_f->upload(); -} - -/* ---------------------------------------------------------------------- */ - -int AtomVecFullCuda::pack_border(int n, int *iswap, double *buf, - int pbc_flag, int *pbc) -{ - if(not cuda->finished_setup || cuda->oncpu) - return AtomVecFull::pack_border(n,iswap,buf,pbc_flag,pbc); - - int m = Cuda_AtomVecFullCuda_PackBorder(&cuda->shared_data,n,*iswap,(void*) buf,pbc,pbc_flag); - return m; -} - -int AtomVecFullCuda::pack_border_vel(int n, int *iswap, double *buf, - int pbc_flag, int *pbc) -{ - if(not cuda->finished_setup || cuda->oncpu) - return AtomVecFull::pack_border_vel(n,iswap,buf,pbc_flag,pbc); - - int m = Cuda_AtomVecFullCuda_PackBorderVel(&cuda->shared_data,n,*iswap,(void*) buf,pbc,pbc_flag); - - return m; -} - -/* ---------------------------------------------------------------------- */ - -void AtomVecFullCuda::unpack_border(int n, int first, double *buf) -{ - if(not cuda->finished_setup || cuda->oncpu) - {AtomVecFull::unpack_border(n,first,buf); return;} - 
while(atom->nghost+atom->nlocal+n>=cuda->shared_data.atom.nmax) //ensure there is enough space on device to unpack data - { - grow_both(0); - } - int flag=Cuda_AtomVecFullCuda_UnpackBorder(&cuda->shared_data,n,first,(void*)buf); - if(flag) {printf(" # CUDA: Error: Failed to unpack Border atoms (This might be a bug).\n");} -} - -void AtomVecFullCuda::unpack_border_vel(int n, int first, double *buf) -{ - if(not cuda->finished_setup || cuda->oncpu) - {AtomVecFull::unpack_border_vel(n,first,buf); return;} - while(atom->nghost+atom->nlocal+n>=cuda->shared_data.atom.nmax) //ensure there is enough space on device to unpack data - { - grow_both(0); - } - int flag=Cuda_AtomVecFullCuda_UnpackBorderVel(&cuda->shared_data,n,first,(void*)buf); - if(flag) {printf(" # CUDA: Error: Failed to unpack Border atoms (This might be a bug).\n");} -} - -/* ---------------------------------------------------------------------- - pack data for atom I for sending to another proc - xyz must be 1st 3 values, so comm::exchange() can test on them -------------------------------------------------------------------------- */ - - -int AtomVecFullCuda::pack_exchange(int dim, double *buf) -{ - if(cuda->oncpu) - return AtomVecFull::pack_exchange(dim,buf); - - if(not cuda_init_done||domain->box_change) - { - Cuda_AtomVecFullCuda_Init(&cuda->shared_data); - cuda_init_done=true; - } - double** buf_pointer=(double**) buf; - if(*maxsend<atom->nghost || *buf_pointer==NULL) - { - grow_send(atom->nghost>*maxsend?atom->nghost:*maxsend,buf_pointer,0); - *maxsend=atom->nghost>*maxsend?atom->nghost:*maxsend; - } - - if(max_nsend==0) grow_copylist(200); - - int nsend_atoms = Cuda_AtomVecFullCuda_PackExchangeList(&cuda->shared_data,*maxsend,dim,*buf_pointer); - - if(nsend_atoms>max_nsend) grow_copylist(nsend_atoms+100); - if(nsend_atoms*NCUDAEXCHANGE>*maxsend) - { - grow_send((int) (nsend_atoms+100)*NCUDAEXCHANGE,buf_pointer,0); - Cuda_AtomVecFullCuda_PackExchangeList(&cuda->shared_data,*maxsend,dim,*buf_pointer); 
- } - - int nlocal=atom->nlocal-nsend_atoms; - - for(int i=0;i<nsend_atoms;i++) copylist2[i]=1; - for(int j=1;j<nsend_atoms+1;j++) - { - int i = static_cast <int> ((*buf_pointer)[j]); - if(i>=nlocal) copylist2[i-nlocal]=-1; - } - - int actpos=0; - for(int j=1;j<nsend_atoms+1;j++) - { - int i = static_cast <int> ((*buf_pointer)[j]); - if(i<nlocal) - { - while(copylist2[actpos]==-1) actpos++; - copylist[j-1]=nlocal+actpos; - actpos++; - } - } - cu_copylist->upload(); - - cuda->shared_data.atom.nlocal=nlocal; - - int m = Cuda_AtomVecFullCuda_PackExchange(&cuda->shared_data,nsend_atoms,*buf_pointer,cu_copylist->dev_data()); - - my_times time1,time2; - my_gettime(CLOCK_REALTIME,&time1); - - double* buf_p=*buf_pointer; - for(int j=0;j<nsend_atoms;j++) - { - int i=static_cast <int> (buf_p[j+1]); - int nextra=0; - int k; - buf_p[m++] = num_bond[i]; - for (k = 0; k < num_bond[i]; k++) { - buf_p[m++] = bond_type[i][k]; - buf_p[m++] = bond_atom[i][k]; - } - nextra+=2*num_bond[i]+1; - if(m>*maxsend) {grow_send(m,buf_pointer,1); buf_p=*buf_pointer;} - - buf_p[m++] = num_angle[i]; - for (k = 0; k < num_angle[i]; k++) { - buf_p[m++] = angle_type[i][k]; - buf_p[m++] = angle_atom1[i][k]; - buf_p[m++] = angle_atom2[i][k]; - buf_p[m++] = angle_atom3[i][k]; - } - nextra+=4*num_angle[i]+1; - if(m>*maxsend) {grow_send(m,buf_pointer,1); buf_p=*buf_pointer;} - - buf_p[m++] = num_dihedral[i]; - for (k = 0; k < num_dihedral[i]; k++) { - buf_p[m++] = dihedral_type[i][k]; - buf_p[m++] = dihedral_atom1[i][k]; - buf_p[m++] = dihedral_atom2[i][k]; - buf_p[m++] = dihedral_atom3[i][k]; - buf_p[m++] = dihedral_atom4[i][k]; - } - nextra+=5*num_dihedral[i]+1; - if(m>*maxsend) {grow_send(m,buf_pointer,1); buf_p=*buf_pointer;} - - buf_p[m++] = num_improper[i]; - for (k = 0; k < num_improper[i]; k++) { - buf_p[m++] = improper_type[i][k]; - buf_p[m++] = improper_atom1[i][k]; - buf_p[m++] = improper_atom2[i][k]; - buf_p[m++] = improper_atom3[i][k]; - buf_p[m++] = improper_atom4[i][k]; - } - 
nextra+=5*num_improper[i]+1; - if(m>*maxsend) {grow_send(m,buf_pointer,1); buf_p=*buf_pointer;} - - buf_p[m++] = nspecial[i][0]; - buf_p[m++] = nspecial[i][1]; - buf_p[m++] = nspecial[i][2]; - for (k = 0; k < nspecial[i][2]; k++) buf_p[m++] = special[i][k]; - nextra+=nspecial[i][2]+3; - if(m>*maxsend) {grow_send(m,buf_pointer,1); buf_p=*buf_pointer;} - - if (atom->nextra_grow) - for (int iextra = 0; iextra < atom->nextra_grow; iextra++) - { - int dm= modify->fix[atom->extra_grow[iextra]]->pack_exchange(i,&buf_p[m]); - m+=dm; - nextra+=dm; - if(i<nlocal)modify->fix[atom->extra_grow[iextra]]->copy_arrays(copylist[j],i,1); - if(m>*maxsend) {grow_send(m,buf_pointer,1); buf_p=*buf_pointer;} - } - - if(i<nlocal)AtomVecFull::copy(copylist[j],i,1); - (*buf_pointer)[j+1] = nextra; - } - - my_gettime(CLOCK_REALTIME,&time2); - cuda->shared_data.cuda_timings.comm_exchange_cpu_pack+= - time2.tv_sec-time1.tv_sec+1.0*(time2.tv_nsec-time1.tv_nsec)/1000000000; - - (*buf_pointer)[0] = nsend_atoms; - atom->nlocal-=nsend_atoms; - cuda->shared_data.atom.update_nlocal=2; - //printf("End Pack Exchange\n"); - if(m==1) return 0; - return m; -} - -/* ---------------------------------------------------------------------- */ - -int AtomVecFullCuda::unpack_exchange(double *buf) -{ -// printf("Begin UnPack Exchange\n"); - if(cuda->oncpu) - return AtomVecFull::unpack_exchange(buf); - - int dim=cuda->shared_data.exchange_dim; - if(domain->box_change) - Cuda_AtomVecFullCuda_Init(&cuda->shared_data); - - int mfirst=0; - for(int pi=0;pi<(comm->procgrid[dim]>2?2:1);pi++) - { - int nlocal = atom->nlocal; - int nsend_atoms=static_cast<int> (buf[0]); - if(nsend_atoms>max_nsend) grow_copylist(nsend_atoms+100); - - if (nlocal+nsend_atoms+atom->nghost>=atom->nmax) grow_both(nlocal+nsend_atoms*2+atom->nghost); //ensure there is enough space on device to unpack data - int naccept = Cuda_AtomVecFullCuda_UnpackExchange(&cuda->shared_data,nsend_atoms,buf,cu_copylist->dev_data()); - cu_copylist->download(); - 
int m = nsend_atoms*NCUDAEXCHANGE + 1; - nlocal+=naccept; - - my_times time1,time2; - my_gettime(CLOCK_REALTIME,&time1); - - for(int j=0;j<nsend_atoms;j++) - { - if(copylist[j]>-1) - { - int k; - int i=copylist[j]; - num_bond[i] = static_cast<int> (buf[m++]); - for (k = 0; k < num_bond[i]; k++) { - bond_type[i][k] = static_cast<int> (buf[m++]); - bond_atom[i][k] = static_cast<int> (buf[m++]); - } - - num_angle[i] = static_cast<int> (buf[m++]); - for (k = 0; k < num_angle[i]; k++) { - angle_type[i][k] = static_cast<int> (buf[m++]); - angle_atom1[i][k] = static_cast<int> (buf[m++]); - angle_atom2[i][k] = static_cast<int> (buf[m++]); - angle_atom3[i][k] = static_cast<int> (buf[m++]); - } - - num_dihedral[i] = static_cast<int> (buf[m++]); - for (k = 0; k < num_dihedral[i]; k++) { - dihedral_type[i][k] = static_cast<int> (buf[m++]); - dihedral_atom1[i][k] = static_cast<int> (buf[m++]); - dihedral_atom2[i][k] = static_cast<int> (buf[m++]); - dihedral_atom3[i][k] = static_cast<int> (buf[m++]); - dihedral_atom4[i][k] = static_cast<int> (buf[m++]); - } - - num_improper[i] = static_cast<int> (buf[m++]); - for (k = 0; k < num_improper[i]; k++) { - improper_type[i][k] = static_cast<int> (buf[m++]); - improper_atom1[i][k] = static_cast<int> (buf[m++]); - improper_atom2[i][k] = static_cast<int> (buf[m++]); - improper_atom3[i][k] = static_cast<int> (buf[m++]); - improper_atom4[i][k] = static_cast<int> (buf[m++]); - } - - nspecial[i][0] = static_cast<int> (buf[m++]); - nspecial[i][1] = static_cast<int> (buf[m++]); - nspecial[i][2] = static_cast<int> (buf[m++]); - for (k = 0; k < nspecial[i][2]; k++) - special[i][k] = static_cast<int> (buf[m++]); - - if (atom->nextra_grow) - for (int iextra = 0; iextra < atom->nextra_grow; iextra++) - m += modify->fix[atom->extra_grow[iextra]]-> - unpack_exchange(i,&buf[m]); - - } - else - m+=static_cast <int> (buf[j+1]); - } - - my_gettime(CLOCK_REALTIME,&time2); - cuda->shared_data.cuda_timings.comm_exchange_cpu_pack+= - 
time2.tv_sec-time1.tv_sec+1.0*(time2.tv_nsec-time1.tv_nsec)/1000000000; - - cuda->shared_data.atom.nlocal=nlocal; - cuda->shared_data.atom.update_nlocal=2; - atom->nlocal=nlocal; - mfirst+=m; - buf=&buf[m]; - } - return mfirst; -} diff --git a/src/USER-CUDA/atom_vec_full_cuda.h b/src/USER-CUDA/atom_vec_full_cuda.h deleted file mode 100644 index 4ce1b24a51..0000000000 --- a/src/USER-CUDA/atom_vec_full_cuda.h +++ /dev/null @@ -1,69 +0,0 @@ -/* -*- c++ -*- ---------------------------------------------------------- - LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator - - Original Version: - http://lammps.sandia.gov, Sandia National Laboratories - Steve Plimpton, sjplimp@sandia.gov - - See the README file in the top-level LAMMPS directory. - - ----------------------------------------------------------------------- - - USER-CUDA Package and associated modifications: - https://sourceforge.net/projects/lammpscuda/ - - Christian Trott, christian.trott@tu-ilmenau.de - Lars Winterfeld, lars.winterfeld@tu-ilmenau.de - Theoretical Physics II, University of Technology Ilmenau, Germany - - See the README file in the USER-CUDA directory. - - This software is distributed under the GNU General Public License. 
-------------------------------------------------------------------------- */ - -#ifdef ATOM_CLASS - -AtomStyle(full/cuda,AtomVecFullCuda) - -#else - -#ifndef LMP_ATOM_VEC_FULL_CUDA_H -#define LMP_ATOM_VEC_FULL_CUDA_H - -#include "atom_vec_full.h" -#include "cuda_data.h" - -namespace LAMMPS_NS { - -class AtomVecFullCuda : public AtomVecFull { - public: - AtomVecFullCuda(class LAMMPS *); - virtual ~AtomVecFullCuda() {} - void grow_copylist(int n); - void grow_send(int n,double** buf_send,int flag); - void grow_both(int n); - int pack_comm(int, int *, double *, int, int *); - int pack_comm_vel(int, int *, double *, int, int *); - void unpack_comm(int, int, double *); - void unpack_comm_vel(int, int, double *); - int pack_reverse(int, int, double *); - void unpack_reverse(int, int *, double *); - int pack_border(int, int *, double *, int, int *); - int pack_border_vel(int, int *, double *, int, int *); - void unpack_border(int, int, double *); - void unpack_border_vel(int, int, double *); - int pack_exchange(int, double *); - int unpack_exchange(double *); - private: - class Cuda *cuda; - bool cuda_init_done; - int* copylist; - int* copylist2; - cCudaData<int, int, xx >* cu_copylist; - int max_nsend; -}; - -} - -#endif -#endif diff --git a/src/USER-CUDA/comm_cuda.cpp b/src/USER-CUDA/comm_cuda.cpp deleted file mode 100644 index a03f873ce2..0000000000 --- a/src/USER-CUDA/comm_cuda.cpp +++ /dev/null @@ -1,1375 +0,0 @@ -/* ---------------------------------------------------------------------- - LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator - http://lammps.sandia.gov, Sandia National Laboratories - Steve Plimpton, sjplimp@sandia.gov - - Copyright (2003) Sandia Corporation. Under the terms of Contract - DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains - certain rights in this software. This software is distributed under - the GNU General Public License. - - See the README file in the top-level LAMMPS directory. 
-------------------------------------------------------------------------- */ - -/* ---------------------------------------------------------------------- - Contributing author (triclinic) : Pieter in 't Veld (SNL) -------------------------------------------------------------------------- */ - -#ifdef LAMMPS_BIGBIG -#error LAMMPS_BIGBIG not supported by this file -#endif - -#include <mpi.h> -#include <cmath> -#include <cstring> -#include <cstdio> -#include <cstdlib> -#include "comm_cuda.h" -#include "atom.h" -#include "atom_vec.h" -#include "force.h" -#include "pair.h" -#include "domain.h" -#include "neighbor.h" -#include "modify.h" -#include "fix.h" -#include "group.h" -#include "compute.h" -#include "user_cuda.h" -#include "error.h" -#include "memory.h" -#include "comm_cuda_cu.h" - -using namespace LAMMPS_NS; - -#define BUFFACTOR 1.5 -#define BUFMIN 1000 -#define BUFEXTRA 1000 - - - -#define BIG 1.0e20 - -enum{SINGLE,MULTI}; - -/* ---------------------------------------------------------------------- - setup MPI and allocate buffer space -------------------------------------------------------------------------- */ - -CommCuda::CommCuda(LAMMPS *lmp) : CommBrick(lmp) -{ - cuda = lmp->cuda; - if(cuda == NULL) - error->all(FLERR,"You cannot use a /cuda class, without activating 'cuda' acceleration. 
Provide '-c on' as command-line argument to LAMMPS.."); - - cu_pbc=NULL; - cu_slablo=NULL; - cu_slabhi=NULL; - cu_multilo=NULL; - cu_multihi=NULL; - cu_sendlist=NULL; - - - memory->sfree(buf_send); - memory->sfree(buf_recv); - buf_send = NULL; - buf_recv = NULL; - - CommBrick::free_swap(); - allocate_swap(maxswap); -} - -/* ---------------------------------------------------------------------- */ - -CommCuda::~CommCuda() -{ - delete cu_sendlist; - if(cuda->pinned) - { - CudaWrapper_FreePinnedHostData((void*)buf_send); - CudaWrapper_FreePinnedHostData((void*)buf_recv); - } - else - { - memory->sfree(buf_send); - memory->sfree(buf_recv); - } - buf_send=NULL; - buf_recv=NULL; -} - -/* ---------------------------------------------------------------------- */ - -void CommCuda::init() -{ - if(not buf_send) - grow_send(maxsend,0); - if(not buf_recv) - grow_recv(maxrecv); - if(not cu_sendlist) - { - cu_sendlist=new cCudaData<int, int, xy> ((int*)sendlist,maxswap,BUFMIN); - cuda->shared_data.comm.sendlist.dev_data=cu_sendlist->dev_data(); - cuda->shared_data.comm.maxswap=maxswap; - cuda->shared_data.comm.maxlistlength=BUFMIN; - cu_sendlist->upload(); - } - delete cu_pbc; - cu_pbc=new cCudaData<int, int, xy> ((int*)pbc,cuda->shared_data.comm.maxswap,6); - cu_pbc->upload(); - - delete cu_slablo; - cu_slablo = new cCudaData<double, X_CFLOAT,x>(slablo,cuda->shared_data.comm.maxswap); - cu_slablo->upload(); - - delete cu_slabhi; - cu_slabhi = new cCudaData<double, X_CFLOAT,x>(slabhi,cuda->shared_data.comm.maxswap); - cu_slabhi->upload(); - - cuda->shared_data.comm.pbc.dev_data=cu_pbc->dev_data(); - cuda->shared_data.comm.slablo.dev_data=cu_slablo->dev_data(); - cuda->shared_data.comm.slabhi.dev_data=cu_slabhi->dev_data(); - - CommBrick::init(); -} - -/* ---------------------------------------------------------------------- - setup spatial-decomposition communication patterns - function of neighbor cutoff(s) & cutghostuser & current box size - single style sets slab boundaries 
(slablo,slabhi) based on max cutoff - multi style sets type-dependent slab boundaries (multilo,multihi) -------------------------------------------------------------------------- */ - -void CommCuda::setup() -{ - if(cuda->shared_data.pair.neighall) cutghostuser = MAX(2.0*neighbor->cutneighmax,cutghostuser); - CommBrick::setup(); - - //upload changed geometry to device - if(style == SINGLE) - { - if(cu_slablo) cu_slablo->upload(); - if(cu_slabhi) cu_slabhi->upload(); - } - else - { - if(cu_multilo) cu_multilo->upload(); - if(cu_multihi) cu_multihi->upload(); - } -} - -/* ---------------------------------------------------------------------- - forward communication of atom coords every timestep - other per-atom attributes may also be sent via pack/unpack routines -------------------------------------------------------------------------- */ - -void CommCuda::forward_comm(int mode) -{ - if(mode==0) return forward_comm_cuda(); - if(mode==1) return forward_comm_pack_cuda(); - if(mode==2) return forward_comm_transfer_cuda(); - if(mode==3) return forward_comm_unpack_cuda(); -} - - -void CommCuda::forward_comm_cuda() -{ - my_times time1,time2,time3; - - int n; - MPI_Request request; - AtomVec *avec = atom->avec; - - cuda->shared_data.domain.xy=domain->xy; - cuda->shared_data.domain.xz=domain->xz; - cuda->shared_data.domain.yz=domain->yz; - cuda->shared_data.domain.prd[0]=domain->prd[0]; - cuda->shared_data.domain.prd[1]=domain->prd[1]; - cuda->shared_data.domain.prd[2]=domain->prd[2]; - cuda->shared_data.domain.triclinic=domain->triclinic; - if(not comm_x_only && not avec->cudable) - { - cuda->downloadAll(); - CommBrick::forward_comm(); - cuda->uploadAll(); - return; - } - - // exchange data with another proc - // if other proc is self, just copy - // if comm_x_only set, exchange or copy directly to x, don't unpack - - for (int iswap = 0; iswap < nswap; iswap++) { - if (sendproc[iswap] != me) - { - if (comm_x_only) - { - - int size_forward_recv_now=0; - - 
if((sizeof(X_CFLOAT)!=sizeof(double)) && size_forward_recv[iswap]) //some complicated way to safe some transfer size if single precision is used - size_forward_recv_now=(size_forward_recv[iswap]+1)*sizeof(X_CFLOAT)/sizeof(double); - else - size_forward_recv_now=size_forward_recv[iswap]; -my_gettime(CLOCK_REALTIME,&time1); - - MPI_Irecv(buf_recv,size_forward_recv_now,MPI_DOUBLE, - recvproc[iswap],0,world,&request); - n = Cuda_CommCuda_PackComm(&cuda->shared_data,sendnum[iswap],iswap,(void*) buf_send,pbc[iswap],pbc_flag[iswap]); - -my_gettime(CLOCK_REALTIME,&time2); - - if((sizeof(X_CFLOAT)!=sizeof(double)) && n) //some complicated way to safe some transfer size if single precision is used - n=(n+1)*sizeof(X_CFLOAT)/sizeof(double); - - //printf("RecvSize: %i SendSize: %i\n",size_forward_recv_now,n); - MPI_Send(buf_send,n,MPI_DOUBLE,sendproc[iswap],0,world); - MPI_Wait(&request,MPI_STATUS_IGNORE); - -my_gettime(CLOCK_REALTIME,&time3); -cuda->shared_data.cuda_timings.comm_forward_mpi_upper+= - time3.tv_sec-time1.tv_sec+1.0*(time3.tv_nsec-time1.tv_nsec)/1000000000; -cuda->shared_data.cuda_timings.comm_forward_mpi_lower+= - time3.tv_sec-time2.tv_sec+1.0*(time3.tv_nsec-time2.tv_nsec)/1000000000; - - Cuda_CommCuda_UnpackComm(&cuda->shared_data,recvnum[iswap],firstrecv[iswap],(void*)buf_recv,iswap); //Unpack for cpu exchange happens implicitely since buf==x[firstrecv] - - } - else if (ghost_velocity) - { - MPI_Irecv(buf_recv,size_forward_recv[iswap],MPI_DOUBLE, - recvproc[iswap],0,world,&request); - - if(avec->cudable) - n = avec->pack_comm_vel(sendnum[iswap],&iswap, - buf_send,pbc_flag[iswap],pbc[iswap]); - else - n = avec->pack_comm_vel(sendnum[iswap],sendlist[iswap], - buf_send,pbc_flag[iswap],pbc[iswap]); - - MPI_Send(buf_send,n,MPI_DOUBLE,sendproc[iswap],0,world); - MPI_Wait(&request,MPI_STATUS_IGNORE); - avec->unpack_comm_vel(recvnum[iswap],firstrecv[iswap],buf_recv); - } - else - { - MPI_Irecv(buf_recv,size_forward_recv[iswap],MPI_DOUBLE, - 
recvproc[iswap],0,world,&request); - - if(avec->cudable) - n = avec->pack_comm(sendnum[iswap],&iswap, - buf_send,pbc_flag[iswap],pbc[iswap]); - else - n = avec->pack_comm(sendnum[iswap],sendlist[iswap], - buf_send,pbc_flag[iswap],pbc[iswap]); - - MPI_Send(buf_send,n,MPI_DOUBLE,sendproc[iswap],0,world); - MPI_Wait(&request,MPI_STATUS_IGNORE); - avec->unpack_comm(recvnum[iswap],firstrecv[iswap],buf_recv); - } - - } - else //sendproc == me - { - cuda->self_comm=1; - if (comm_x_only) - { - if (sendnum[iswap]) - { - n = Cuda_CommCuda_PackComm_Self(&cuda->shared_data,sendnum[iswap],iswap,firstrecv[iswap],pbc[iswap],pbc_flag[iswap]); - if(n<0) error->all(FLERR," # CUDA ERRROR on PackComm_Self"); - if((sizeof(X_CFLOAT)!=sizeof(double)) && n) - n=(n+1)*sizeof(X_CFLOAT)/sizeof(double); - } - } - else if (ghost_velocity) - { - n = avec->pack_comm_vel(sendnum[iswap],&iswap, - (double*) firstrecv,pbc_flag[iswap],pbc[iswap]); - //avec->unpack_comm_vel(recvnum[iswap],firstrecv[iswap],(double*) firstrecv); - } - else - { - n = avec->pack_comm(sendnum[iswap],&iswap, - (double*) firstrecv,pbc_flag[iswap],pbc[iswap]); - //avec->unpack_comm(recvnum[iswap],firstrecv[iswap],(double*) firstrecv); - } - cuda->self_comm=0; - } - } -} - -void CommCuda::forward_comm_pack_cuda() -{ - my_times time1,time2; - int n; // initialize comm buffers & exchange memory - - MPI_Request request; - AtomVec *avec = atom->avec; - - cuda->shared_data.domain.xy=domain->xy; - cuda->shared_data.domain.xz=domain->xz; - cuda->shared_data.domain.yz=domain->yz; - cuda->shared_data.domain.prd[0]=domain->prd[0]; - cuda->shared_data.domain.prd[1]=domain->prd[1]; - cuda->shared_data.domain.prd[2]=domain->prd[2]; - cuda->shared_data.domain.triclinic=domain->triclinic; - if(not comm_x_only && not avec->cudable) cuda->downloadAll(); //if not comm_x_only the communication routine of the atom_vec style class is used - - // exchange data with another proc - // if other proc is self, just copy - // if comm_x_only set, exchange 
or copy directly to x, don't unpack - - for (int iswap = 0; iswap < nswap; iswap++) { - if (sendproc[iswap] != me) - { - if (comm_x_only) - { - - -my_gettime(CLOCK_REALTIME,&time1); - - // n = Cuda_CommCuda_PackComm(&cuda->shared_data,sendnum[iswap],iswap,(void*) cuda->shared_data.comm.buf_send[iswap],pbc[iswap],pbc_flag[iswap]); - n = Cuda_CommCuda_PackComm(&cuda->shared_data,sendnum[iswap],iswap,(void*)buf_send,pbc[iswap],pbc_flag[iswap]); - -my_gettime(CLOCK_REALTIME,&time2); - - if((sizeof(X_CFLOAT)!=sizeof(double)) && n) //some complicated way to safe some transfer size if single precision is used - n=(n+1)*sizeof(X_CFLOAT)/sizeof(double); - cuda->shared_data.comm.send_size[iswap]=n; - } - else if (ghost_velocity) - { -my_gettime(CLOCK_REALTIME,&time1); - - // n = Cuda_CommCuda_PackComm_Vel(&cuda->shared_data,sendnum[iswap],iswap,(void*) &buf_send[iswap*maxsend],pbc[iswap],pbc_flag[iswap]); - -my_gettime(CLOCK_REALTIME,&time2); - - if((sizeof(X_CFLOAT)!=sizeof(double)) && n) //some complicated way to safe some transfer size if single precision is used - n=(n+1)*sizeof(X_CFLOAT)/sizeof(double); - cuda->shared_data.comm.send_size[iswap]=n; - } - else - { - MPI_Irecv(buf_recv,size_forward_recv[iswap],MPI_DOUBLE, - recvproc[iswap],0,world,&request); - - if(avec->cudable) - n = avec->pack_comm(sendnum[iswap],&iswap, - cuda->shared_data.comm.buf_send[iswap],pbc_flag[iswap],pbc[iswap]); - else - n = avec->pack_comm(sendnum[iswap],sendlist[iswap], - cuda->shared_data.comm.buf_send[iswap],pbc_flag[iswap],pbc[iswap]); - - MPI_Send(buf_send,n,MPI_DOUBLE,sendproc[iswap],0,world); - MPI_Wait(&request,MPI_STATUS_IGNORE); - avec->unpack_comm(recvnum[iswap],firstrecv[iswap],buf_recv); - } - - } - else //sendproc == me - { - if (comm_x_only) - { - if (sendnum[iswap]) - { - n = Cuda_CommCuda_PackComm_Self(&cuda->shared_data,sendnum[iswap],iswap,firstrecv[iswap],pbc[iswap],pbc_flag[iswap]); - if(n<0) error->all(FLERR," # CUDA ERRROR on PackComm_Self"); - 
if((sizeof(X_CFLOAT)!=sizeof(double)) && n) - n=(n+1)*sizeof(X_CFLOAT)/sizeof(double); - } - } - else if (ghost_velocity) - { - n = avec->pack_comm_vel(sendnum[iswap],sendlist[iswap], - buf_send,pbc_flag[iswap],pbc[iswap]); - avec->unpack_comm_vel(recvnum[iswap],firstrecv[iswap],buf_send); - } - else - { - n = avec->pack_comm(sendnum[iswap],sendlist[iswap], - buf_send,pbc_flag[iswap],pbc[iswap]); - avec->unpack_comm(recvnum[iswap],firstrecv[iswap],buf_send); - } - } - } - if(not comm_x_only && not avec->cudable) cuda->uploadAll(); -} - -void CommCuda::forward_comm_transfer_cuda() -{ - my_times time1,time2,time3; - int n; - MPI_Request request; - AtomVec *avec = atom->avec; - cuda->shared_data.domain.xy=domain->xy; - cuda->shared_data.domain.xz=domain->xz; - cuda->shared_data.domain.yz=domain->yz; - cuda->shared_data.domain.prd[0]=domain->prd[0]; - cuda->shared_data.domain.prd[1]=domain->prd[1]; - cuda->shared_data.domain.prd[2]=domain->prd[2]; - cuda->shared_data.domain.triclinic=domain->triclinic; - if(not comm_x_only && not avec->cudable) cuda->downloadAll(); //if not comm_x_only the communication routine of the atom_vec style class is used -//printf("A\n"); - // exchange data with another proc - // if other proc is self, just copy - // if comm_x_only set, exchange or copy directly to x, don't unpack - - for (int iswap = 0; iswap < nswap; iswap++) { - if (sendproc[iswap] != me) - { - if (comm_x_only) - { - - int size_forward_recv_now=0; - - if((sizeof(X_CFLOAT)!=sizeof(double)) && size_forward_recv[iswap]) //some complicated way to safe some transfer size if single precision is used - size_forward_recv_now=(size_forward_recv[iswap]+1)*sizeof(X_CFLOAT)/sizeof(double); - else - size_forward_recv_now=size_forward_recv[iswap]; - - //printf("A: %i \n",size_forward_recv_now/1024*4); - //MPI_Irecv(cuda->shared_data.comm.buf_recv[iswap],size_forward_recv_now,MPI_DOUBLE, - // recvproc[iswap],0,world,&request); - MPI_Irecv(buf_recv,size_forward_recv_now,MPI_DOUBLE, - 
recvproc[iswap],0,world,&request); - //printf("%p %p %i\n",buf_send, cuda->shared_data.comm.buf_send_dev[iswap], cuda->shared_data.comm.send_size[iswap]*sizeof(double)); - //memcpy(buf_send,cuda->shared_data.comm.buf_send[iswap],cuda->shared_data.comm.send_size[iswap]*sizeof(double)); - // CudaWrapper_SyncStream(1); - //printf("B: %i \n",cuda->shared_data.comm.send_size[iswap]/1024*4); - CudaWrapper_DownloadCudaDataAsync((void*) buf_send, cuda->shared_data.comm.buf_send_dev[iswap], cuda->shared_data.comm.send_size[iswap]*sizeof(double),2); - //MPI_Send(cuda->shared_data.comm.buf_send[iswap],cuda->shared_data.comm.send_size[iswap],MPI_DOUBLE,sendproc[iswap],0,world); -my_gettime(CLOCK_REALTIME,&time1); - CudaWrapper_SyncStream(2); - //printf("C: %i \n",cuda->shared_data.comm.send_size[iswap]/1024*4); -my_gettime(CLOCK_REALTIME,&time2); -cuda->shared_data.cuda_timings.comm_forward_download+= - time2.tv_sec-time1.tv_sec+1.0*(time2.tv_nsec-time1.tv_nsec)/1000000000; - MPI_Send(buf_send,cuda->shared_data.comm.send_size[iswap],MPI_DOUBLE,sendproc[iswap],0,world); - MPI_Wait(&request,MPI_STATUS_IGNORE); - //printf("D: %i \n",cuda->shared_data.comm.send_size[iswap]/1024*4); - CudaWrapper_UploadCudaDataAsync((void*) buf_recv,cuda->shared_data.comm.buf_recv_dev[iswap], size_forward_recv_now*sizeof(double),2); -my_gettime(CLOCK_REALTIME,&time1); - CudaWrapper_SyncStream(2); - //printf("E: %i \n",cuda->shared_data.comm.send_size[iswap]/1024*4); - //memcpy(cuda->shared_data.comm.buf_recv[iswap],buf_recv,size_forward_recv_now*sizeof(double)); - //printf("RecvSize: %i SendSize: %i\n",size_forward_recv_now*sizeof(double),cuda->shared_data.comm.send_size[iswap]*sizeof(double)); -my_gettime(CLOCK_REALTIME,&time3); -cuda->shared_data.cuda_timings.comm_forward_upload+= - time3.tv_sec-time1.tv_sec+1.0*(time3.tv_nsec-time1.tv_nsec)/1000000000; -cuda->shared_data.cuda_timings.comm_forward_mpi_lower+= - time3.tv_sec-time2.tv_sec+1.0*(time3.tv_nsec-time2.tv_nsec)/1000000000; 
-my_gettime(CLOCK_REALTIME,&time3); -cuda->shared_data.cuda_timings.comm_forward_mpi_upper+= - time3.tv_sec-time1.tv_sec+1.0*(time3.tv_nsec-time1.tv_nsec)/1000000000; - } - else if (ghost_velocity) - { - /* int size_forward_recv_now=0; - - if((sizeof(X_CFLOAT)!=sizeof(double)) && size_forward_recv[iswap]) //some complicated way to safe some transfer size if single precision is used - size_forward_recv_now=(size_forward_recv[iswap]+1)*sizeof(X_CFLOAT)/sizeof(double); - else - size_forward_recv_now=size_forward_recv[iswap]; - -my_gettime(CLOCK_REALTIME,&time1); - - MPI_Irecv(cuda->shared_data.comm.buf_recv[iswap],size_forward_recv_now,MPI_DOUBLE, - recvproc[iswap],0,world,&request); - -my_gettime(CLOCK_REALTIME,&time2); - - MPI_Send(cuda->shared_data.comm.buf_send[iswap],cuda->shared_data.comm.send_size[iswap],MPI_DOUBLE,sendproc[iswap],0,world); - MPI_Wait(&request,MPI_STATUS_IGNORE); - -my_gettime(CLOCK_REALTIME,&time3); -cuda->shared_data.cuda_timings.comm_forward_mpi_upper+= - time3.tv_sec-time1.tv_sec+1.0*(time3.tv_nsec-time1.tv_nsec)/1000000000; -cuda->shared_data.cuda_timings.comm_forward_mpi_lower+= - time3.tv_sec-time2.tv_sec+1.0*(time3.tv_nsec-time2.tv_nsec)/1000000000;*/ - - } - else - { - MPI_Irecv(buf_recv,size_forward_recv[iswap],MPI_DOUBLE, - recvproc[iswap],0,world,&request); - - if(avec->cudable) - n = avec->pack_comm(sendnum[iswap],&iswap, - buf_send,pbc_flag[iswap],pbc[iswap]); - else - n = avec->pack_comm(sendnum[iswap],sendlist[iswap], - buf_send,pbc_flag[iswap],pbc[iswap]); - - MPI_Send(buf_send,n,MPI_DOUBLE,sendproc[iswap],0,world); - MPI_Wait(&request,MPI_STATUS_IGNORE); - avec->unpack_comm(recvnum[iswap],firstrecv[iswap],buf_recv); - } - - } - else //sendproc == me - { - if (comm_x_only) - { - if (sendnum[iswap]) - { - } - } - else if (ghost_velocity) - { - } - else - { - n = avec->pack_comm(sendnum[iswap],sendlist[iswap], - buf_send,pbc_flag[iswap],pbc[iswap]); - avec->unpack_comm(recvnum[iswap],firstrecv[iswap],buf_send); - } - } - } - 
if(not comm_x_only && not avec->cudable) cuda->uploadAll(); -} - -void CommCuda::forward_comm_unpack_cuda() -{ - int n; - MPI_Request request; - AtomVec *avec = atom->avec; - - cuda->shared_data.domain.xy=domain->xy; - cuda->shared_data.domain.xz=domain->xz; - cuda->shared_data.domain.yz=domain->yz; - cuda->shared_data.domain.prd[0]=domain->prd[0]; - cuda->shared_data.domain.prd[1]=domain->prd[1]; - cuda->shared_data.domain.prd[2]=domain->prd[2]; - cuda->shared_data.domain.triclinic=domain->triclinic; - if(not comm_x_only && not avec->cudable) cuda->downloadAll(); //if not comm_x_only the communication routine of the atom_vec style class is used - - // exchange data with another proc - // if other proc is self, just copy - // if comm_x_only set, exchange or copy directly to x, don't unpack - - for (int iswap = 0; iswap < nswap; iswap++) { - if (sendproc[iswap] != me) - { - if (comm_x_only) - { - - //Cuda_CommCuda_UnpackComm(&cuda->shared_data,recvnum[iswap],firstrecv[iswap],cuda->shared_data.comm.buf_recv[iswap],iswap); //Unpack for cpu exchange happens implicitely since buf==x[firstrecv] - Cuda_CommCuda_UnpackComm(&cuda->shared_data,recvnum[iswap],firstrecv[iswap],buf_recv,iswap); //Unpack for cpu exchange happens implicitely since buf==x[firstrecv] - - } - else if (ghost_velocity) - { - //Cuda_CommCuda_UnpackComm_Vel(&cuda->shared_data,recvnum[iswap],firstrecv[iswap],(void*)&buf_recv[iswap*maxrecv]); //Unpack for cpu exchange happens implicitely since buf==x[firstrecv] - } - else - { - MPI_Irecv(buf_recv,size_forward_recv[iswap],MPI_DOUBLE, - recvproc[iswap],0,world,&request); - - if(avec->cudable) - n = avec->pack_comm(sendnum[iswap],&iswap, - buf_send,pbc_flag[iswap],pbc[iswap]); - else - n = avec->pack_comm(sendnum[iswap],sendlist[iswap], - buf_send,pbc_flag[iswap],pbc[iswap]); - - MPI_Send(buf_send,n,MPI_DOUBLE,sendproc[iswap],0,world); - MPI_Wait(&request,MPI_STATUS_IGNORE); - avec->unpack_comm(recvnum[iswap],firstrecv[iswap],buf_recv); - } - - } - else 
//sendproc == me - { - if (comm_x_only) - { - if (sendnum[iswap]) - { - } - } - else if (ghost_velocity) - { - } - else - { - n = avec->pack_comm(sendnum[iswap],sendlist[iswap], - buf_send,pbc_flag[iswap],pbc[iswap]); - avec->unpack_comm(recvnum[iswap],firstrecv[iswap],buf_send); - } - } - } - if(not comm_x_only && not avec->cudable) cuda->uploadAll(); -} - -void CommCuda::forward_comm_pair(Pair *pair) -{ - if(not cuda->shared_data.pair.cudable_force) - { - return CommBrick::forward_comm_pair(pair); - } - - int iswap,n; - double *buf; - MPI_Request request; - - int nsize = pair->comm_forward; - - for (iswap = 0; iswap < nswap; iswap++) { - - // pack buffer - - n = pair->pack_forward_comm(sendnum[iswap],&iswap, - buf_send,pbc_flag[iswap],pbc[iswap]); - int nrecv = recvnum[iswap]*nsize; - if(nrecv<0) nrecv=-(nrecv+1)/2; - int nsend = n; - if(nsend<0) nsend=-(nsend+1)/2; - - // exchange with another proc - // if self, set recv buffer to send buffer - - if (sendproc[iswap] != me) { - MPI_Irecv(buf_recv,nrecv,MPI_DOUBLE,recvproc[iswap],0, - world,&request); - MPI_Send(buf_send,nsend,MPI_DOUBLE,sendproc[iswap],0,world); - MPI_Wait(&request,MPI_STATUS_IGNORE); - buf = buf_recv; - } else buf = buf_send; - - // unpack buffer - - pair->unpack_forward_comm(recvnum[iswap],firstrecv[iswap],buf); - } -} - -/* ---------------------------------------------------------------------- - reverse communication of forces on atoms every timestep - other per-atom attributes may also be sent via pack/unpack routines -------------------------------------------------------------------------- */ - -void CommCuda::reverse_comm() -{ - int n; - MPI_Request request; - AtomVec *avec = atom->avec; - double *buf; - - if(not comm_f_only && not avec->cudable) cuda->downloadAll(); //not yet implemented in CUDA but only needed for non standard atom styles - - // exchange data with another proc - // if other proc is self, just copy - // if comm_f_only set, exchange or copy directly from f, don't pack - - 
for (int iswap = nswap-1; iswap >= 0; iswap--) { - if (sendproc[iswap] != me) { - if (comm_f_only) { - - int size_recv_now=size_reverse_recv[iswap]; - if((sizeof(F_CFLOAT)!=sizeof(double))&& size_reverse_recv[iswap]) - size_recv_now=(size_recv_now+1)*sizeof(F_CFLOAT)/sizeof(double); - MPI_Irecv(buf_recv,size_recv_now,MPI_DOUBLE, - sendproc[iswap],0,world,&request); - - buf=buf_send; - if (size_reverse_send[iswap]) - { - Cuda_CommCuda_PackReverse(&cuda->shared_data,size_reverse_send[iswap]/3,firstrecv[iswap],buf); - } - else buf=NULL; - int size_reverse_send_now=size_reverse_send[iswap]; - if((sizeof(F_CFLOAT)!=sizeof(double))&& size_reverse_send[iswap]) - size_reverse_send_now=(size_reverse_send_now+1)*sizeof(F_CFLOAT)/sizeof(double); - MPI_Send(buf,size_reverse_send_now,MPI_DOUBLE, - recvproc[iswap],0,world); - MPI_Wait(&request,MPI_STATUS_IGNORE); - Cuda_CommCuda_UnpackReverse(&cuda->shared_data,sendnum[iswap],iswap,buf_recv); - - } else { - MPI_Irecv(buf_recv,size_reverse_recv[iswap],MPI_DOUBLE, - sendproc[iswap],0,world,&request); - n = avec->pack_reverse(recvnum[iswap],firstrecv[iswap],buf_send); - MPI_Send(buf_send,n,MPI_DOUBLE,recvproc[iswap],0,world); - MPI_Wait(&request,MPI_STATUS_IGNORE); - - avec->unpack_reverse(sendnum[iswap],sendlist[iswap],buf_recv); - } - - } else { - if (comm_f_only) { - if (sendnum[iswap]) - Cuda_CommCuda_UnpackReverse_Self(&cuda->shared_data,sendnum[iswap],iswap,firstrecv[iswap]); - } else { - n = avec->pack_reverse(recvnum[iswap],firstrecv[iswap],buf_send); - avec->unpack_reverse(sendnum[iswap],sendlist[iswap],buf_send); - } - } - } - if(not comm_f_only && not avec->cudable) cuda->uploadAll(); //not yet implemented in CUDA but only needed for non standard atom styles -} - -/* ---------------------------------------------------------------------- - exchange: move atoms to correct processors - atoms exchanged with all 6 stencil neighbors - send out atoms that have left my box, receive ones entering my box - atoms will be lost if 
not inside some proc's box - can happen if atom moves outside of non-periodic bounary - or if atom moves more than one proc away - this routine called before every reneighboring - for triclinic, atoms must be in lamda coords (0-1) before exchange is called -------------------------------------------------------------------------- */ - -void CommCuda::exchange() -{ - AtomVec *avec = atom->avec; - - if(not cuda->oncpu && avec->cudable) - return exchange_cuda(); - - if(not cuda->oncpu) cuda->downloadAll(); - - CommBrick::exchange(); -} - - -void CommCuda::exchange_cuda() -{ - int nsend,nrecv,nrecv1,nrecv2,nlocal; - double *buf; - MPI_Request request; - AtomVec *avec = atom->avec; - my_times time1,time2; - - // clear global->local map for owned and ghost atoms - // b/c atoms migrate to new procs in exchange() and - // new ghosts are created in borders() - // map_set() is done at end of borders() - - - if(map_style) cuda->cu_tag->download(); - - if (map_style) atom->map_clear(); - - // loop over dimensions - - for (int dim = 0; dim < 3; dim++) { - // fill buffer with atoms leaving my box, using < and >= - // when atom is deleted, fill it in with last atom - - cuda->shared_data.exchange_dim=dim; - - nlocal = atom->nlocal; - avec->maxsend=&maxsend; - nsend=avec->pack_exchange(dim,(double*) &buf_send); - nlocal = atom->nlocal; - - - atom->nlocal = nlocal; - - // send/recv atoms in both directions - // if 1 proc in dimension, no send/recv, set recv buf to send buf - // if 2 procs in dimension, single send/recv - // if more than 2 procs in dimension, send/recv to both neighbors - - my_gettime(CLOCK_REALTIME,&time1); - - if (procgrid[dim] == 1) { - nrecv = nsend; - buf = buf_send; - - } else { - MPI_Sendrecv(&nsend,1,MPI_INT,procneigh[dim][0],0, - &nrecv1,1,MPI_INT,procneigh[dim][1],0,world,MPI_STATUS_IGNORE); - nrecv = nrecv1; - if (procgrid[dim] > 2) { - MPI_Sendrecv(&nsend,1,MPI_INT,procneigh[dim][1],0, - &nrecv2,1,MPI_INT,procneigh[dim][0],0,world,MPI_STATUS_IGNORE); - 
nrecv += nrecv2; - } - if (nrecv+1 > maxrecv) grow_recv(nrecv+1); - - MPI_Irecv(buf_recv,nrecv1,MPI_DOUBLE,procneigh[dim][1],0, - world,&request); - MPI_Send(buf_send,nsend,MPI_DOUBLE,procneigh[dim][0],0,world); - MPI_Wait(&request,MPI_STATUS_IGNORE); - - if (procgrid[dim] > 2) { - MPI_Irecv(&buf_recv[nrecv1],nrecv2,MPI_DOUBLE,procneigh[dim][0],0, - world,&request); - MPI_Send(buf_send,nsend,MPI_DOUBLE,procneigh[dim][1],0,world); - MPI_Wait(&request,MPI_STATUS_IGNORE); - - if((nrecv1==0)||(nrecv2==0)) buf_recv[nrecv]=0; - } - - buf = buf_recv; - } - //printf("nsend: %i nrecv: %i\n",nsend,nrecv); - // check incoming atoms to see if they are in my box - // if so, add to my list -my_gettime(CLOCK_REALTIME,&time2); -cuda->shared_data.cuda_timings.comm_exchange_mpi+= - time2.tv_sec-time1.tv_sec+1.0*(time2.tv_nsec-time1.tv_nsec)/1000000000; - - if(nrecv) - { - avec->maxsend=&maxsend; - avec->unpack_exchange(buf); - } - } - - if(atom->firstgroupname) cuda->downloadAll(); - - if(atom->firstgroupname) atom->first_reorder(); - - if(atom->firstgroupname) cuda->uploadAll(); -} - -/* ---------------------------------------------------------------------- - borders: list nearby atoms to send to neighboring procs at every timestep - one list is created for every swap that will be made - as list is made, actually do swaps - this does equivalent of a communicate (so don't need to explicitly - call communicate routine on reneighboring timestep) - this routine is called before every reneighboring - for triclinic, atoms must be in lamda coords (0-1) before borders is called -------------------------------------------------------------------------- */ - - -void CommCuda::borders() -{ - AtomVec *avec = atom->avec; - if(not cuda->oncpu && avec->cudable) - { - if(cuda->shared_data.overlap_comm&&cuda->finished_setup) - borders_cuda_overlap_forward_comm(); - else - borders_cuda(); - - return; - } - - CommBrick::borders(); - - cuda->setSystemParams(); - if(cuda->finished_setup) 
{cuda->checkResize(); cuda->uploadAll();} - cuda->shared_data.atom.nghost=atom->nghost; - cu_sendlist->upload(); -} - -void CommCuda::borders_cuda() -{ - int n,iswap,dim,ineed,twoneed,smax,rmax; - int nsend,nrecv,nfirst,nlast; - double *buf; - MPI_Request request; - AtomVec *avec = atom->avec; - my_times time1,time2; - - // clear old ghosts - - atom->nghost = 0; - - // do swaps over all 3 dimensions - - iswap = 0; - smax = rmax = 0; - - cuda->shared_data.comm.nsend=0; - for (dim = 0; dim < 3; dim++) { - nlast = 0; - twoneed = 2*maxneed[dim]; - for (ineed = 0; ineed < twoneed; ineed++) { - - // find atoms within slab boundaries lo/hi using <= and >= - // check atoms between nfirst and nlast - // for first swaps in a dim, check owned and ghost - // for later swaps in a dim, only check newly arrived ghosts - // store sent atom indices in list for use in future timesteps - - if (ineed % 2 == 0) { - nfirst = nlast; - nlast = atom->nlocal + atom->nghost; - } - - nsend = 0; - - // find send atoms according to SINGLE vs MULTI - // all atoms eligible versus atoms in bordergroup - // only need to limit loop to bordergroup for first sends (ineed < 2) - // on these sends, break loop in two: owned (in group) and ghost - do - { - if(nsend>=maxsendlist[iswap]) grow_list(iswap,static_cast <int> (nsend*1.05)); - nsend=Cuda_CommCuda_BuildSendlist(&cuda->shared_data,bordergroup,ineed,style==SINGLE?1:0,atom->nfirst,nfirst,nlast,dim,iswap); - }while(nsend>=maxsendlist[iswap]); - // pack up list of border atoms - - if (nsend*size_border > maxsend) - grow_send(nsend*size_border,0); - - if (ghost_velocity) - n = avec->pack_border_vel(nsend,&iswap,buf_send, - pbc_flag[iswap],pbc[iswap]); - else - n = avec->pack_border(nsend,&iswap,buf_send, - pbc_flag[iswap],pbc[iswap]); - - // swap atoms with other proc - // put incoming ghosts at end of my atom arrays - // if swapping with self, simply copy, no messages - -my_gettime(CLOCK_REALTIME,&time1); - if (sendproc[iswap] != me) { - 
MPI_Sendrecv(&nsend,1,MPI_INT,sendproc[iswap],0, - &nrecv,1,MPI_INT,recvproc[iswap],0,world,MPI_STATUS_IGNORE); - if (nrecv*size_border > maxrecv) - grow_recv(nrecv*size_border); - MPI_Irecv(buf_recv,nrecv*size_border,MPI_DOUBLE, - recvproc[iswap],0,world,&request); - MPI_Send(buf_send,n,MPI_DOUBLE,sendproc[iswap],0,world); - MPI_Wait(&request,MPI_STATUS_IGNORE); - buf = buf_recv; - } else { - nrecv = nsend; - buf = buf_send; - } - -my_gettime(CLOCK_REALTIME,&time2); -cuda->shared_data.cuda_timings.comm_border_mpi+= - time2.tv_sec-time1.tv_sec+1.0*(time2.tv_nsec-time1.tv_nsec)/1000000000; - - // unpack buffer - - if (ghost_velocity) - avec->unpack_border_vel(nrecv,atom->nlocal+atom->nghost,buf); - else - avec->unpack_border(nrecv,atom->nlocal+atom->nghost,buf); - - // set all pointers & counters - - smax = MAX(smax,nsend); - rmax = MAX(rmax,nrecv); - sendnum[iswap] = nsend; - recvnum[iswap] = nrecv; - size_forward_recv[iswap] = nrecv*size_forward; - size_reverse_send[iswap] = nrecv*size_reverse; - size_reverse_recv[iswap] = nsend*size_reverse; - firstrecv[iswap] = atom->nlocal + atom->nghost; - atom->nghost += nrecv; - iswap++; - } - } - - // insure send/recv buffers are long enough for all forward & reverse comm - - int max = MAX(maxforward*smax,maxreverse*rmax); - if (max > maxsend) grow_send(max,0); - max = MAX(maxforward*rmax,maxreverse*smax); - if (max > maxrecv) grow_recv(max); - - // reset global->local map - if(map_style) - { - cuda->cu_tag->download(); - atom->map_set(); - } - - cuda->setSystemParams(); - cuda->shared_data.atom.nghost+=n; -} - -void CommCuda::borders_cuda_overlap_forward_comm() -{ - int n,iswap,dim,ineed,twoneed,smax,rmax; - int nsend,nrecv,nfirst,nlast; - double *buf; - MPI_Request request; - AtomVec *avec = atom->avec; - my_times time1,time2; - - // clear old ghosts - - atom->nghost = 0; - - // do swaps over all 3 dimensions - - iswap = 0; - smax = rmax = 0; - - cuda->shared_data.comm.nsend=0; - for (dim = 0; dim < 3; dim++) { - nlast = 
0; - twoneed = 2*maxneed[dim]; - for (ineed = 0; ineed < twoneed; ineed++) { - - // find atoms within slab boundaries lo/hi using <= and >= - // check atoms between nfirst and nlast - // for first swaps in a dim, check owned and ghost - // for later swaps in a dim, only check newly arrived ghosts - // store sent atom indices in list for use in future timesteps - - if (ineed % 2 == 0) { - nfirst = nlast; - nlast = atom->nlocal + atom->nghost; - } - - nsend = 0; - - // find send atoms according to SINGLE vs MULTI - // all atoms eligible versus atoms in bordergroup - // only need to limit loop to bordergroup for first sends (ineed < 2) - // on these sends, break loop in two: owned (in group) and ghost - do - { - if(nsend>=maxsendlist[iswap]) grow_list(iswap,static_cast <int> (nsend*1.05)); - nsend=Cuda_CommCuda_BuildSendlist(&cuda->shared_data,bordergroup,ineed,style==SINGLE?1:0,atom->nfirst,nfirst,nlast,dim,iswap); - }while(nsend>=maxsendlist[iswap]); - cuda->shared_data.comm.nsend_swap[iswap]=nsend; - // pack up list of border atoms - - if (nsend*size_border > maxsend) - grow_send(nsend*size_border,0); - - if (ghost_velocity) - n = avec->pack_border_vel(nsend,&iswap,buf_send, - pbc_flag[iswap],pbc[iswap]); - else - n = avec->pack_border(nsend,&iswap,buf_send, - pbc_flag[iswap],pbc[iswap]); - - // swap atoms with other proc - // put incoming ghosts at end of my atom arrays - // if swapping with self, simply copy, no messages - -my_gettime(CLOCK_REALTIME,&time1); - if (sendproc[iswap] != me) { - MPI_Sendrecv(&nsend,1,MPI_INT,sendproc[iswap],0, - &nrecv,1,MPI_INT,recvproc[iswap],0,world,MPI_STATUS_IGNORE); - if (nrecv*size_border > maxrecv) - grow_recv(nrecv*size_border); - MPI_Irecv(buf_recv,nrecv*size_border,MPI_DOUBLE, - recvproc[iswap],0,world,&request); - MPI_Send(buf_send,n,MPI_DOUBLE,sendproc[iswap],0,world); - MPI_Wait(&request,MPI_STATUS_IGNORE); - buf = buf_recv; - } else { - nrecv = nsend; - buf = buf_send; - } - -my_gettime(CLOCK_REALTIME,&time2); 
-cuda->shared_data.cuda_timings.comm_border_mpi+= - time2.tv_sec-time1.tv_sec+1.0*(time2.tv_nsec-time1.tv_nsec)/1000000000; - - // unpack buffer - - if (ghost_velocity) - avec->unpack_border_vel(nrecv,atom->nlocal+atom->nghost,buf); - else - avec->unpack_border(nrecv,atom->nlocal+atom->nghost,buf); - - // set all pointers & counters - - smax = MAX(smax,nsend); - rmax = MAX(rmax,nrecv); - sendnum[iswap] = nsend; - recvnum[iswap] = nrecv; - size_forward_recv[iswap] = nrecv*size_forward; - size_reverse_send[iswap] = nrecv*size_reverse; - size_reverse_recv[iswap] = nsend*size_reverse; - firstrecv[iswap] = atom->nlocal + atom->nghost; - atom->nghost += nrecv; - iswap++; - } - } - - // insure send/recv buffers are long enough for all forward & reverse comm - - int max = MAX(maxforward*smax,maxreverse*rmax); - if (max > maxsend) grow_send(max,0); - max = MAX(maxforward*rmax,maxreverse*smax); - if (max > maxrecv) grow_recv(max); - - // reset global->local map - if(map_style) - { - cuda->cu_tag->download(); - atom->map_set(); - } - - cuda->setSystemParams(); - cuda->shared_data.atom.nghost+=n; -} - - - - -void CommCuda::forward_comm_fix(Fix *fix, int size) -{ - int iswap,n; - double *buf; - MPI_Request request; - - int nsize = fix->comm_forward; - - for (iswap = 0; iswap < nswap; iswap++) { - // pack buffer - if(fix->cudable_comm&&cuda->finished_setup) - { - int swap=iswap; - if(sendproc[iswap] == me) {swap=-iswap-1; buf=(double*)&(firstrecv[iswap]);} - else buf=buf_send; - - n = fix->pack_forward_comm(sendnum[iswap],&swap, - buf,pbc_flag[iswap],pbc[iswap]); - if(sendproc[iswap] == me) - { - continue; - } - } - else - n = fix->pack_forward_comm(sendnum[iswap],sendlist[iswap], - buf_send,pbc_flag[iswap],pbc[iswap]); - - // exchange with another proc - // if self, set recv buffer to send buffer - - if (sendproc[iswap] != me) { - MPI_Irecv(buf_recv,nsize*recvnum[iswap],MPI_DOUBLE,recvproc[iswap],0, - world,&request); - MPI_Send(buf_send,n,MPI_DOUBLE,sendproc[iswap],0,world); - 
MPI_Wait(&request,MPI_STATUS_IGNORE); - buf = buf_recv; - } else buf = buf_send; - - // unpack buffer - - fix->unpack_forward_comm(recvnum[iswap],firstrecv[iswap],buf); - } -} - - -void CommCuda::grow_send(int n, int flag) -{ - int oldmaxsend = (maxsend+BUFEXTRA)*sizeof(double); - maxsend = static_cast<int> (BUFFACTOR * n); - if (flag){ - if(cuda->pinned) - { - double* tmp = new double[oldmaxsend]; - memcpy((void*) tmp,(void*) buf_send,oldmaxsend*sizeof(double)); - if(buf_send) CudaWrapper_FreePinnedHostData((void*) (buf_send)); - buf_send = (double*) CudaWrapper_AllocPinnedHostData((maxsend+BUFEXTRA)*sizeof(double),false); - memcpy(buf_send,tmp,oldmaxsend*sizeof(double)); - delete [] tmp; - } - else - { - buf_send = (double *) - memory->srealloc(buf_send,(maxsend+BUFEXTRA)*sizeof(double), - "comm:buf_send");printf("srealloc\n"); - } - } - else { - if(cuda->pinned) - { - if(buf_send) CudaWrapper_FreePinnedHostData((void*) buf_send); - buf_send = (double*) CudaWrapper_AllocPinnedHostData((maxsend+BUFEXTRA)*sizeof(double),false); - } - else - { - memory->sfree(buf_send); - buf_send = (double *) memory->smalloc((maxsend+BUFEXTRA)*sizeof(double), - "comm:buf_send"); - } - for(int i=0;i<maxswap;i++) - { - if(cuda->shared_data.comm.buf_send_dev[i]) CudaWrapper_FreeCudaData(cuda->shared_data.comm.buf_send_dev[i],oldmaxsend); - cuda->shared_data.comm.buf_send_dev[i]=CudaWrapper_AllocCudaData((maxsend+BUFEXTRA)*sizeof(double)); - } - } -} -/* ---------------------------------------------------------------------- - free/malloc the size of the recv buffer as needed with BUFFACTOR -------------------------------------------------------------------------- */ - - -void CommCuda::grow_recv(int n) -{ - int oldmaxrecv = maxrecv*sizeof(double); - maxrecv = static_cast<int> (BUFFACTOR * n); - if(cuda->pinned) - { - if(buf_recv) CudaWrapper_FreePinnedHostData((void*)buf_recv); - buf_recv = (double*) CudaWrapper_AllocPinnedHostData(maxrecv*sizeof(double), false,true); - } - else - { - 
memory->sfree(buf_recv); - buf_recv = (double *) memory->smalloc(maxrecv*sizeof(double), - "comm:buf_recv"); - } - for(int i=0;i<maxswap;i++) - { - if(cuda->shared_data.comm.buf_recv_dev[i]) CudaWrapper_FreeCudaData(cuda->shared_data.comm.buf_recv_dev[i],oldmaxrecv); - cuda->shared_data.comm.buf_recv_dev[i]=CudaWrapper_AllocCudaData((maxrecv)*sizeof(double)); - } -} - -/* ---------------------------------------------------------------------- - realloc the size of the iswap sendlist as needed with BUFFACTOR -------------------------------------------------------------------------- */ - -void CommCuda::grow_list(int iswap, int n) -{ - - MYDBG(printf(" # CUDA CommCuda::grow_list\n");) - if(cuda->finished_setup&&cu_sendlist) cu_sendlist->download(); - if(!cu_sendlist||n*BUFFACTOR>cu_sendlist->get_dim()[1]||n*BUFFACTOR>maxsendlist[iswap]) - { - for(int i=0;i<maxswap;i++) - { - maxsendlist[i] = static_cast<int> (BUFFACTOR * n); - sendlist[i] = (int *) - memory->srealloc(sendlist[i],maxsendlist[i]*sizeof(int), - "comm:sendlist[iswap]"); - } - delete cu_sendlist; - cu_sendlist=new cCudaData<int, int, xy> ((int*)sendlist,maxswap,maxsendlist[iswap]); - cuda->shared_data.comm.sendlist.dev_data=cu_sendlist->dev_data(); - cuda->shared_data.comm.maxlistlength=maxsendlist[iswap]; - cu_sendlist->upload(); - } - } - -/* ---------------------------------------------------------------------- - realloc the buffers needed for swaps -------------------------------------------------------------------------- */ - -void CommCuda::grow_swap(int n) -{ - int oldmaxswap=maxswap; - CommBrick::grow_swap(n); - if(n>cu_sendlist->get_dim()[0]) - { - MYDBG(printf(" # CUDA CommCuda::grow_swap\n");) - - delete cu_sendlist; - cu_sendlist=new cCudaData<int, int, xy> ((int*)sendlist,n,BUFMIN); - cuda->shared_data.comm.sendlist.dev_data=cu_sendlist->dev_data(); - cuda->shared_data.comm.maxlistlength=BUFMIN; - cuda->shared_data.comm.maxswap=n; - cuda->shared_data.comm.nsend_swap=new int[n]; - 
cuda->shared_data.comm.send_size=new int[n]; - cuda->shared_data.comm.recv_size=new int[n]; - } - for(int i=0;i<oldmaxswap;i++) - { - if(cuda->shared_data.comm.buf_recv_dev[i]) CudaWrapper_FreeCudaData(cuda->shared_data.comm.buf_recv_dev[i],maxrecv*sizeof(double)); - if(cuda->shared_data.comm.buf_send_dev[i]) CudaWrapper_FreeCudaData(cuda->shared_data.comm.buf_send_dev[i],maxsend*sizeof(double)); - cuda->shared_data.comm.buf_recv_dev[i]=NULL; - cuda->shared_data.comm.buf_send_dev[i]=NULL; - } - cuda->shared_data.comm.buf_send= new double*[n]; - cuda->shared_data.comm.buf_recv= new double*[n]; - cuda->shared_data.comm.buf_send_dev= new void*[n]; - cuda->shared_data.comm.buf_recv_dev= new void*[n]; - for(int i=0;i<n;i++) - { - cuda->shared_data.comm.buf_recv[i]=NULL; - cuda->shared_data.comm.buf_send[i]=NULL; - cuda->shared_data.comm.buf_recv_dev[i]=NULL; - cuda->shared_data.comm.buf_send_dev[i]=NULL; - } - grow_send(maxsend,0); - grow_recv(maxrecv); - - maxswap=n; -} - -/* ---------------------------------------------------------------------- - allocation of swap info -------------------------------------------------------------------------- */ - -void CommCuda::allocate_swap(int n) -{ - CommBrick::allocate_swap(n); - - delete cu_pbc; - delete cu_slablo; - delete cu_slabhi; - - cuda->shared_data.comm.maxswap=n; - if(cu_sendlist) - { - cu_pbc=new cCudaData<int, int, xy> ((int*)pbc,n,6); - cu_slablo = new cCudaData<double, X_CFLOAT,x>(slablo,n); - cu_slabhi = new cCudaData<double, X_CFLOAT,x>(slabhi,n); - - cuda->shared_data.comm.pbc.dev_data=cu_pbc->dev_data(); - cuda->shared_data.comm.slablo.dev_data=cu_slablo->dev_data(); - cuda->shared_data.comm.slabhi.dev_data=cu_slabhi->dev_data(); - } - cuda->shared_data.comm.nsend_swap=new int[n]; - cuda->shared_data.comm.send_size=new int[n]; - cuda->shared_data.comm.recv_size=new int[n]; - cuda->shared_data.comm.buf_send= new double*[n]; - cuda->shared_data.comm.buf_recv= new double*[n]; - 
cuda->shared_data.comm.buf_send_dev= new void*[n]; - cuda->shared_data.comm.buf_recv_dev= new void*[n]; - for(int i=0;i<n;i++) cuda->shared_data.comm.buf_send_dev[i]=NULL; - for(int i=0;i<n;i++) cuda->shared_data.comm.buf_recv_dev[i]=NULL; -} - - -/* ---------------------------------------------------------------------- - allocation of multi-type swap info -------------------------------------------------------------------------- */ - -void CommCuda::allocate_multi(int n) -{ - CommBrick::allocate_multi(n); - - delete cu_multilo; - delete cu_multihi; - cu_multilo = new cCudaData<double, X_CFLOAT,xy>(slablo,n,atom->ntypes+1); - cu_multihi = new cCudaData<double, X_CFLOAT,xy>(slabhi,n,atom->ntypes+1); - - cuda->shared_data.comm.multilo.dev_data=cu_multilo->dev_data(); - cuda->shared_data.comm.multihi.dev_data=cu_multihi->dev_data(); -} - -/* ---------------------------------------------------------------------- - free memory for swaps -------------------------------------------------------------------------- */ - -void CommCuda::free_swap() -{ - - CommBrick::free_swap(); - - delete cuda->shared_data.comm.nsend_swap; cuda->shared_data.comm.nsend_swap=NULL; - delete cu_pbc; cu_pbc = NULL; - delete cu_slablo; cu_slablo = NULL; - delete cu_slabhi; cu_slabhi = NULL; - for(int i=0;i<maxswap;i++) - { - if(cuda->shared_data.comm.buf_recv_dev[i]) CudaWrapper_FreeCudaData(cuda->shared_data.comm.buf_recv_dev[i],maxrecv*sizeof(double)); - if(cuda->shared_data.comm.buf_send_dev[i]) CudaWrapper_FreeCudaData(cuda->shared_data.comm.buf_send_dev[i],maxsend*sizeof(double)); - } - - -} - -/* ---------------------------------------------------------------------- - free memory for multi-type swaps -------------------------------------------------------------------------- */ - -void CommCuda::free_multi() -{ - CommBrick::free_multi(); - delete cu_multilo; cu_multilo = NULL; - delete cu_multihi; cu_multihi = NULL; -} diff --git a/src/USER-CUDA/comm_cuda.h b/src/USER-CUDA/comm_cuda.h deleted 
file mode 100644 index 5105018f32..0000000000 --- a/src/USER-CUDA/comm_cuda.h +++ /dev/null @@ -1,69 +0,0 @@ -/* -*- c++ -*- ---------------------------------------------------------- - LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator - http://lammps.sandia.gov, Sandia National Laboratories - Steve Plimpton, sjplimp@sandia.gov - - Copyright (2003) Sandia Corporation. Under the terms of Contract - DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains - certain rights in this software. This software is distributed under - the GNU General Public License. - - See the README file in the top-level LAMMPS directory. -------------------------------------------------------------------------- */ - -#ifndef LMP_COMM_CUDA_H -#define LMP_COMM_CUDA_H - -#include "pointers.h" - -#include "cuda_data.h" -#include "comm_brick.h" - -namespace LAMMPS_NS { - -class CommCuda : public CommBrick { -public: - CommCuda(class LAMMPS *); - ~CommCuda(); - - virtual void init(); - virtual void setup(); // setup 3d communication pattern - virtual void forward_comm(int mode=0); // forward communication of atom coords - virtual void forward_comm_cuda(); - virtual void forward_comm_pack_cuda(); - virtual void forward_comm_transfer_cuda(); - virtual void forward_comm_unpack_cuda(); - virtual void forward_comm_pair(Pair *pair); - virtual void reverse_comm(); // reverse communication of forces - virtual void exchange(); // move atoms to new procs - virtual void exchange_cuda(); // move atoms to new procs - virtual void borders(); // setup list of atoms to communicate - virtual void borders_cuda(); // setup list of atoms to communicate - virtual void borders_cuda_overlap_forward_comm(); - virtual void forward_comm_fix(class Fix *, int size=0); // forward comm from a Fix - - - - - protected: - class Cuda *cuda; - cCudaData<int, int, xy>* cu_pbc; - cCudaData<double, X_CFLOAT, x>* cu_slablo; - cCudaData<double, X_CFLOAT, x>* cu_slabhi; - cCudaData<double, X_CFLOAT, xy>* 
cu_multilo; - cCudaData<double, X_CFLOAT, xy>* cu_multihi; - - cCudaData<int, int, xy>* cu_sendlist; - virtual void grow_send(int,int); // reallocate send buffer - virtual void grow_recv(int); // free/allocate recv buffer - virtual void grow_list(int, int); // reallocate one sendlist - virtual void grow_swap(int); // grow swap and multi arrays - virtual void allocate_swap(int); // allocate swap arrays - virtual void allocate_multi(int); // allocate multi arrays - virtual void free_swap(); // free swap arrays - virtual void free_multi(); // free multi arrays -}; - -} - -#endif diff --git a/src/USER-CUDA/compute_pe_cuda.cpp b/src/USER-CUDA/compute_pe_cuda.cpp deleted file mode 100644 index b8661c9702..0000000000 --- a/src/USER-CUDA/compute_pe_cuda.cpp +++ /dev/null @@ -1,61 +0,0 @@ -/* ---------------------------------------------------------------------- - LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator - - Original Version: - http://lammps.sandia.gov, Sandia National Laboratories - Steve Plimpton, sjplimp@sandia.gov - - See the README file in the top-level LAMMPS directory. - - ----------------------------------------------------------------------- - - USER-CUDA Package and associated modifications: - https://sourceforge.net/projects/lammpscuda/ - - Christian Trott, christian.trott@tu-ilmenau.de - Lars Winterfeld, lars.winterfeld@tu-ilmenau.de - Theoretical Physics II, University of Technology Ilmenau, Germany - - See the README file in the USER-CUDA directory. - - This software is distributed under the GNU General Public License. -------------------------------------------------------------------------- */ - -/* ---------------------------------------------------------------------- - LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator - http://lammps.sandia.gov, Sandia National Laboratories - Steve Plimpton, sjplimp@sandia.gov - - Copyright (2003) Sandia Corporation. 
Under the terms of Contract - DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains - certain rights in this software. This software is distributed under - the GNU General Public License. - - See the README file in the top-level LAMMPS directory. -------------------------------------------------------------------------- */ - -#include <mpi.h> -#include <cstring> -#include "compute_pe_cuda.h" -#include "atom.h" -#include "update.h" -#include "force.h" -#include "pair.h" -#include "bond.h" -#include "angle.h" -#include "dihedral.h" -#include "improper.h" -#include "kspace.h" -#include "modify.h" -#include "domain.h" -#include "error.h" - -using namespace LAMMPS_NS; - -/* ---------------------------------------------------------------------- */ - -ComputePECuda::ComputePECuda(LAMMPS *lmp, int narg, char **arg) : - ComputePE(lmp, narg, arg) -{ - cudable = 1; -} diff --git a/src/USER-CUDA/compute_pe_cuda.h b/src/USER-CUDA/compute_pe_cuda.h deleted file mode 100644 index bc8b057762..0000000000 --- a/src/USER-CUDA/compute_pe_cuda.h +++ /dev/null @@ -1,59 +0,0 @@ -/* -*- c++ -*- ---------------------------------------------------------- - LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator - - Original Version: - http://lammps.sandia.gov, Sandia National Laboratories - Steve Plimpton, sjplimp@sandia.gov - - See the README file in the top-level LAMMPS directory. - - ----------------------------------------------------------------------- - - USER-CUDA Package and associated modifications: - https://sourceforge.net/projects/lammpscuda/ - - Christian Trott, christian.trott@tu-ilmenau.de - Lars Winterfeld, lars.winterfeld@tu-ilmenau.de - Theoretical Physics II, University of Technology Ilmenau, Germany - - See the README file in the USER-CUDA directory. - - This software is distributed under the GNU General Public License. 
-------------------------------------------------------------------------- */ - -/* ---------------------------------------------------------------------- - LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator - http://lammps.sandia.gov, Sandia National Laboratories - Steve Plimpton, sjplimp@sandia.gov - - Copyright (2003) Sandia Corporation. Under the terms of Contract - DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains - certain rights in this software. This software is distributed under - the GNU General Public License. - - See the README file in the top-level LAMMPS directory. -------------------------------------------------------------------------- */ - -#ifdef COMPUTE_CLASS - -ComputeStyle(pe/cuda,ComputePECuda) - -#else - -#ifndef LMP_COMPUTE_PE_CUDA_H -#define LMP_COMPUTE_PE_CUDA_H - -#include "compute_pe.h" - -namespace LAMMPS_NS { - -class ComputePECuda : public ComputePE { - public: - ComputePECuda(class LAMMPS *, int, char **); - ~ComputePECuda() {} -}; - -} - -#endif -#endif diff --git a/src/USER-CUDA/compute_pressure_cuda.cpp b/src/USER-CUDA/compute_pressure_cuda.cpp deleted file mode 100644 index c92e918ad0..0000000000 --- a/src/USER-CUDA/compute_pressure_cuda.cpp +++ /dev/null @@ -1,97 +0,0 @@ -/* ---------------------------------------------------------------------- - LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator - - Original Version: - http://lammps.sandia.gov, Sandia National Laboratories - Steve Plimpton, sjplimp@sandia.gov - - See the README file in the top-level LAMMPS directory. - - ----------------------------------------------------------------------- - - USER-CUDA Package and associated modifications: - https://sourceforge.net/projects/lammpscuda/ - - Christian Trott, christian.trott@tu-ilmenau.de - Lars Winterfeld, lars.winterfeld@tu-ilmenau.de - Theoretical Physics II, University of Technology Ilmenau, Germany - - See the README file in the USER-CUDA directory. 
- - This software is distributed under the GNU General Public License. -------------------------------------------------------------------------- */ - -/* ---------------------------------------------------------------------- - LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator - http://lammps.sandia.gov, Sandia National Laboratories - Steve Plimpton, sjplimp@sandia.gov - - Copyright (2003) Sandia Corporation. Under the terms of Contract - DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains - certain rights in this software. This software is distributed under - the GNU General Public License. - - See the README file in the top-level LAMMPS directory. -------------------------------------------------------------------------- */ - -#include <mpi.h> -#include <cstring> -#include <cstdlib> -#include "compute_pressure_cuda.h" -#include "atom.h" -#include "update.h" -#include "domain.h" -#include "modify.h" -#include "fix.h" -#include "force.h" -#include "pair.h" -#include "bond.h" -#include "angle.h" -#include "dihedral.h" -#include "improper.h" -#include "kspace.h" -#include "error.h" -#include "user_cuda.h" - -using namespace LAMMPS_NS; - -enum{DUMMY0,INVOKED_SCALAR,INVOKED_VECTOR,DUMMMY3,INVOKED_PERATOM}; - -/* ---------------------------------------------------------------------- */ - -ComputePressureCuda::ComputePressureCuda(LAMMPS *lmp, int narg, char **arg) : - ComputePressure(lmp, narg, arg) -{ - cuda = lmp->cuda; - if(cuda == NULL) - error->all(FLERR,"You cannot use a /cuda class, without activating 'cuda' acceleration. 
Provide '-c on' as command-line argument to LAMMPS.."); - cudable = 1; - - // store temperature ID used by pressure computation - // insure it is valid for temperature computation - - int n = strlen(arg[3]) + 1; - char* id_temp = new char[n]; - strcpy(id_temp,arg[3]); - - int icompute = modify->find_compute(id_temp); - delete [] id_temp; - if (modify->compute[icompute]->cudable == 0) - { - error->warning(FLERR,"Compute pressure/cuda temperature ID is not cudable! Try a temp/cuda style."); - cudable = 0; - } - -} - -double ComputePressureCuda::compute_scalar() -{ - if(not temperature->cudable && cuda->finished_setup) cuda->downloadAll(); - return ComputePressure::compute_scalar(); -} - -void ComputePressureCuda::compute_vector() -{ - if(not temperature->cudable && cuda->finished_setup) cuda->downloadAll(); - ComputePressure::compute_vector(); -} diff --git a/src/USER-CUDA/compute_pressure_cuda.h b/src/USER-CUDA/compute_pressure_cuda.h deleted file mode 100644 index af48091708..0000000000 --- a/src/USER-CUDA/compute_pressure_cuda.h +++ /dev/null @@ -1,63 +0,0 @@ -/* -*- c++ -*- ---------------------------------------------------------- - LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator - - Original Version: - http://lammps.sandia.gov, Sandia National Laboratories - Steve Plimpton, sjplimp@sandia.gov - - See the README file in the top-level LAMMPS directory. - - ----------------------------------------------------------------------- - - USER-CUDA Package and associated modifications: - https://sourceforge.net/projects/lammpscuda/ - - Christian Trott, christian.trott@tu-ilmenau.de - Lars Winterfeld, lars.winterfeld@tu-ilmenau.de - Theoretical Physics II, University of Technology Ilmenau, Germany - - See the README file in the USER-CUDA directory. - - This software is distributed under the GNU General Public License. 
-------------------------------------------------------------------------- */ - -/* ---------------------------------------------------------------------- - LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator - http://lammps.sandia.gov, Sandia National Laboratories - Steve Plimpton, sjplimp@sandia.gov - - Copyright (2003) Sandia Corporation. Under the terms of Contract - DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains - certain rights in this software. This software is distributed under - the GNU General Public License. - - See the README file in the top-level LAMMPS directory. -------------------------------------------------------------------------- */ -#ifdef COMPUTE_CLASS - -ComputeStyle(pressure/cuda,ComputePressureCuda) - -#else - -#ifndef LMP_COMPUTE_PRESSURE_CUDA_H -#define LMP_COMPUTE_PRESSURE_CUDA_H - -#include "compute_pressure.h" - -namespace LAMMPS_NS { - -class ComputePressureCuda : public ComputePressure { - public: - ComputePressureCuda(class LAMMPS *, int, char **); - ~ComputePressureCuda() {} - double compute_scalar(); - void compute_vector(); - - private: - class Cuda *cuda; -}; - -} - -#endif -#endif diff --git a/src/USER-CUDA/compute_temp_cuda.cpp b/src/USER-CUDA/compute_temp_cuda.cpp deleted file mode 100644 index 85afa07258..0000000000 --- a/src/USER-CUDA/compute_temp_cuda.cpp +++ /dev/null @@ -1,215 +0,0 @@ -/* ---------------------------------------------------------------------- - LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator - - Original Version: - http://lammps.sandia.gov, Sandia National Laboratories - Steve Plimpton, sjplimp@sandia.gov - - See the README file in the top-level LAMMPS directory. 
- - ----------------------------------------------------------------------- - - USER-CUDA Package and associated modifications: - https://sourceforge.net/projects/lammpscuda/ - - Christian Trott, christian.trott@tu-ilmenau.de - Lars Winterfeld, lars.winterfeld@tu-ilmenau.de - Theoretical Physics II, University of Technology Ilmenau, Germany - - See the README file in the USER-CUDA directory. - - This software is distributed under the GNU General Public License. -------------------------------------------------------------------------- */ - -/* ---------------------------------------------------------------------- - LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator - http://lammps.sandia.gov, Sandia National Laboratories - Steve Plimpton, sjplimp@sandia.gov - - Copyright (2003) Sandia Corporation. Under the terms of Contract - DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains - certain rights in this software. This software is distributed under - the GNU General Public License. - - See the README file in the top-level LAMMPS directory. -------------------------------------------------------------------------- */ - -#include <mpi.h> -#include <cstdlib> -#include <cstdio> -#include <cstring> -#include "compute_temp_cuda.h" -#include "compute_temp_cuda_cu.h" -#include "atom.h" -#include "update.h" -#include "force.h" -#include "domain.h" -#include "modify.h" -#include "fix.h" -#include "group.h" -#include "error.h" -#include "user_cuda.h" - -using namespace LAMMPS_NS; - -/* ---------------------------------------------------------------------- */ - -ComputeTempCuda::ComputeTempCuda(LAMMPS *lmp, int narg, char **arg) : - Compute(lmp, narg, arg) -{ - cuda = lmp->cuda; - if(cuda == NULL) - error->all(FLERR,"You cannot use a /cuda class, without activating 'cuda' acceleration. 
Provide '-c on' as command-line argument to LAMMPS.."); - - if (narg != 3) error->all(FLERR,"Illegal compute temp/cuda command"); - - scalar_flag = vector_flag = 1; - size_vector = 6; - extscalar = 0; - extvector = 1; - tempflag = 1; - - vector = new double[6]; - cu_t_vector = 0; - cu_t_scalar = 0; - cudable=true; - -} - -/* ---------------------------------------------------------------------- */ - -ComputeTempCuda::~ComputeTempCuda() -{ - delete [] vector; - delete cu_t_vector; - delete cu_t_scalar; -} - -/* ---------------------------------------------------------------------- */ - -void ComputeTempCuda::setup() -{ - dynamic = 0; - if (dynamic_user || group->dynamic[igroup]) dynamic = 1; - - fix_dof = 0; - for (int i = 0; i < modify->nfix; i++) - fix_dof += modify->fix[i]->dof(igroup); - dof_compute(); -} - -/* ---------------------------------------------------------------------- */ - -void ComputeTempCuda::dof_compute() -{ - double natoms = group->count(igroup); - dof = domain->dimension * natoms; - dof -= extra_dof + fix_dof; - if (dof > 0.0) tfactor = force->mvv2e / (dof * force->boltz); - else tfactor = 0.0; -} - -/* ---------------------------------------------------------------------- */ - -double ComputeTempCuda::compute_scalar() -{ - if(cuda->begin_setup) - { - if(not cu_t_vector) cu_t_vector = new cCudaData<double, ENERGY_CFLOAT, x> (t_vector,6); - if(not cu_t_scalar) cu_t_scalar = new cCudaData<double, ENERGY_CFLOAT, x> (&t_scalar,1); - invoked_scalar = update->ntimestep; - Cuda_ComputeTempCuda_Scalar(&cuda->shared_data,groupbit,(ENERGY_CFLOAT*) cu_t_scalar->dev_data()); - cu_t_scalar->download(); - } - else - { - invoked_scalar = update->ntimestep; - - double **v = atom->v; - double *mass = atom->mass; - double *rmass = atom->rmass; - int *type = atom->type; - int *mask = atom->mask; - int nlocal = atom->nlocal; - - double t = 0.0; - - if (rmass) { - for (int i = 0; i < nlocal; i++) - if (mask[i] & groupbit) - t += (v[i][0]*v[i][0] + v[i][1]*v[i][1] 
+ v[i][2]*v[i][2]) * rmass[i]; - } else { - for (int i = 0; i < nlocal; i++) - if (mask[i] & groupbit) - t += (v[i][0]*v[i][0] + v[i][1]*v[i][1] + v[i][2]*v[i][2]) * - mass[type[i]]; - } - t_scalar=t; - } - - MPI_Allreduce(&t_scalar,&scalar,1,MPI_DOUBLE,MPI_SUM,world); - if (dynamic) dof_compute(); - scalar *= tfactor; - if(scalar>1e15) - { - cuda->cu_v->download(); - cuda->cu_x->download(); - cuda->cu_type->download(); - double **v = atom->v; - double **x = atom->x; - printf("Out of v-range atoms: \n"); - for(int i=0;i<atom->nlocal;i++) - if((v[i][0]*v[i][0] + v[i][1]*v[i][1] + v[i][2]*v[i][2])>1e5) - printf("%i %i // %lf %lf %lf // %lf %lf %lf\n",atom->tag[i],atom->type[i],x[i][0], x[i][1], x[i][2],v[i][0], v[i][1], v[i][2]); - error->all(FLERR,"Temperature out of range. Simulations will be abortet.\n"); - } - return scalar; -} - -/* ---------------------------------------------------------------------- */ - -void ComputeTempCuda::compute_vector() -{ - int i; - if(cuda->begin_setup) - { - if(not cu_t_vector) cu_t_vector = new cCudaData<double, ENERGY_CFLOAT, x> (t_vector,6); - if(not cu_t_scalar) cu_t_scalar = new cCudaData<double, ENERGY_CFLOAT, x> (&t_scalar,1); - - invoked_vector = update->ntimestep; - - Cuda_ComputeTempCuda_Vector(&cuda->shared_data,groupbit,(ENERGY_CFLOAT*) cu_t_vector->dev_data()); - cu_t_vector->download(); - } - else - { - - invoked_vector = update->ntimestep; - - double **v = atom->v; - double *mass = atom->mass; - double *rmass = atom->rmass; - int *type = atom->type; - int *mask = atom->mask; - int nlocal = atom->nlocal; - - double massone,t[6]; - for (i = 0; i < 6; i++) t[i] = 0.0; - - for (i = 0; i < nlocal; i++) - if (mask[i] & groupbit) { - if (rmass) massone = rmass[i]; - else massone = mass[type[i]]; - t[0] += massone * v[i][0]*v[i][0]; - t[1] += massone * v[i][1]*v[i][1]; - t[2] += massone * v[i][2]*v[i][2]; - t[3] += massone * v[i][0]*v[i][1]; - t[4] += massone * v[i][0]*v[i][2]; - t[5] += massone * v[i][1]*v[i][2]; - } - - for 
(i = 0; i < 6; i++) t_vector[i]=t[i]; - } - MPI_Allreduce(t_vector,vector,6,MPI_DOUBLE,MPI_SUM,world); - for (i = 0; i < 6; i++) vector[i] *= force->mvv2e; -} diff --git a/src/USER-CUDA/compute_temp_cuda.h b/src/USER-CUDA/compute_temp_cuda.h deleted file mode 100644 index 54b3338c08..0000000000 --- a/src/USER-CUDA/compute_temp_cuda.h +++ /dev/null @@ -1,76 +0,0 @@ -/* -*- c++ -*- ---------------------------------------------------------- - LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator - - Original Version: - http://lammps.sandia.gov, Sandia National Laboratories - Steve Plimpton, sjplimp@sandia.gov - - See the README file in the top-level LAMMPS directory. - - ----------------------------------------------------------------------- - - USER-CUDA Package and associated modifications: - https://sourceforge.net/projects/lammpscuda/ - - Christian Trott, christian.trott@tu-ilmenau.de - Lars Winterfeld, lars.winterfeld@tu-ilmenau.de - Theoretical Physics II, University of Technology Ilmenau, Germany - - See the README file in the USER-CUDA directory. - - This software is distributed under the GNU General Public License. -------------------------------------------------------------------------- */ - -/* ---------------------------------------------------------------------- - LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator - http://lammps.sandia.gov, Sandia National Laboratories - Steve Plimpton, sjplimp@sandia.gov - - Copyright (2003) Sandia Corporation. Under the terms of Contract - DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains - certain rights in this software. This software is distributed under - the GNU General Public License. - - See the README file in the top-level LAMMPS directory. 
-------------------------------------------------------------------------- */ - -#ifdef COMPUTE_CLASS - -ComputeStyle(temp/cuda,ComputeTempCuda) - -#else - -#ifndef LMP_COMPUTE_TEMP_CUDA_H -#define LMP_COMPUTE_TEMP_CUDA_H - -#include "compute.h" -#include "cuda_data.h" - -namespace LAMMPS_NS { - -class ComputeTempCuda : public Compute { - public: - ComputeTempCuda(class LAMMPS *, int, char **); - ~ComputeTempCuda(); - void init() {} - void setup(); - double compute_scalar(); - void compute_vector(); - - private: - class Cuda *cuda; - int fix_dof; - double tfactor; - - void dof_compute(); - double t_vector[6]; - double t_scalar; - cCudaData<double , ENERGY_CFLOAT , x>* cu_t_scalar; - cCudaData<double , ENERGY_CFLOAT , x>* cu_t_vector; - -}; - -} - -#endif -#endif diff --git a/src/USER-CUDA/compute_temp_partial_cuda.cpp b/src/USER-CUDA/compute_temp_partial_cuda.cpp deleted file mode 100644 index b366c546f6..0000000000 --- a/src/USER-CUDA/compute_temp_partial_cuda.cpp +++ /dev/null @@ -1,360 +0,0 @@ -/* ---------------------------------------------------------------------- - LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator - - Original Version: - http://lammps.sandia.gov, Sandia National Laboratories - Steve Plimpton, sjplimp@sandia.gov - - See the README file in the top-level LAMMPS directory. - - ----------------------------------------------------------------------- - - USER-CUDA Package and associated modifications: - https://sourceforge.net/projects/lammpscuda/ - - Christian Trott, christian.trott@tu-ilmenau.de - Lars Winterfeld, lars.winterfeld@tu-ilmenau.de - Theoretical Physics II, University of Technology Ilmenau, Germany - - See the README file in the USER-CUDA directory. - - This software is distributed under the GNU General Public License. 
-------------------------------------------------------------------------- */ - -/* ---------------------------------------------------------------------- - LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator - http://lammps.sandia.gov, Sandia National Laboratories - Steve Plimpton, sjplimp@sandia.gov - - Copyright (2003) Sandia Corporation. Under the terms of Contract - DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains - certain rights in this software. This software is distributed under - the GNU General Public License. - - See the README file in the top-level LAMMPS directory. -------------------------------------------------------------------------- */ - -#include <mpi.h> -#include <cstdlib> -#include <cstdio> -#include <cstring> -#include "compute_temp_partial_cuda.h" -#include "compute_temp_partial_cuda_cu.h" -#include "atom.h" -#include "update.h" -#include "force.h" -#include "domain.h" -#include "modify.h" -#include "fix.h" -#include "group.h" -#include "memory.h" -#include "error.h" -#include "user_cuda.h" - -using namespace LAMMPS_NS; - -/* ---------------------------------------------------------------------- */ - -ComputeTempPartialCuda::ComputeTempPartialCuda(LAMMPS *lmp, int narg, char **arg) : - Compute(lmp, narg, arg) -{ - cuda = lmp->cuda; - if(cuda == NULL) - error->all(FLERR,"You cannot use a /cuda class, without activating 'cuda' acceleration. 
Provide '-c on' as command-line argument to LAMMPS.."); - - if (narg != 6) error->all(FLERR,"Illegal compute temp/partial command"); - - scalar_flag = vector_flag = 1; - size_vector = 6; - extscalar = 0; - extvector = 1; - tempflag = 1; - tempbias = 1; - - xflag = force->inumeric(FLERR,arg[3]); - yflag = force->inumeric(FLERR,arg[4]); - zflag = force->inumeric(FLERR,arg[5]); - if (zflag && domain->dimension == 2) - error->all(FLERR,"Compute temp/partial cannot use vz for 2d systemx"); - - maxbias = 0; - vbiasall = NULL; - - vector = new double[6]; - cu_t_vector = 0; - cu_t_scalar = 0; - cu_vbiasall=NULL; - cudable=true; - -} - -/* ---------------------------------------------------------------------- */ - -ComputeTempPartialCuda::~ComputeTempPartialCuda() -{ - memory->destroy(vbiasall); - delete [] vector; - delete cu_t_vector; - delete cu_t_scalar; - delete cu_vbiasall; -} - -/* ---------------------------------------------------------------------- */ - -void ComputeTempPartialCuda::setup() -{ - dynamic = 0; - if (dynamic_user || group->dynamic[igroup]) dynamic = 1; - - fix_dof = 0; - for (int i = 0; i < modify->nfix; i++) - fix_dof += modify->fix[i]->dof(igroup); - dof_compute(); -} - -/* ---------------------------------------------------------------------- */ - -void ComputeTempPartialCuda::dof_compute() -{ - double natoms = group->count(igroup); - int nper = xflag+yflag+zflag; - dof = nper * natoms; - dof -= (1.0*nper/domain->dimension)*fix_dof + extra_dof; - if (dof > 0) tfactor = force->mvv2e / (dof * force->boltz); - else tfactor = 0.0; -} - -/* ---------------------------------------------------------------------- */ - -int ComputeTempPartialCuda::dof_remove(int i) -{ - int nper = xflag+yflag+zflag; - return (domain->dimension - nper); -} - -/* ---------------------------------------------------------------------- */ - -double ComputeTempPartialCuda::compute_scalar() -{ - if(cuda->begin_setup) - { - if(not cu_t_vector) cu_t_vector = new cCudaData<double, 
ENERGY_CFLOAT, x> (t_vector,6); - if(not cu_t_scalar) cu_t_scalar = new cCudaData<double, ENERGY_CFLOAT, x> (&t_scalar,1); - invoked_scalar = update->ntimestep; - Cuda_ComputeTempPartialCuda_Scalar(&cuda->shared_data,groupbit,(ENERGY_CFLOAT*) cu_t_scalar->dev_data(),xflag,yflag,zflag); - cu_t_scalar->download(); - } - else - { - invoked_scalar = update->ntimestep; - - double **v = atom->v; - double *mass = atom->mass; - double *rmass = atom->rmass; - int *type = atom->type; - int *mask = atom->mask; - int nlocal = atom->nlocal; - - double t = 0.0; - - if (rmass) { - for (int i = 0; i < nlocal; i++) - if (mask[i] & groupbit) - t += (xflag*v[i][0]*v[i][0] + yflag*v[i][1]*v[i][1] + zflag*v[i][2]*v[i][2]) * rmass[i]; - } else { - for (int i = 0; i < nlocal; i++) - if (mask[i] & groupbit) - t += (xflag*v[i][0]*v[i][0] + yflag*v[i][1]*v[i][1] + zflag*v[i][2]*v[i][2]) * - mass[type[i]]; - } - t_scalar=t; - } - - MPI_Allreduce(&t_scalar,&scalar,1,MPI_DOUBLE,MPI_SUM,world); - if (dynamic) dof_compute(); - scalar *= tfactor; - if(scalar>1e15) - { - cuda->cu_v->download(); - cuda->cu_x->download(); - cuda->cu_type->download(); - double **v = atom->v; - double **x = atom->x; - printf("Out of v-range atoms: \n"); - for(int i=0;i<atom->nlocal;i++) - if((v[i][0]*v[i][0] + v[i][1]*v[i][1] + v[i][2]*v[i][2])>1e5) - printf("%i %i // %lf %lf %lf // %lf %lf %lf\n",atom->tag[i],atom->type[i],x[i][0], x[i][1], x[i][2],v[i][0], v[i][1], v[i][2]); - error->all(FLERR,"Temperature out of range. 
Simulations will be abortet.\n"); - } - return scalar; -} - -/* ---------------------------------------------------------------------- */ - -void ComputeTempPartialCuda::compute_vector() -{ - int i; - if(cuda->begin_setup) - { - if(not cu_t_vector) cu_t_vector = new cCudaData<double, ENERGY_CFLOAT, x> (t_vector,6); - if(not cu_t_scalar) cu_t_scalar = new cCudaData<double, ENERGY_CFLOAT, x> (&t_scalar,1); - - invoked_vector = update->ntimestep; - - Cuda_ComputeTempPartialCuda_Vector(&cuda->shared_data,groupbit,(ENERGY_CFLOAT*) cu_t_vector->dev_data(),xflag,yflag,zflag); - cu_t_vector->download(); - } - else - { - - invoked_vector = update->ntimestep; - - double **v = atom->v; - double *mass = atom->mass; - double *rmass = atom->rmass; - int *type = atom->type; - int *mask = atom->mask; - int nlocal = atom->nlocal; - - double massone,t[6]; - for (i = 0; i < 6; i++) t[i] = 0.0; - - for (i = 0; i < nlocal; i++) - if (mask[i] & groupbit) { - if (rmass) massone = rmass[i]; - else massone = mass[type[i]]; - t[0] += massone * xflag*v[i][0]*v[i][0]; - t[1] += massone * yflag*v[i][1]*v[i][1]; - t[2] += massone * zflag*v[i][2]*v[i][2]; - t[3] += massone * xflag*yflag*v[i][0]*v[i][1]; - t[4] += massone * xflag*zflag*v[i][0]*v[i][2]; - t[5] += massone * yflag*zflag*v[i][1]*v[i][2]; - } - - for (i = 0; i < 6; i++) t_vector[i]=t[i]; - } - MPI_Allreduce(t_vector,vector,6,MPI_DOUBLE,MPI_SUM,world); - for (i = 0; i < 6; i++) vector[i] *= force->mvv2e; -} - -/* ---------------------------------------------------------------------- - remove velocity bias from atom I to leave thermal velocity -------------------------------------------------------------------------- */ - -void ComputeTempPartialCuda::remove_bias(int i, double *v) -{ - if (!xflag) { - vbias[0] = v[0]; - v[0] = 0.0; - } - if (!yflag) { - vbias[1] = v[1]; - v[1] = 0.0; - } - if (!zflag) { - vbias[2] = v[2]; - v[2] = 0.0; - } -} - -/* ---------------------------------------------------------------------- - remove velocity 
bias from all atoms to leave thermal velocity -------------------------------------------------------------------------- */ - -void ComputeTempPartialCuda::remove_bias_all() -{ - double **v = atom->v; - int *mask = atom->mask; - int nlocal = atom->nlocal; - - if (nlocal > maxbias) { - memory->destroy(vbiasall); - maxbias = atom->nmax; - memory->create(vbiasall,maxbias,3,"temp/partial:vbiasall"); - delete cu_vbiasall; - cu_vbiasall = new cCudaData<double, V_CFLOAT, yx> ((double*)vbiasall, atom->nmax, 3); - } - if(cuda->begin_setup) - { - Cuda_ComputeTempPartialCuda_RemoveBiasAll(&cuda->shared_data,groupbit,xflag,yflag,zflag,cu_vbiasall->dev_data()); - } - else - { - if (!xflag) { - for (int i = 0; i < nlocal; i++) - if (mask[i] & groupbit) { - vbiasall[i][0] = v[i][0]; - v[i][0] = 0.0; - } - } - if (!yflag) { - for (int i = 0; i < nlocal; i++) - if (mask[i] & groupbit) { - vbiasall[i][1] = v[i][1]; - v[i][1] = 0.0; - } - } - if (!zflag) { - for (int i = 0; i < nlocal; i++) - if (mask[i] & groupbit) { - vbiasall[i][2] = v[i][2]; - v[i][2] = 0.0; - } - } - } -} - -/* ---------------------------------------------------------------------- - add back in velocity bias to atom I removed by remove_bias() - assume remove_bias() was previously called -------------------------------------------------------------------------- */ - -void ComputeTempPartialCuda::restore_bias(int i, double *v) -{ - if (!xflag) v[0] += vbias[0]; - if (!yflag) v[1] += vbias[1]; - if (!zflag) v[2] += vbias[2]; -} - -/* ---------------------------------------------------------------------- - add back in velocity bias to all atoms removed by remove_bias_all() - assume remove_bias_all() was previously called -------------------------------------------------------------------------- */ - -void ComputeTempPartialCuda::restore_bias_all() -{ - double **v = atom->v; - int *mask = atom->mask; - int nlocal = atom->nlocal; - if(cuda->begin_setup) - { - 
Cuda_ComputeTempPartialCuda_RestoreBiasAll(&cuda->shared_data,groupbit,xflag,yflag,zflag,cu_vbiasall->dev_data()); - } - else - { - - if (!xflag) { - for (int i = 0; i < nlocal; i++) - if (mask[i] & groupbit) - v[i][0] += vbiasall[i][0]; - } - if (!yflag) { - for (int i = 0; i < nlocal; i++) - if (mask[i] & groupbit) - v[i][1] += vbiasall[i][1]; - } - if (!zflag) { - for (int i = 0; i < nlocal; i++) - if (mask[i] & groupbit) - v[i][2] += vbiasall[i][2]; - } - } -} - -/* ---------------------------------------------------------------------- */ - -double ComputeTempPartialCuda::memory_usage() -{ - double bytes = maxbias * sizeof(double); - return bytes; -} diff --git a/src/USER-CUDA/compute_temp_partial_cuda.h b/src/USER-CUDA/compute_temp_partial_cuda.h deleted file mode 100644 index 320bf17858..0000000000 --- a/src/USER-CUDA/compute_temp_partial_cuda.h +++ /dev/null @@ -1,84 +0,0 @@ -/* -*- c++ -*- ---------------------------------------------------------- - LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator - - Original Version: - http://lammps.sandia.gov, Sandia National Laboratories - Steve Plimpton, sjplimp@sandia.gov - - See the README file in the top-level LAMMPS directory. - - ----------------------------------------------------------------------- - - USER-CUDA Package and associated modifications: - https://sourceforge.net/projects/lammpscuda/ - - Christian Trott, christian.trott@tu-ilmenau.de - Lars Winterfeld, lars.winterfeld@tu-ilmenau.de - Theoretical Physics II, University of Technology Ilmenau, Germany - - See the README file in the USER-CUDA directory. - - This software is distributed under the GNU General Public License. 
-------------------------------------------------------------------------- */ - -/* ---------------------------------------------------------------------- - LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator - http://lammps.sandia.gov, Sandia National Laboratories - Steve Plimpton, sjplimp@sandia.gov - - Copyright (2003) Sandia Corporation. Under the terms of Contract - DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains - certain rights in this software. This software is distributed under - the GNU General Public License. - - See the README file in the top-level LAMMPS directory. -------------------------------------------------------------------------- */ - -#ifdef COMPUTE_CLASS - -ComputeStyle(temp/partial/cuda,ComputeTempPartialCuda) - -#else - -#ifndef LMP_COMPUTE_TEMP_PARTIAL_CUDA_H -#define LMP_COMPUTE_TEMP_PARTIAL_CUDA_H - -#include "compute.h" -#include "cuda_data.h" - -namespace LAMMPS_NS { - -class ComputeTempPartialCuda : public Compute { - public: - ComputeTempPartialCuda(class LAMMPS *, int, char **); - ~ComputeTempPartialCuda(); - void init() {} - void setup(); - double compute_scalar(); - void compute_vector(); - - int dof_remove(int); - void remove_bias(int, double *); - void remove_bias_all(); - void restore_bias(int, double *); - void restore_bias_all(); - double memory_usage(); - - private: - class Cuda *cuda; - int xflag,yflag,zflag; - int fix_dof; - double tfactor; - - void dof_compute(); - double t_vector[6]; - double t_scalar; - cCudaData<double , ENERGY_CFLOAT , x>* cu_t_scalar; - cCudaData<double , ENERGY_CFLOAT , x>* cu_t_vector; - cCudaData<double, V_CFLOAT, yx>* cu_vbiasall; -}; - -} - -#endif -#endif diff --git a/src/USER-CUDA/cuda.cpp b/src/USER-CUDA/cuda.cpp deleted file mode 100644 index 995289a792..0000000000 --- a/src/USER-CUDA/cuda.cpp +++ /dev/null @@ -1,1067 +0,0 @@ -/* ---------------------------------------------------------------------- - LAMMPS - Large-scale Atomic/Molecular Massively 
Parallel Simulator - - Original Version: - http://lammps.sandia.gov, Sandia National Laboratories - Steve Plimpton, sjplimp@sandia.gov - - See the README file in the top-level LAMMPS directory. - - ----------------------------------------------------------------------- - - USER-CUDA Package and associated modifications: - https://sourceforge.net/projects/lammpscuda/ - - Christian Trott, christian.trott@tu-ilmenau.de - Lars Winterfeld, lars.winterfeld@tu-ilmenau.de - Theoretical Physics II, University of Technology Ilmenau, Germany - - See the README file in the USER-CUDA directory. - - This software is distributed under the GNU General Public License. -------------------------------------------------------------------------- */ - -#include <cstdlib> -#include <cstdio> -#include <cstring> -#include "user_cuda.h" -#include "atom.h" -#include "domain.h" -#include "force.h" -#include "pair.h" -#include "update.h" -#include "neighbor.h" -#include "neigh_list.h" -#include "universe.h" -#include "input.h" -#include "atom_masks.h" -#include "error.h" - -#include "cuda_neigh_list.h" -//#include "pre_binning_cu.h" -//#include "reverse_binning_cu.h" -#include <ctime> -#include <cmath> -#include "cuda_pair_cu.h" -#include "cuda_cu.h" - -using namespace LAMMPS_NS; - -/* ---------------------------------------------------------------------- */ - -Cuda::Cuda(LAMMPS* lmp) : Pointers(lmp) -{ - cuda_exists = true; - lmp->cuda = this; - - if (universe->me == 0) printf("# Using LAMMPS_CUDA \n"); - - shared_data.me = universe->me; - - device_set = false; - devicelist = NULL; - - Cuda_Cuda_GetCompileSettings(&shared_data); - - if (universe->me == 0) { - - if(shared_data.compile_settings.prec_glob != sizeof(CUDA_CFLOAT) / 4) - printf("\n\n # CUDA WARNING: Compile Settings of cuda and cpp code differ! 
\n" - " # CUDA WARNING: Global Precision: cuda %i cpp %i\n\n", - shared_data.compile_settings.prec_glob, (int) sizeof(CUDA_CFLOAT) / 4); - - if(shared_data.compile_settings.prec_x != sizeof(X_CFLOAT) / 4) - printf("\n\n # CUDA WARNING: Compile Settings of cuda and cpp code differ! \n" - " # CUDA WARNING: X Precision: cuda %i cpp %i\n\n", - shared_data.compile_settings.prec_x, (int) sizeof(X_CFLOAT) / 4); - - if(shared_data.compile_settings.prec_v != sizeof(V_CFLOAT) / 4) - printf("\n\n # CUDA WARNING: Compile Settings of cuda and cpp code differ! \n" - " # CUDA WARNING: V Precision: cuda %i cpp %i\n\n", - shared_data.compile_settings.prec_v, (int) sizeof(V_CFLOAT) / 4); - - if(shared_data.compile_settings.prec_f != sizeof(F_CFLOAT) / 4) - printf("\n\n # CUDA WARNING: Compile Settings of cuda and cpp code differ! \n" - " # CUDA WARNING: F Precision: cuda %i cpp %i\n\n", - shared_data.compile_settings.prec_f, (int) sizeof(F_CFLOAT) / 4); - - if(shared_data.compile_settings.prec_pppm != sizeof(PPPM_CFLOAT) / 4) - printf("\n\n # CUDA WARNING: Compile Settings of cuda and cpp code differ! \n" - " # CUDA WARNING: PPPM Precision: cuda %i cpp %i\n\n", - shared_data.compile_settings.prec_pppm, (int) sizeof(PPPM_CFLOAT) / 4); - - if(shared_data.compile_settings.prec_fft != sizeof(FFT_CFLOAT) / 4) - printf("\n\n # CUDA WARNING: Compile Settings of cuda and cpp code differ! \n" - " # CUDA WARNING: FFT Precision: cuda %i cpp %i\n\n", - shared_data.compile_settings.prec_fft, (int) sizeof(FFT_CFLOAT) / 4); - -#ifdef FFT_CUFFT - if(shared_data.compile_settings.cufft != 1) - printf("\n\n # CUDA WARNING: Compile Settings of cuda and cpp code differ! \n" - " # CUDA WARNING: cufft: cuda %i cpp %i\n\n", - shared_data.compile_settings.cufft, 1); -#else - if(shared_data.compile_settings.cufft != 0) - printf("\n\n # CUDA WARNING: Compile Settings of cuda and cpp code differ! 
\n" - " # CUDA WARNING: cufft: cuda %i cpp %i\n\n", - shared_data.compile_settings.cufft, 0); -#endif - - if(shared_data.compile_settings.arch != CUDA_ARCH) - printf("\n\n # CUDA WARNING: Compile Settings of cuda and cpp code differ! \n" - " # CUDA WARNING: arch: cuda %i cpp %i\n\n", - shared_data.compile_settings.cufft, CUDA_ARCH); - } - - cu_x = 0; - cu_v = 0; - cu_f = 0; - cu_tag = 0; - cu_type = 0; - cu_mask = 0; - cu_image = 0; - cu_xhold = 0; - cu_q = 0; - cu_rmass = 0; - cu_mass = 0; - cu_virial = 0; - cu_eatom = 0; - cu_vatom = 0; - cu_radius = 0; - cu_density = 0; - cu_omega = 0; - cu_torque = 0; - - cu_special = 0; - cu_nspecial = 0; - - cu_molecule = 0; - - cu_x_type = 0; - x_type = 0; - cu_v_radius = 0; - v_radius = 0; - cu_omega_rmass = 0; - omega_rmass = 0; - - binned_id = 0; - cu_binned_id = 0; - binned_idnew = 0; - cu_binned_idnew = 0; - - cu_map_array = 0; - - copy_buffer = 0; - copy_buffersize = 0; - - neighbor_decide_by_integrator = 0; - pinned = true; - - debugdata = 0; - - finished_setup = false; - begin_setup = false; - finished_run = false; - - setSharedDataZero(); - - uploadtime = 0; - downloadtime = 0; - dotiming = false; - - dotestatom = false; - testatom = 0; - oncpu = true; - - self_comm = 0; - MYDBG(printf("# CUDA: Cuda::Cuda Done...\n");) - //cCudaData<double, float, yx > -} - -/* ---------------------------------------------------------------------- */ - -Cuda::~Cuda() -{ - print_timings(); - - if (universe->me == 0) printf("# CUDA: Free memory...\n"); - - delete [] devicelist; - - delete cu_q; - delete cu_x; - delete cu_v; - delete cu_f; - delete cu_tag; - delete cu_type; - delete cu_mask; - delete cu_image; - delete cu_xhold; - delete cu_mass; - delete cu_rmass; - delete cu_virial; - delete cu_eng_vdwl; - delete cu_eng_coul; - delete cu_extent; - delete cu_eatom; - delete cu_vatom; - delete cu_radius; - delete cu_density; - delete cu_omega; - delete cu_torque; - delete cu_molecule; - - delete cu_x_type; - delete [] x_type; - delete 
cu_v_radius; - delete [] v_radius; - delete cu_omega_rmass; - delete [] omega_rmass; - - delete cu_debugdata; - delete[] debugdata; - - delete cu_map_array; - - std::map<NeighList*, CudaNeighList*>::iterator p = neigh_lists.begin(); - - while(p != neigh_lists.end()) { - delete p->second; - ++p; - } -} - -/* ---------------------------------------------------------------------- - package cuda command - can be invoked multiple times: -c on, -pk, package command - can only init GPUs once in activate(), so just store params here -------------------------------------------------------------------------- */ - -void Cuda::accelerator(int narg, char **arg) -{ - // this error should not happen - - if (device_set) error->all(FLERR,"USER-CUDA device is already activated"); - - // pppn = # of GPUs/node - - pppn = force->inumeric(FLERR,arg[0]); - if (pppn <= 0) error->all(FLERR,"Illegal package cuda command"); - - // optional args - - delete [] devicelist; - devicelist = NULL; - int newtonflag = 0; - - int iarg = 1; - while (iarg < narg) { - if (strcmp(arg[iarg],"newton") == 0) { - if (iarg+2 > narg) error->all(FLERR,"Illegal package cuda command"); - if (strcmp(arg[iarg+1],"off") == 0) newtonflag = 0; - else if (strcmp(arg[iarg+1],"on") == 0) newtonflag = 1; - else error->all(FLERR,"Illegal package cuda command"); - } else if (strcmp(arg[iarg],"gpuID") == 0) { - if (iarg+pppn+1 > narg) error->all(FLERR,"Illegal package cuda command"); - devicelist = new int[pppn]; - for (int k = 0; k < pppn; k++) - devicelist[k] = force->inumeric(FLERR,arg[iarg+k+1]); - iarg += pppn + 1; - } else if (strcmp(arg[iarg],"timing") == 0) { - dotiming = true; - iarg++; - } else if (strcmp(arg[iarg],"test") == 0) { - if (iarg+2 > narg) error->all(FLERR,"Illegal package cuda command"); - testatom = force->numeric(FLERR,arg[iarg+1]); - dotestatom = true; - iarg += 2; - } else if (strcmp(arg[iarg],"thread") == 0) { - if (iarg+2 > narg) error->all(FLERR,"Illegal package cuda command"); - if 
(strcmp(arg[iarg+1],"auto") == 0) - shared_data.pair.override_block_per_atom = -1; - else if (strcmp(arg[iarg+1],"tpa") == 0) - shared_data.pair.override_block_per_atom = 0; - else if (strcmp(arg[iarg+1],"bpa") == 0) - shared_data.pair.override_block_per_atom = 1; - else error->all(FLERR,"Illegal package cuda command"); - iarg += 2; - } - - // undocumented options - - else if (strcmp(arg[iarg],"suffix") == 0) { - if (iarg+2 > narg) error->all(FLERR,"Illegal package cuda command"); - strcpy(lmp->suffix,arg[iarg+1]); - iarg += 2; - } else if (strcmp(arg[iarg],"overlap_comm") == 0) { - shared_data.overlap_comm = 1; - iarg++; - } else if (strcmp(arg[iarg],"pinned") == 0) { - if (iarg+2 > narg) error->all(FLERR,"Illegal package cuda command"); - pinned = force->inumeric(FLERR,arg[iarg+1]) == 0 ? false : true; - if ((pinned == false) && (universe->me == 0)) - printf(" #CUDA: Pinned memory is not used for communication\n"); - iarg += 2; - } else error->all(FLERR,"Illegal package cuda command"); - } - - // set newton flags - - force->newton = force->newton_pair = force->newton_bond = newtonflag; -} - -/* ---------------------------------------------------------------------- - activate the GPUs - only done once with whatever settings used by the last package command -------------------------------------------------------------------------- */ - -void Cuda::activate() -{ - if (device_set) return; - device_set = true; - - if (universe->me == 0) printf("# CUDA: Activate GPU \n"); - - CudaWrapper_Init(0, (char**)0, universe->me, pppn, devicelist); - //if(shared_data.overlap_comm) - CudaWrapper_AddStreams(3); - cu_x = 0; - cu_v = 0; - cu_f = 0; - cu_tag = 0; - cu_type = 0; - cu_mask = 0; - cu_image = 0; - cu_xhold = 0; - cu_q = 0; - cu_rmass = 0; - cu_mass = 0; - cu_virial = 0; - cu_eatom = 0; - cu_vatom = 0; - cu_radius = 0; - cu_density = 0; - cu_omega = 0; - cu_torque = 0; - - cu_special = 0; - cu_nspecial = 0; - - cu_molecule = 0; - - cu_x_type = 0; - cu_v_radius = 0; - 
cu_omega_rmass = 0; - - cu_binned_id = 0; - cu_binned_idnew = 0; - allocate(); -} - -/* ---------------------------------------------------------------------- */ - -void Cuda::setSharedDataZero() -{ - MYDBG(printf("# CUDA: Cuda::setSharedDataZero ...\n");) - shared_data.atom.nlocal = 0; - shared_data.atom.nghost = 0; - shared_data.atom.nall = 0; - shared_data.atom.nmax = 0; - shared_data.atom.ntypes = 0; - shared_data.atom.q_flag = 0; - shared_data.atom.need_eatom = 0; - shared_data.atom.need_vatom = 0; - shared_data.atom.update_nmax = 1; - shared_data.atom.update_nlocal = 1; - shared_data.atom.update_neigh = 1; - - shared_data.pair.cudable_force = 0; - shared_data.pair.collect_forces_later = 0; - shared_data.pair.use_block_per_atom = 0; - shared_data.pair.override_block_per_atom = -1; - shared_data.pair.cut = 0; - shared_data.pair.cutsq = 0; - shared_data.pair.cut_inner = 0; - shared_data.pair.cut_coul = 0; - shared_data.pair.special_lj = 0; - shared_data.pair.special_coul = 0; - - shared_data.pair.neighall = false; - - shared_data.pppm.cudable_force = 0; - - shared_data.buffersize = 0; - shared_data.buffer_new = 1; - shared_data.buffer = NULL; - - shared_data.comm.comm_phase = 0; - shared_data.overlap_comm = 0; - - shared_data.comm.buffer = NULL; - shared_data.comm.buffer_size = 0; - shared_data.comm.overlap_split_ratio = 0; - // setTimingsZero(); -} - -void Cuda::allocate() -{ - MYDBG(printf("# CUDA: Cuda::allocate ...\n");) - - if(not cu_virial) { - cu_virial = new cCudaData<double, ENERGY_CFLOAT, x > (NULL, & shared_data.pair.virial , 6); - cu_eng_vdwl = new cCudaData<double, ENERGY_CFLOAT, x > (NULL, & shared_data.pair.eng_vdwl , 1); - cu_eng_coul = new cCudaData<double, ENERGY_CFLOAT, x > (NULL, & shared_data.pair.eng_coul , 1); - cu_extent = new cCudaData<double, double, x> (extent, 6); - shared_data.flag = CudaWrapper_AllocCudaData(sizeof(int)); - int size = 2 * CUDA_MAX_DEBUG_SIZE; - debugdata = new int[size]; - cu_debugdata = new cCudaData<int, int, x > 
(debugdata , size); - shared_data.debugdata = cu_debugdata->dev_data(); - } - - checkResize(); - setSystemParams(); - MYDBG(printf("# CUDA: Cuda::allocate done...\n");) -} - -void Cuda::setSystemParams() -{ - MYDBG(printf("# CUDA: Cuda::setSystemParams ...\n");) - shared_data.atom.nlocal = atom->nlocal; - shared_data.atom.nghost = atom->nghost; - shared_data.atom.nall = atom->nlocal + atom->nghost; - shared_data.atom.ntypes = atom->ntypes; - shared_data.atom.q_flag = atom->q_flag; - shared_data.atom.rmass_flag = atom->rmass_flag; - MYDBG(printf("# CUDA: Cuda::setSystemParams done ...\n");) -} - -void Cuda::setDomainParams() -{ - MYDBG(printf("# CUDA: Cuda::setDomainParams ...\n");) - cuda_shared_domain* cu_domain = &shared_data.domain; - - cu_domain->triclinic = domain->triclinic; - - for(short i = 0; i < 3; ++i) { - cu_domain->periodicity[i] = domain->periodicity[i]; - cu_domain->sublo[i] = domain->sublo[i]; - cu_domain->subhi[i] = domain->subhi[i]; - cu_domain->boxlo[i] = domain->boxlo[i]; - cu_domain->boxhi[i] = domain->boxhi[i]; - cu_domain->prd[i] = domain->prd[i]; - } - - if(domain->triclinic) { - for(short i = 0; i < 3; ++i) { - cu_domain->boxlo_lamda[i] = domain->boxlo_lamda[i]; - cu_domain->boxhi_lamda[i] = domain->boxhi_lamda[i]; - cu_domain->prd_lamda[i] = domain->prd_lamda[i]; - cu_domain->sublo[i] = domain->sublo_lamda[i]; - cu_domain->subhi[i] = domain->subhi_lamda[i]; - } - - cu_domain->xy = domain->xy; - cu_domain->xz = domain->xz; - cu_domain->yz = domain->yz; - } - - for(int i = 0; i < 6; i++) { - cu_domain->h[i] = domain->h[i]; - cu_domain->h_inv[i] = domain->h_inv[i]; - cu_domain->h_rate[i] = domain->h_rate[i]; - } - - cu_domain->update = 2; - MYDBG(printf("# CUDA: Cuda::setDomainParams done ...\n");) -} - -void Cuda::checkResize() -{ - MYDBG(printf("# CUDA: Cuda::checkResize ...\n");) - cuda_shared_atom* cu_atom = & shared_data.atom; - cu_atom->q_flag = atom->q_flag; - cu_atom->rmass_flag = atom->rmass ? 
1 : 0; - cu_atom->nall = atom->nlocal + atom->nghost; - cu_atom->nlocal = atom->nlocal; - cu_atom->nghost = atom->nghost; - - // do we have more atoms to upload than currently allocated memory on device? (also true if nothing yet allocated) - if(atom->nmax > cu_atom->nmax || cu_tag == NULL) { - delete cu_x; - cu_x = new cCudaData<double, X_CFLOAT, yx> ((double*)atom->x , & cu_atom->x , atom->nmax, 3, 0, true); //cu_x->set_buffer(&(shared_data.buffer),&(shared_data.buffersize),true); - delete cu_v; - cu_v = new cCudaData<double, V_CFLOAT, yx> ((double*)atom->v, & cu_atom->v , atom->nmax, 3); - delete cu_f; - cu_f = new cCudaData<double, F_CFLOAT, yx> ((double*)atom->f, & cu_atom->f , atom->nmax, 3, 0, true); - delete cu_tag; - cu_tag = new cCudaData<int , int , x > (atom->tag , & cu_atom->tag , atom->nmax, 0, true); - delete cu_type; - cu_type = new cCudaData<int , int , x > (atom->type , & cu_atom->type , atom->nmax, 0, true); - delete cu_mask; - cu_mask = new cCudaData<int , int , x > (atom->mask , & cu_atom->mask , atom->nmax, 0, true); - delete cu_image; - cu_image = new cCudaData<int , int , x > (atom->image , & cu_atom->image , atom->nmax, 0, true); - - if(atom->rmass) { - delete cu_rmass; - cu_rmass = new cCudaData<double, V_CFLOAT, x > (atom->rmass , & cu_atom->rmass , atom->nmax); - } - - if(cu_atom->q_flag) { - delete cu_q; - cu_q = new cCudaData<double, F_CFLOAT, x > ((double*)atom->q, & cu_atom->q , atom->nmax, 0 , true); - }// cu_q->set_buffer(&(copy_buffer),&(copy_buffersize),true);} - - if(atom->radius) { - delete cu_radius; - cu_radius = new cCudaData<double, X_CFLOAT, x > (atom->radius , & cu_atom->radius , atom->nmax); - delete cu_v_radius; - cu_v_radius = new cCudaData<V_CFLOAT, V_CFLOAT, x> (v_radius , & cu_atom->v_radius , atom->nmax * 4); - delete cu_omega_rmass; - cu_omega_rmass = new cCudaData<V_CFLOAT, V_CFLOAT, x> (omega_rmass , & cu_atom->omega_rmass , atom->nmax * 4); - } - - if(atom->omega) { - delete cu_omega; - cu_omega = new 
cCudaData<double, V_CFLOAT, yx > (((double*) atom->omega) , & cu_atom->omega , atom->nmax, 3); - } - - if(atom->torque) { - delete cu_torque; - cu_torque = new cCudaData<double, F_CFLOAT, yx > (((double*) atom->torque) , & cu_atom->torque , atom->nmax, 3); - } - - if(atom->special) { - delete cu_special; - cu_special = new cCudaData<int, int, yx > (((int*) & (atom->special[0][0])) , & cu_atom->special , atom->nmax, atom->maxspecial, 0 , true); - shared_data.atom.maxspecial = atom->maxspecial; - } - - if(atom->nspecial) { - delete cu_nspecial; - cu_nspecial = new cCudaData<int, int, yx > (((int*) atom->nspecial) , & cu_atom->nspecial , atom->nmax, 3, 0, true); - } - - if(atom->molecule) { - delete cu_molecule; - cu_molecule = new cCudaData<int, int, x > (((int*) atom->molecule) , & cu_atom->molecule , atom->nmax, 0 , true); - } - - shared_data.atom.special_flag = neighbor->special_flag; - shared_data.atom.molecular = atom->molecular; - - cu_atom->update_nmax = 2; - cu_atom->nmax = atom->nmax; - - delete cu_x_type; - cu_x_type = new cCudaData<X_CFLOAT, X_CFLOAT, x> (x_type , & cu_atom->x_type , atom->nmax * 4); - } - - if(((cu_xhold == NULL) || (cu_xhold->get_dim()[0] < neighbor->maxhold)) && neighbor->xhold) { - delete cu_xhold; - cu_xhold = new cCudaData<double, X_CFLOAT, yx> ((double*)neighbor->xhold, & cu_atom->xhold , neighbor->maxhold, 3); - shared_data.atom.maxhold = neighbor->maxhold; - } - - if(atom->mass && !cu_mass) { - cu_mass = new cCudaData<double, V_CFLOAT, x > (atom->mass , & cu_atom->mass , atom->ntypes + 1); - } - - cu_atom->mass_host = atom->mass; - - if(atom->map_style == 1) { - if(cu_map_array == NULL) { - cu_map_array = new cCudaData<int, int, x > (atom->get_map_array() , & cu_atom->map_array , atom->get_map_size()); - } else if(cu_map_array->dev_size() / sizeof(int) < atom->get_map_size()) { - delete cu_map_array; - cu_map_array = new cCudaData<int, int, x > (atom->get_map_array() , & cu_atom->map_array , atom->get_map_size()); - } - } - - - // 
if any of the host pointers have changed (e.g. re-allocated somewhere else), set to correct pointer - if(cu_x ->get_host_data() != atom->x) cu_x ->set_host_data((double*)(atom->x)); - - if(cu_v ->get_host_data() != atom->v) cu_v ->set_host_data((double*)(atom->v)); - - if(cu_f ->get_host_data() != atom->f) cu_f ->set_host_data((double*)(atom->f)); - - if(cu_tag ->get_host_data() != atom->tag) cu_tag ->set_host_data(atom->tag); - - if(cu_type->get_host_data() != atom->type) cu_type->set_host_data(atom->type); - - if(cu_mask->get_host_data() != atom->mask) cu_mask->set_host_data(atom->mask); - - if(cu_image->get_host_data() != atom->image) cu_mask->set_host_data(atom->image); - - if(cu_xhold) - if(cu_xhold->get_host_data() != neighbor->xhold) cu_xhold->set_host_data((double*)(neighbor->xhold)); - - if(atom->rmass) - if(cu_rmass->get_host_data() != atom->rmass) cu_rmass->set_host_data((double*)(atom->rmass)); - - if(cu_atom->q_flag) - if(cu_q->get_host_data() != atom->q) cu_q->set_host_data((double*)(atom->q)); - - if(atom->radius) - if(cu_radius->get_host_data() != atom->radius) cu_radius->set_host_data((double*)(atom->radius)); - - if(atom->omega) - if(cu_omega->get_host_data() != atom->omega) cu_omega->set_host_data((double*)(atom->omega)); - - if(atom->torque) - if(cu_torque->get_host_data() != atom->torque) cu_torque->set_host_data((double*)(atom->torque)); - - if(atom->special) - if(cu_special->get_host_data() != atom->special) { - delete cu_special; - cu_special = new cCudaData<int, int, yx > (((int*) atom->special) , & cu_atom->special , atom->nmax, atom->maxspecial); - shared_data.atom.maxspecial = atom->maxspecial; - } - - if(atom->nspecial) - if(cu_nspecial->get_host_data() != atom->nspecial) cu_nspecial->set_host_data((int*)(atom->nspecial)); - - if(atom->molecule) - if(cu_molecule->get_host_data() != atom->molecule) cu_molecule->set_host_data((int*)(atom->molecule)); - - if(force) - if(cu_virial ->get_host_data() != force->pair->virial) cu_virial 
->set_host_data(force->pair->virial); - - if(force) - if(cu_eng_vdwl ->get_host_data() != &force->pair->eng_vdwl) cu_eng_vdwl ->set_host_data(&force->pair->eng_vdwl); - - if(force) - if(cu_eng_coul ->get_host_data() != &force->pair->eng_coul) cu_eng_coul ->set_host_data(&force->pair->eng_coul); - - cu_atom->update_nlocal = 2; - MYDBG(printf("# CUDA: Cuda::checkResize done...\n");) -} - -void Cuda::evsetup_eatom_vatom(int eflag_atom, int vflag_atom) -{ - if(eflag_atom) { - if(not cu_eatom) - cu_eatom = new cCudaData<double, ENERGY_CFLOAT, x > (force->pair->eatom, & (shared_data.atom.eatom) , atom->nmax); // cu_eatom->set_buffer(&(copy_buffer),&(copy_buffersize),true);} - - if(cu_eatom->get_dim()[0] != atom->nmax) { - //delete cu_eatom; - //cu_eatom = new cCudaData<double, ENERGY_CFLOAT, x > (force->pair->eatom, & (shared_data.atom.eatom) , atom->nmax );// cu_eatom->set_buffer(&(copy_buffer),&(copy_buffersize),true);} - shared_data.atom.update_nmax = 2; - } - - cu_eatom->set_host_data(force->pair->eatom); - cu_eatom->memset_device(0); - } - - if(vflag_atom) { - if(not cu_vatom) - cu_vatom = new cCudaData<double, ENERGY_CFLOAT, yx > ((double*)force->pair->vatom, & (shared_data.atom.vatom) , atom->nmax , 6);// cu_vatom->set_buffer(&(copy_buffer),&(copy_buffersize),true);} - - if(cu_vatom->get_dim()[0] != atom->nmax) { - //delete cu_vatom; - //cu_vatom = new cCudaData<double, ENERGY_CFLOAT, yx > ((double*)force->pair->vatom, & (shared_data.atom.vatom) , atom->nmax ,6 );// cu_vatom->set_buffer(&(copy_buffer),&(copy_buffersize),true);} - shared_data.atom.update_nmax = 2; - } - - cu_vatom->set_host_data((double*)force->pair->vatom); - cu_vatom->memset_device(0); - } -} - -void Cuda::uploadAll() -{ - MYDBG(printf("# CUDA: Cuda::uploadAll() ... 
start\n");) - my_times starttime; - my_times endtime; - - if(atom->nmax != shared_data.atom.nmax) checkResize(); - - my_gettime(CLOCK_REALTIME, &starttime); - cu_x ->upload(); - cu_v ->upload(); - cu_f ->upload(); - cu_tag ->upload(); - cu_type->upload(); - cu_mask->upload(); - cu_image->upload(); - - if(shared_data.atom.q_flag) cu_q ->upload(); - - if(atom->rmass) cu_rmass->upload(); - - if(atom->radius) cu_radius->upload(); - - if(atom->omega) cu_omega->upload(); - - if(atom->torque) cu_torque->upload(); - - if(atom->special) cu_special->upload(); - - if(atom->nspecial) cu_nspecial->upload(); - - if(atom->molecule) cu_molecule->upload(); - - if(cu_eatom) cu_eatom->upload(); - - if(cu_vatom) cu_vatom->upload(); - - my_gettime(CLOCK_REALTIME, &endtime); - uploadtime += (endtime.tv_sec - starttime.tv_sec + 1.0 * (endtime.tv_nsec - starttime.tv_nsec) / 1000000000); - CUDA_IF_BINNING(Cuda_PreBinning(& shared_data);) - CUDA_IF_BINNING(Cuda_Binning(& shared_data);) - - shared_data.atom.triggerneighsq = neighbor->triggersq; - MYDBG(printf("# CUDA: Cuda::uploadAll() ... end\n");) -} - -void Cuda::downloadAll() -{ - MYDBG(printf("# CUDA: Cuda::downloadAll() ... 
start\n");) - my_times starttime; - my_times endtime; - - if(atom->nmax != shared_data.atom.nmax) checkResize(); - - CUDA_IF_BINNING(Cuda_ReverseBinning(& shared_data);) - my_gettime(CLOCK_REALTIME, &starttime); - cu_x ->download(); - cu_v ->download(); - cu_f ->download(); - cu_type->download(); - cu_tag ->download(); - cu_mask->download(); - cu_image->download(); - - //if(shared_data.atom.need_eatom) cu_eatom->download(); - //if(shared_data.atom.need_vatom) cu_vatom->download(); - - if(shared_data.atom.q_flag) cu_q ->download(); - - if(atom->rmass) cu_rmass->download(); - - if(atom->radius) cu_radius->download(); - - if(atom->omega) cu_omega->download(); - - if(atom->torque) cu_torque->download(); - - if(atom->special) cu_special->download(); - - if(atom->nspecial) cu_nspecial->download(); - - if(atom->molecule) cu_molecule->download(); - - if(cu_eatom) cu_eatom->download(); - - if(cu_vatom) cu_vatom->download(); - - my_gettime(CLOCK_REALTIME, &endtime); - downloadtime += (endtime.tv_sec - starttime.tv_sec + 1.0 * (endtime.tv_nsec - starttime.tv_nsec) / 1000000000); - MYDBG(printf("# CUDA: Cuda::downloadAll() ... end\n");) -} - -void Cuda::upload(int datamask) -{ - MYDBG(printf("# CUDA: Cuda::upload() ... 
start\n");) - my_times starttime; - my_times endtime; - - if(atom->nmax != shared_data.atom.nmax) checkResize(); - - my_gettime(CLOCK_REALTIME, &starttime); - if(X_MASK & datamask) cu_x ->upload(); - if(V_MASK & datamask) cu_v ->upload(); - if(F_MASK & datamask) cu_f ->upload(); - if(TYPE_MASK & datamask) cu_type->upload(); - if(TAG_MASK & datamask) cu_tag ->upload(); - if(MASK_MASK & datamask) cu_mask->upload(); - if(IMAGE_MASK & datamask) cu_image->upload(); - - //if(shared_data.atom.need_eatom) cu_eatom->upload(); - //if(shared_data.atom.need_vatom) cu_vatom->upload(); - - if(shared_data.atom.q_flag) - if(Q_MASK & datamask) cu_q ->upload(); - - if(atom->rmass) - if(RMASS_MASK & datamask) cu_rmass->upload(); - - if(atom->radius) - if(RADIUS_MASK & datamask) cu_radius->upload(); - - if(atom->omega) - if(OMEGA_MASK & datamask) cu_omega->upload(); - - if(atom->torque) - if(TORQUE_MASK & datamask) cu_torque->upload(); - - if(atom->special) - if(SPECIAL_MASK & datamask) cu_special->upload(); - - if(atom->nspecial) - if(SPECIAL_MASK & datamask) cu_nspecial->upload(); - - if(atom->molecule) - if(MOLECULE_MASK & datamask) cu_molecule->upload(); - - if(cu_eatom) cu_eatom->upload(); - - if(cu_vatom) cu_vatom->upload(); - - my_gettime(CLOCK_REALTIME, &endtime); - uploadtime += (endtime.tv_sec - starttime.tv_sec + 1.0 * (endtime.tv_nsec - starttime.tv_nsec) / 1000000000); - MYDBG(printf("# CUDA: Cuda::upload() ... end\n");) -} - -void Cuda::download(int datamask) -{ - MYDBG(printf("# CUDA: Cuda::download() ... 
start\n");) - my_times starttime; - my_times endtime; - - if(atom->nmax != shared_data.atom.nmax) checkResize(); - - CUDA_IF_BINNING(Cuda_ReverseBinning(& shared_data);) - my_gettime(CLOCK_REALTIME, &starttime); - if(X_MASK & datamask) cu_x ->download(); - if(V_MASK & datamask) cu_v ->download(); - if(F_MASK & datamask) cu_f ->download(); - if(TYPE_MASK & datamask) cu_type->download(); - if(TAG_MASK & datamask) cu_tag ->download(); - if(MASK_MASK & datamask) cu_mask->download(); - if(IMAGE_MASK & datamask) cu_image->download(); - - //if(shared_data.atom.need_eatom) cu_eatom->download(); - //if(shared_data.atom.need_vatom) cu_vatom->download(); - - if(shared_data.atom.q_flag) - if(Q_MASK & datamask) cu_q ->download(); - - if(atom->rmass) - if(RMASS_MASK & datamask) cu_rmass->download(); - - if(atom->radius) - if(RADIUS_MASK & datamask) cu_radius->download(); - - if(atom->omega) - if(OMEGA_MASK & datamask) cu_omega->download(); - - if(atom->torque) - if(TORQUE_MASK & datamask) cu_torque->download(); - - if(atom->special) - if(SPECIAL_MASK & datamask) cu_special->download(); - - if(atom->nspecial) - if(SPECIAL_MASK & datamask) cu_nspecial->download(); - - if(atom->molecule) - if(MOLECULE_MASK & datamask) cu_molecule->download(); - - if(cu_eatom) cu_eatom->download(); - - if(cu_vatom) cu_vatom->download(); - - my_gettime(CLOCK_REALTIME, &endtime); - downloadtime += (endtime.tv_sec - starttime.tv_sec + 1.0 * (endtime.tv_nsec - starttime.tv_nsec) / 1000000000); - MYDBG(printf("# CUDA: Cuda::download() ... end\n");) -} - -void Cuda::downloadX() -{ - Cuda_Pair_RevertXType(& this->shared_data); - cu_x->download(); -} - -CudaNeighList* Cuda::registerNeighborList(class NeighList* neigh_list) -{ - MYDBG(printf("# CUDA: Cuda::registerNeighborList() ... 
start a\n");) - std::map<NeighList*, CudaNeighList*>::iterator p = neigh_lists.find(neigh_list); - - if(p != neigh_lists.end()) return p->second; - else { - CudaNeighList* neigh_list_cuda = new CudaNeighList(lmp, neigh_list); - neigh_lists.insert(std::pair<NeighList*, CudaNeighList*>(neigh_list, neigh_list_cuda)); - return neigh_list_cuda; - } - - MYDBG(printf("# CUDA: Cuda::registerNeighborList() ... end b\n");) -} - -void Cuda::uploadAllNeighborLists() -{ - MYDBG(printf("# CUDA: Cuda::uploadAllNeighborList() ... start\n");) - std::map<NeighList*, CudaNeighList*>::iterator p = neigh_lists.begin(); - - while(p != neigh_lists.end()) { - p->second->nl_upload(); - - if(not(p->second->neigh_list->cuda_list->build_cuda)) - for(int i = 0; i < atom->nlocal; i++) - p->second->sneighlist.maxneighbors = MAX(p->second->neigh_list->numneigh[i], p->second->sneighlist.maxneighbors) ; - - ++p; - } - - MYDBG(printf("# CUDA: Cuda::uploadAllNeighborList() ... done\n");) -} - -void Cuda::downloadAllNeighborLists() -{ - MYDBG(printf("# CUDA: Cuda::downloadAllNeighborList() ... 
start\n");) - std::map<NeighList*, CudaNeighList*>::iterator p = neigh_lists.begin(); - - while(p != neigh_lists.end()) { - p->second->nl_download(); - ++p; - } -} - -void Cuda::update_xhold(int &maxhold, double* xhold) -{ - if(this->shared_data.atom.maxhold < atom->nmax) { - maxhold = atom->nmax; - delete this->cu_xhold; - this->cu_xhold = new cCudaData<double, X_CFLOAT, yx> ((double*)xhold, & this->shared_data.atom.xhold , maxhold, 3); - } - - this->shared_data.atom.maxhold = maxhold; - CudaWrapper_CopyData(this->cu_xhold->dev_data(), this->cu_x->dev_data(), 3 * atom->nmax * sizeof(X_CFLOAT)); -} - -void Cuda::setTimingsZero() -{ - shared_data.cuda_timings.test1 = 0; - shared_data.cuda_timings.test2 = 0; - - //communication - shared_data.cuda_timings.comm_forward_total = 0; - shared_data.cuda_timings.comm_forward_mpi_upper = 0; - shared_data.cuda_timings.comm_forward_mpi_lower = 0; - shared_data.cuda_timings.comm_forward_kernel_pack = 0; - shared_data.cuda_timings.comm_forward_kernel_unpack = 0; - shared_data.cuda_timings.comm_forward_upload = 0; - shared_data.cuda_timings.comm_forward_download = 0; - - shared_data.cuda_timings.comm_exchange_total = 0; - shared_data.cuda_timings.comm_exchange_mpi = 0; - shared_data.cuda_timings.comm_exchange_kernel_pack = 0; - shared_data.cuda_timings.comm_exchange_kernel_unpack = 0; - shared_data.cuda_timings.comm_exchange_kernel_fill = 0; - shared_data.cuda_timings.comm_exchange_cpu_pack = 0; - shared_data.cuda_timings.comm_exchange_upload = 0; - shared_data.cuda_timings.comm_exchange_download = 0; - - shared_data.cuda_timings.comm_border_total = 0; - shared_data.cuda_timings.comm_border_mpi = 0; - shared_data.cuda_timings.comm_border_kernel_pack = 0; - shared_data.cuda_timings.comm_border_kernel_unpack = 0; - shared_data.cuda_timings.comm_border_kernel_buildlist = 0; - shared_data.cuda_timings.comm_border_kernel_self = 0; - shared_data.cuda_timings.comm_border_upload = 0; - shared_data.cuda_timings.comm_border_download = 0; - 
- //pair forces - shared_data.cuda_timings.pair_xtype_conversion = 0; - shared_data.cuda_timings.pair_kernel = 0; - shared_data.cuda_timings.pair_virial = 0; - shared_data.cuda_timings.pair_force_collection = 0; - - //neighbor - shared_data.cuda_timings.neigh_bin = 0; - shared_data.cuda_timings.neigh_build = 0; - shared_data.cuda_timings.neigh_special = 0; - - //PPPM - shared_data.cuda_timings.pppm_particle_map = 0; - shared_data.cuda_timings.pppm_make_rho = 0; - shared_data.cuda_timings.pppm_brick2fft = 0; - shared_data.cuda_timings.pppm_poisson = 0; - shared_data.cuda_timings.pppm_fillbrick = 0; - shared_data.cuda_timings.pppm_fieldforce = 0; - shared_data.cuda_timings.pppm_compute = 0; - - CudaWrapper_CheckUploadTime(true); - CudaWrapper_CheckDownloadTime(true); - CudaWrapper_CheckCPUBufUploadTime(true); - CudaWrapper_CheckCPUBufDownloadTime(true); -} - -void Cuda::print_timings() -{ - if(universe->me != 0) return; - - if(not dotiming) return; - - printf("\n # CUDA: Special timings\n\n"); - printf("\n Transfer Times\n"); - printf(" PCIe Upload: \t %lf s\n", CudaWrapper_CheckUploadTime()); - printf(" PCIe Download:\t %lf s\n", CudaWrapper_CheckDownloadTime()); - printf(" CPU Tempbbuf Upload: \t %lf \n", CudaWrapper_CheckCPUBufUploadTime()); - printf(" CPU Tempbbuf Download: \t %lf \n", CudaWrapper_CheckCPUBufDownloadTime()); - - printf("\n Communication \n"); - - printf(" Forward Total \t %lf \n", shared_data.cuda_timings.comm_forward_total); - printf(" Forward MPI Upper Bound \t %lf \n", shared_data.cuda_timings.comm_forward_mpi_upper); - printf(" Forward MPI Lower Bound \t %lf \n", shared_data.cuda_timings.comm_forward_mpi_lower); - printf(" Forward Kernel Pack \t %lf \n", shared_data.cuda_timings.comm_forward_kernel_pack); - printf(" Forward Kernel Unpack \t %lf \n", shared_data.cuda_timings.comm_forward_kernel_unpack); - printf(" Forward Kernel Self \t %lf \n", shared_data.cuda_timings.comm_forward_kernel_self); - printf(" Forward Upload \t %lf \n", 
shared_data.cuda_timings.comm_forward_upload); - printf(" Forward Download \t %lf \n", shared_data.cuda_timings.comm_forward_download); - printf(" Forward Overlap Split Ratio\t %lf \n", shared_data.comm.overlap_split_ratio); - printf("\n"); - - printf(" Exchange Total \t %lf \n", shared_data.cuda_timings.comm_exchange_total); - printf(" Exchange MPI \t %lf \n", shared_data.cuda_timings.comm_exchange_mpi); - printf(" Exchange Kernel Pack \t %lf \n", shared_data.cuda_timings.comm_exchange_kernel_pack); - printf(" Exchange Kernel Unpack \t %lf \n", shared_data.cuda_timings.comm_exchange_kernel_unpack); - printf(" Exchange Kernel Fill \t %lf \n", shared_data.cuda_timings.comm_exchange_kernel_fill); - printf(" Exchange CPU Pack \t %lf \n", shared_data.cuda_timings.comm_exchange_cpu_pack); - printf(" Exchange Upload \t %lf \n", shared_data.cuda_timings.comm_exchange_upload); - printf(" Exchange Download \t %lf \n", shared_data.cuda_timings.comm_exchange_download); - printf("\n"); - - printf(" Border Total \t %lf \n", shared_data.cuda_timings.comm_border_total); - printf(" Border MPI \t %lf \n", shared_data.cuda_timings.comm_border_mpi); - printf(" Border Kernel Pack \t %lf \n", shared_data.cuda_timings.comm_border_kernel_pack); - printf(" Border Kernel Unpack \t %lf \n", shared_data.cuda_timings.comm_border_kernel_unpack); - printf(" Border Kernel Self \t %lf \n", shared_data.cuda_timings.comm_border_kernel_self); - printf(" Border Kernel BuildList \t %lf \n", shared_data.cuda_timings.comm_border_kernel_buildlist); - printf(" Border Upload \t %lf \n", shared_data.cuda_timings.comm_border_upload); - printf(" Border Download \t %lf \n", shared_data.cuda_timings.comm_border_download); - printf("\n"); - - //pair forces - printf(" Pair XType Conversion \t %lf \n", shared_data.cuda_timings.pair_xtype_conversion); - printf(" Pair Kernel \t %lf \n", shared_data.cuda_timings.pair_kernel); - printf(" Pair Virial \t %lf \n", shared_data.cuda_timings.pair_virial); - printf(" Pair 
Force Collection \t %lf \n", shared_data.cuda_timings.pair_force_collection); - printf("\n"); - - //neighbor - printf(" Neighbor Binning \t %lf \n", shared_data.cuda_timings.neigh_bin); - printf(" Neighbor Build \t %lf \n", shared_data.cuda_timings.neigh_build); - printf(" Neighbor Special \t %lf \n", shared_data.cuda_timings.neigh_special); - printf("\n"); - - //pppm - if(force->kspace) { - printf(" PPPM Total \t %lf \n", shared_data.cuda_timings.pppm_compute); - printf(" PPPM Particle Map \t %lf \n", shared_data.cuda_timings.pppm_particle_map); - printf(" PPPM Make Rho \t %lf \n", shared_data.cuda_timings.pppm_make_rho); - printf(" PPPM Brick2fft \t %lf \n", shared_data.cuda_timings.pppm_brick2fft); - printf(" PPPM Poisson \t %lf \n", shared_data.cuda_timings.pppm_poisson); - printf(" PPPM Fillbrick \t %lf \n", shared_data.cuda_timings.pppm_fillbrick); - printf(" PPPM Fieldforce \t %lf \n", shared_data.cuda_timings.pppm_fieldforce); - printf("\n"); - } - - printf(" Debug Test 1 \t %lf \n", shared_data.cuda_timings.test1); - printf(" Debug Test 2 \t %lf \n", shared_data.cuda_timings.test2); - - printf("\n"); -} diff --git a/src/USER-CUDA/cuda_data.h b/src/USER-CUDA/cuda_data.h deleted file mode 100644 index bb778c12d3..0000000000 --- a/src/USER-CUDA/cuda_data.h +++ /dev/null @@ -1,796 +0,0 @@ -/* -*- c++ -*- ---------------------------------------------------------- - LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator - - Original Version: - http://lammps.sandia.gov, Sandia National Laboratories - Steve Plimpton, sjplimp@sandia.gov - - See the README file in the top-level LAMMPS directory. 
- - ----------------------------------------------------------------------- - - USER-CUDA Package and associated modifications: - https://sourceforge.net/projects/lammpscuda/ - - Christian Trott, christian.trott@tu-ilmenau.de - Lars Winterfeld, lars.winterfeld@tu-ilmenau.de - Theoretical Physics II, University of Technology Ilmenau, Germany - - See the README file in the USER-CUDA directory. - - This software is distributed under the GNU General Public License. -------------------------------------------------------------------------- */ - -#ifndef _CUDA_DATA_H_ -#define _CUDA_DATA_H_ - - -enum copy_mode {x, xx, xy, yx, xyz, xzy}; // yxz, yzx, zxy, zyx not yet implemented since they were not needed yet -//xx==x in atom_vec x is a member therefore copymode x produces compile errors -#include "cuda_shared.h" -#include "cuda_wrapper_cu.h" -#include "cuda_data_cu.h" -#include <ctime> - -#include <cstdio> -#include <typeinfo> -template <typename host_type, typename dev_type, copy_mode mode> -class cCudaData -{ - protected: - void** buffer; - int* buf_size; - host_type* host_data; - dev_array* dev_data_array; - dev_type* temp_data; - unsigned nbytes; - bool owns_dev_array; - bool current_data_on_device; //this is not yet working as intended and therefore deactivated - bool current_data_on_host; - bool is_continues; - bool pinned; - - public: - cCudaData(host_type* host_data, dev_array* dev_data_array, unsigned dim_x, unsigned dim_y=0, unsigned dim_z=0, bool is_pinned=false); - cCudaData(host_type* host_data, unsigned dim_x, unsigned dim_y=0, unsigned dim_z=0, bool is_pinned=false); - ~cCudaData(); - void* dev_data() {if(dev_data_array!=NULL) return dev_data_array->dev_data; else return NULL;}; - void set_dev_data(void* adev_data) {dev_data_array->dev_data=adev_data;}; - void set_dev_array(dev_array* adev_array) {dev_data_array=adev_array;}; - void set_host_data(host_type* host_data); - void* get_host_data() { return host_data;}; - void set_buffer(void** buffer,int* 
buf_size,bool ais_continues); - unsigned int* get_dim() {return dev_data_array->dim;}; - // if you want to upload data to the gpu, which will not change there, then set will_be_changed=false - // if you want to upload data to the gpu and update it there, then set will_be_changed=true (default) - void upload(bool will_be_changed=true); - void uploadAsync(int stream, bool will_be_changed=true ); - // if you want to download data just to have a look at it, then set will_be_changed=false - // if you are going to modify the downloaded data, then set will_be_changed=true (default) - void download(bool will_be_changed=true); - void downloadAsync(int stream); - void memset_device(int value); - void device_data_has_changed() {current_data_on_device=false;} - void host_data_has_changed() {current_data_on_host=false;} - int dev_size() { - int size = dev_data_array->dim[0]*sizeof(dev_type); - if(dev_data_array->dim[1]) size*=dev_data_array->dim[1]; - if(dev_data_array->dim[2]) size*=dev_data_array->dim[2]; - return size;} -}; - - -template <typename host_type, typename dev_type, copy_mode mode> -cCudaData<host_type, dev_type, mode> -::cCudaData(host_type* host_data, dev_array* dev_data_array, unsigned dim_x, unsigned dim_y, unsigned dim_z, bool is_pinned) -{ - pinned=is_pinned; - owns_dev_array = false; - current_data_on_device = false; - current_data_on_host = false; - is_continues = false; - this->host_data = host_data; - this->dev_data_array = dev_data_array; - unsigned ndev; - if((mode == x)||(mode==xx)) - { - ndev = dim_x; - dev_data_array->dim[0] = dim_x; - dev_data_array->dim[1] = 0; - dev_data_array->dim[2] = 0; - } - else if(mode == xy || mode == yx ) - { - ndev = dim_x * dim_y; - dev_data_array->dim[0] = dim_x; - dev_data_array->dim[1] = dim_y; - dev_data_array->dim[2] = 0; - } - else - { - ndev = dim_x * dim_y * dim_z; - dev_data_array->dim[0] = dim_x; - dev_data_array->dim[1] = dim_y; - dev_data_array->dim[2] = dim_z; - } - nbytes = ndev * sizeof(dev_type); - 
if(nbytes<=0) - { - host_data=NULL; - temp_data=NULL; - dev_data_array->dev_data=NULL; - return; - } - - dev_data_array->dev_data = CudaWrapper_AllocCudaData(nbytes); - if(((mode!=x)&&(mode!=xx)) || typeid(host_type) != typeid(dev_type)) - { - if(not pinned) - temp_data = new dev_type[ndev]; - else - { - temp_data = (dev_type*) CudaWrapper_AllocPinnedHostData(ndev*sizeof(dev_type)); - } - } -} - -template <typename host_type, typename dev_type, copy_mode mode> -cCudaData<host_type, dev_type, mode> -::cCudaData(host_type* host_data, unsigned dim_x, unsigned dim_y, unsigned dim_z, bool is_pinned) -{ - pinned=is_pinned; - this->dev_data_array = new dev_array; - this->owns_dev_array = true; - current_data_on_device = false; - current_data_on_host = false; - is_continues = false; - this->host_data = host_data; - unsigned ndev; - if((mode == x)||(mode==xx)) - { - ndev = dim_x; - dev_data_array->dim[0] = dim_x; - dev_data_array->dim[1] = 0; - dev_data_array->dim[2] = 0; - } - else if(mode == xy || mode == yx ) - { - ndev = dim_x * dim_y; - dev_data_array->dim[0] = dim_x; - dev_data_array->dim[1] = dim_y; - dev_data_array->dim[2] = 0; - } - else - { - ndev = dim_x * dim_y * dim_z; - dev_data_array->dim[0] = dim_x; - dev_data_array->dim[1] = dim_y; - dev_data_array->dim[2] = dim_z; - } - nbytes = ndev * sizeof(dev_type); - if(nbytes<=0) - { - host_data=NULL; - temp_data=NULL; - dev_data_array->dev_data=NULL; - return; - } - - dev_data_array->dev_data = CudaWrapper_AllocCudaData(nbytes); - if(((mode!=x)&&(mode!=xx)) || (typeid(host_type) != typeid(dev_type))) - { - if(not pinned) - temp_data = new dev_type[ndev]; - else - { - temp_data = (dev_type*) CudaWrapper_AllocPinnedHostData(ndev*sizeof(dev_type)); - } - } -} - -template <typename host_type, typename dev_type, copy_mode mode> -cCudaData<host_type, dev_type, mode> -::~cCudaData() -{ - if(((mode!=x)&&(mode!=xx)) || typeid(host_type) != typeid(dev_type)) - { - if(not pinned) - delete [] temp_data; - else - { - 
CudaWrapper_FreePinnedHostData((void*)temp_data); - } - } - if((dev_data_array->dev_data)&&(nbytes>0)) - CudaWrapper_FreeCudaData(dev_data_array->dev_data,nbytes); - if(owns_dev_array) delete dev_data_array; -} - -template <typename host_type, typename dev_type, copy_mode mode> -void cCudaData<host_type, dev_type, mode> -::set_host_data(host_type* host_data) -{ - this->host_data = host_data; -} - -template <typename host_type, typename dev_type, copy_mode mode> -void cCudaData<host_type, dev_type, mode> -::upload(bool will_be_changed) -{ - // if current data is already up, do not re-upload it -// if(current_data_on_device) return; - if(buffer&&is_continues) - { - printf("Actual Buffer: %p %i\n",*buffer,*buf_size); - if(typeid(host_type)==typeid(double)) - { - if(typeid(dev_type)==typeid(double)) - { - CudaData_Upload_DoubleDouble((void*) host_data,dev_data_array->dev_data, - dev_data_array->dim,mode,*buffer); - current_data_on_device = true; - if(will_be_changed) current_data_on_host = false; - return; - } - else if(typeid(dev_type)==typeid(float)) - { - CudaData_Upload_DoubleFloat((void*) host_data,dev_data_array->dev_data, - dev_data_array->dim,mode,*buffer); - current_data_on_device = true; - if(will_be_changed) current_data_on_host = false; - return; - } - } - else if(typeid(host_type)==typeid(float)) - { - if(typeid(dev_type)==typeid(double)) - { - CudaData_Upload_FloatDouble((void*) host_data,dev_data_array->dev_data, - dev_data_array->dim,mode,*buffer); - current_data_on_device = true; - if(will_be_changed) current_data_on_host = false; - return; - } - else if(typeid(dev_type)==typeid(float)) - { - CudaData_Upload_FloatFloat((void*) host_data,dev_data_array->dev_data, - dev_data_array->dim,mode,*buffer); - current_data_on_device = true; - if(will_be_changed) current_data_on_host = false; - return; - } - } - else if(typeid(host_type)==typeid(int)) - { - if(typeid(dev_type)==typeid(int)) - { - CudaData_Upload_IntInt((void*) host_data,dev_data_array->dev_data, 
- dev_data_array->dim,mode,*buffer); - current_data_on_device = true; - if(will_be_changed) current_data_on_host = false; - return; - } - } - } - switch(mode) - { - case x: - { - if(typeid(host_type) == typeid(dev_type)) - CudaWrapper_UploadCudaData(host_data, dev_data_array->dev_data, nbytes); - else - { - timespec time1,time2; - my_gettime(CLOCK_REALTIME,&time1); - for(unsigned i=0; i<dev_data_array->dim[0]; ++i) temp_data[i] = static_cast<dev_type>(host_data[i]); - my_gettime(CLOCK_REALTIME,&time2); - CudaWrapper_AddCPUBufUploadTime( - time2.tv_sec-time1.tv_sec+1.0*(time2.tv_nsec-time1.tv_nsec)/1000000000); - CudaWrapper_UploadCudaData(temp_data, dev_data_array->dev_data, nbytes); - } - break; - } - - case xx: - { - if(typeid(host_type) == typeid(dev_type)) - CudaWrapper_UploadCudaData(host_data, dev_data_array->dev_data, nbytes); - else - { - timespec time1,time2; - my_gettime(CLOCK_REALTIME,&time1); - for(unsigned i=0; i<dev_data_array->dim[0]; ++i) temp_data[i] = static_cast<dev_type>(host_data[i]); - my_gettime(CLOCK_REALTIME,&time2); - CudaWrapper_AddCPUBufUploadTime( - time2.tv_sec-time1.tv_sec+1.0*(time2.tv_nsec-time1.tv_nsec)/1000000000); - CudaWrapper_UploadCudaData(temp_data, dev_data_array->dev_data, nbytes); - } - break; - } - - case xy: - { - timespec time1,time2; - my_gettime(CLOCK_REALTIME,&time1); - for(unsigned i=0; i<dev_data_array->dim[0]; ++i) - { - dev_type* temp = &temp_data[i * dev_data_array->dim[1]]; - for(unsigned j=0; j<dev_data_array->dim[1]; ++j) - { - temp[j] = static_cast<dev_type>((reinterpret_cast<host_type**>(host_data))[i][j]); - } - } - my_gettime(CLOCK_REALTIME,&time2); - CudaWrapper_AddCPUBufUploadTime( - time2.tv_sec-time1.tv_sec+1.0*(time2.tv_nsec-time1.tv_nsec)/1000000000); - CudaWrapper_UploadCudaData(temp_data, dev_data_array->dev_data, nbytes); - break; - } - - case yx: - { - timespec time1,time2; - my_gettime(CLOCK_REALTIME,&time1); - for(unsigned j=0; j<dev_data_array->dim[1]; ++j) - { - dev_type* temp = 
&temp_data[j*dev_data_array->dim[0]]; - for(unsigned i=0; i<dev_data_array->dim[0]; ++i) - { - temp[i] = static_cast<dev_type>(reinterpret_cast<host_type**>(host_data)[i][j]); - } - } - my_gettime(CLOCK_REALTIME,&time2); - CudaWrapper_AddCPUBufUploadTime( - time2.tv_sec-time1.tv_sec+1.0*(time2.tv_nsec-time1.tv_nsec)/1000000000); - CudaWrapper_UploadCudaData(temp_data, dev_data_array->dev_data, nbytes); - break; - } - case xyz: - { - timespec time1,time2; - my_gettime(CLOCK_REALTIME,&time1); - for(unsigned i=0; i<dev_data_array->dim[0]; ++i) - for(unsigned j=0; j<dev_data_array->dim[1]; ++j) - { - dev_type* temp = &temp_data[(i*dev_data_array->dim[1]+j)*dev_data_array->dim[2]]; - for(unsigned k=0; k<dev_data_array->dim[2]; ++k) - { - temp[k] = static_cast<dev_type>(reinterpret_cast<host_type***>(host_data)[i][j][k]); - } - } - my_gettime(CLOCK_REALTIME,&time2); - CudaWrapper_AddCPUBufUploadTime( - time2.tv_sec-time1.tv_sec+1.0*(time2.tv_nsec-time1.tv_nsec)/1000000000); - CudaWrapper_UploadCudaData(temp_data, dev_data_array->dev_data, nbytes); - break; - } - - case xzy: - { - timespec time1,time2; - my_gettime(CLOCK_REALTIME,&time1); - for(unsigned i=0; i<dev_data_array->dim[0]; ++i) - for(unsigned k=0; k<dev_data_array->dim[2]; ++k) - { - dev_type* temp = &temp_data[(i*dev_data_array->dim[2]+k)*dev_data_array->dim[1]]; - for(unsigned j=0; j<dev_data_array->dim[1]; ++j) - { - temp[j] = static_cast<dev_type>(reinterpret_cast<host_type***>(host_data)[i][j][k]); - } - } - my_gettime(CLOCK_REALTIME,&time2); - CudaWrapper_AddCPUBufUploadTime( - time2.tv_sec-time1.tv_sec+1.0*(time2.tv_nsec-time1.tv_nsec)/1000000000); - CudaWrapper_UploadCudaData(temp_data, dev_data_array->dev_data, nbytes); - break; - } - } - // we have uploaded the data to the device, i.e.: - current_data_on_device = true; - // the data is going to change on the device, making the host data out-dated - if(will_be_changed) current_data_on_host = false; -} - -template <typename host_type, typename dev_type, 
copy_mode mode> -void cCudaData<host_type, dev_type, mode> -::uploadAsync(int stream,bool will_be_changed) -{ - // if current data is already up, do not re-upload it -// if(current_data_on_device) return; - if(buffer&&is_continues) - { - printf("Actual Buffer: %p %i\n",*buffer,*buf_size); - if(typeid(host_type)==typeid(double)) - { - if(typeid(dev_type)==typeid(double)) - { - CudaData_Upload_DoubleDouble((void*) host_data,dev_data_array->dev_data, - dev_data_array->dim,mode,*buffer); - current_data_on_device = true; - if(will_be_changed) current_data_on_host = false; - return; - } - else if(typeid(dev_type)==typeid(float)) - { - CudaData_Upload_DoubleFloat((void*) host_data,dev_data_array->dev_data, - dev_data_array->dim,mode,*buffer); - current_data_on_device = true; - if(will_be_changed) current_data_on_host = false; - return; - } - } - else if(typeid(host_type)==typeid(float)) - { - if(typeid(dev_type)==typeid(double)) - { - CudaData_Upload_FloatDouble((void*) host_data,dev_data_array->dev_data, - dev_data_array->dim,mode,*buffer); - current_data_on_device = true; - if(will_be_changed) current_data_on_host = false; - return; - } - else if(typeid(dev_type)==typeid(float)) - { - CudaData_Upload_FloatFloat((void*) host_data,dev_data_array->dev_data, - dev_data_array->dim,mode,*buffer); - current_data_on_device = true; - if(will_be_changed) current_data_on_host = false; - return; - } - } - else if(typeid(host_type)==typeid(int)) - { - if(typeid(dev_type)==typeid(int)) - { - CudaData_Upload_IntInt((void*) host_data,dev_data_array->dev_data, - dev_data_array->dim,mode,*buffer); - current_data_on_device = true; - if(will_be_changed) current_data_on_host = false; - return; - } - } - } - switch(mode) - { - case x: - { - if(typeid(host_type) == typeid(dev_type)) - CudaWrapper_UploadCudaDataAsync(host_data, dev_data_array->dev_data, nbytes,stream); - else - { - timespec time1,time2; - my_gettime(CLOCK_REALTIME,&time1); - for(unsigned i=0; i<dev_data_array->dim[0]; ++i) 
temp_data[i] = static_cast<dev_type>(host_data[i]); - my_gettime(CLOCK_REALTIME,&time2); - CudaWrapper_AddCPUBufUploadTime( - time2.tv_sec-time1.tv_sec+1.0*(time2.tv_nsec-time1.tv_nsec)/1000000000); - CudaWrapper_UploadCudaDataAsync(temp_data, dev_data_array->dev_data, nbytes,stream); - } - break; - } - - case xx: - { - if(typeid(host_type) == typeid(dev_type)) - CudaWrapper_UploadCudaDataAsync(host_data, dev_data_array->dev_data, nbytes,stream); - else - { - timespec time1,time2; - my_gettime(CLOCK_REALTIME,&time1); - for(unsigned i=0; i<dev_data_array->dim[0]; ++i) temp_data[i] = static_cast<dev_type>(host_data[i]); - my_gettime(CLOCK_REALTIME,&time2); - CudaWrapper_AddCPUBufUploadTime( - time2.tv_sec-time1.tv_sec+1.0*(time2.tv_nsec-time1.tv_nsec)/1000000000); - CudaWrapper_UploadCudaDataAsync(temp_data, dev_data_array->dev_data, nbytes,stream); - } - break; - } - - case xy: - { - timespec time1,time2; - my_gettime(CLOCK_REALTIME,&time1); - for(unsigned i=0; i<dev_data_array->dim[0]; ++i) - { - dev_type* temp = &temp_data[i * dev_data_array->dim[1]]; - for(unsigned j=0; j<dev_data_array->dim[1]; ++j) - { - temp[j] = static_cast<dev_type>((reinterpret_cast<host_type**>(host_data))[i][j]); - } - } - my_gettime(CLOCK_REALTIME,&time2); - CudaWrapper_AddCPUBufUploadTime( - time2.tv_sec-time1.tv_sec+1.0*(time2.tv_nsec-time1.tv_nsec)/1000000000); - CudaWrapper_UploadCudaDataAsync(temp_data, dev_data_array->dev_data, nbytes,stream); - break; - } - - case yx: - { - timespec time1,time2; - my_gettime(CLOCK_REALTIME,&time1); - for(unsigned j=0; j<dev_data_array->dim[1]; ++j) - { - dev_type* temp = &temp_data[j*dev_data_array->dim[0]]; - for(unsigned i=0; i<dev_data_array->dim[0]; ++i) - { - temp[i] = static_cast<dev_type>(reinterpret_cast<host_type**>(host_data)[i][j]); - } - } - my_gettime(CLOCK_REALTIME,&time2); - CudaWrapper_AddCPUBufUploadTime( - time2.tv_sec-time1.tv_sec+1.0*(time2.tv_nsec-time1.tv_nsec)/1000000000); - CudaWrapper_UploadCudaDataAsync(temp_data, 
dev_data_array->dev_data, nbytes,stream); - break; - } - case xyz: - { - timespec time1,time2; - my_gettime(CLOCK_REALTIME,&time1); - for(unsigned i=0; i<dev_data_array->dim[0]; ++i) - for(unsigned j=0; j<dev_data_array->dim[1]; ++j) - { - dev_type* temp = &temp_data[(i*dev_data_array->dim[1]+j)*dev_data_array->dim[2]]; - for(unsigned k=0; k<dev_data_array->dim[2]; ++k) - { - temp[k] = static_cast<dev_type>(reinterpret_cast<host_type***>(host_data)[i][j][k]); - } - } - my_gettime(CLOCK_REALTIME,&time2); - CudaWrapper_AddCPUBufUploadTime( - time2.tv_sec-time1.tv_sec+1.0*(time2.tv_nsec-time1.tv_nsec)/1000000000); - CudaWrapper_UploadCudaDataAsync(temp_data, dev_data_array->dev_data, nbytes,stream); - break; - } - - case xzy: - { - timespec time1,time2; - my_gettime(CLOCK_REALTIME,&time1); - for(unsigned i=0; i<dev_data_array->dim[0]; ++i) - for(unsigned k=0; k<dev_data_array->dim[2]; ++k) - { - dev_type* temp = &temp_data[(i*dev_data_array->dim[2]+k)*dev_data_array->dim[1]]; - for(unsigned j=0; j<dev_data_array->dim[1]; ++j) - { - temp[j] = static_cast<dev_type>(reinterpret_cast<host_type***>(host_data)[i][j][k]); - } - } - my_gettime(CLOCK_REALTIME,&time2); - CudaWrapper_AddCPUBufUploadTime( - time2.tv_sec-time1.tv_sec+1.0*(time2.tv_nsec-time1.tv_nsec)/1000000000); - CudaWrapper_UploadCudaDataAsync(temp_data, dev_data_array->dev_data, nbytes,stream); - break; - } - } - // we have uploaded the data to the device, i.e.: - current_data_on_device = true; - // the data is going to change on the device, making the host data out-dated - if(will_be_changed) current_data_on_host = false; -} - -template <typename host_type, typename dev_type, copy_mode mode> -void cCudaData<host_type, dev_type, mode> -::download(bool will_be_changed) -{ - // if current data is already down, do not re-download it -// if(current_data_on_host) return; - switch(mode) - { - case x: - { - if(typeid(host_type) == typeid(dev_type)) - CudaWrapper_DownloadCudaData(host_data, dev_data_array->dev_data, 
nbytes); - else - { - CudaWrapper_DownloadCudaData(temp_data, dev_data_array->dev_data, nbytes); - timespec time1,time2; - my_gettime(CLOCK_REALTIME,&time1); - for(unsigned i=0; i<dev_data_array->dim[0]; ++i) host_data[i] = static_cast<host_type>(temp_data[i]); - my_gettime(CLOCK_REALTIME,&time2); - CudaWrapper_AddCPUBufDownloadTime( - time2.tv_sec-time1.tv_sec+1.0*(time2.tv_nsec-time1.tv_nsec)/1000000000); - } - break; - } - - case xx: - { - if(typeid(host_type) == typeid(dev_type)) - CudaWrapper_DownloadCudaData(host_data, dev_data_array->dev_data, nbytes); - else - { - CudaWrapper_DownloadCudaData(temp_data, dev_data_array->dev_data, nbytes); - timespec time1,time2; - my_gettime(CLOCK_REALTIME,&time1); - for(unsigned i=0; i<dev_data_array->dim[0]; ++i) host_data[i] = static_cast<host_type>(temp_data[i]); - my_gettime(CLOCK_REALTIME,&time2); - CudaWrapper_AddCPUBufDownloadTime( - time2.tv_sec-time1.tv_sec+1.0*(time2.tv_nsec-time1.tv_nsec)/1000000000); - } - break; - } - - case xy: - { - CudaWrapper_DownloadCudaData(temp_data, dev_data_array->dev_data, nbytes); - timespec time1,time2; - my_gettime(CLOCK_REALTIME,&time1); - for(unsigned i=0; i<dev_data_array->dim[0]; ++i) - { - dev_type* temp = &temp_data[i * dev_data_array->dim[1]]; - for(unsigned j=0; j<dev_data_array->dim[1]; ++j) - { - reinterpret_cast<host_type**>(host_data)[i][j] = static_cast<host_type>(temp[j]); - } - } - my_gettime(CLOCK_REALTIME,&time2); - CudaWrapper_AddCPUBufDownloadTime( - time2.tv_sec-time1.tv_sec+1.0*(time2.tv_nsec-time1.tv_nsec)/1000000000); - break; - } - - case yx: - { - CudaWrapper_DownloadCudaData(temp_data, dev_data_array->dev_data, nbytes); - timespec time1,time2; - my_gettime(CLOCK_REALTIME,&time1); - for(unsigned j=0; j<dev_data_array->dim[1]; ++j) - { - dev_type* temp = &temp_data[j*dev_data_array->dim[0]]; - for(unsigned i=0; i<dev_data_array->dim[0]; ++i) - { - reinterpret_cast<host_type**>(host_data)[i][j] = static_cast<host_type>(temp[i]); - } - } - 
my_gettime(CLOCK_REALTIME,&time2); - CudaWrapper_AddCPUBufDownloadTime( - time2.tv_sec-time1.tv_sec+1.0*(time2.tv_nsec-time1.tv_nsec)/1000000000); - break; - } - - case xyz: - { - CudaWrapper_DownloadCudaData(temp_data, dev_data_array->dev_data, nbytes); - timespec time1,time2; - my_gettime(CLOCK_REALTIME,&time1); - for(unsigned i=0; i<dev_data_array->dim[0]; ++i) - for(unsigned j=0; j<dev_data_array->dim[1]; ++j) - { - dev_type* temp = &temp_data[(i * dev_data_array->dim[1]+j)*dev_data_array->dim[2]]; - for(unsigned k=0; k<dev_data_array->dim[2]; ++k) - { - reinterpret_cast<host_type***>(host_data)[i][j][k] = static_cast<host_type>(temp[k]); - } - } - my_gettime(CLOCK_REALTIME,&time2); - CudaWrapper_AddCPUBufDownloadTime( - time2.tv_sec-time1.tv_sec+1.0*(time2.tv_nsec-time1.tv_nsec)/1000000000); - break; - } - - case xzy: - { - CudaWrapper_DownloadCudaData(temp_data, dev_data_array->dev_data, nbytes); - timespec time1,time2; - my_gettime(CLOCK_REALTIME,&time1); - for(unsigned i=0; i<dev_data_array->dim[0]; ++i) - for(unsigned k=0; k<dev_data_array->dim[2]; ++k) - { - dev_type* temp = &temp_data[(i * dev_data_array->dim[2]+k)*dev_data_array->dim[1]]; - for(unsigned j=0; j<dev_data_array->dim[1]; ++j) - { - reinterpret_cast<host_type***>(host_data)[i][j][k] = static_cast<host_type>(temp[j]); - } - } - my_gettime(CLOCK_REALTIME,&time2); - CudaWrapper_AddCPUBufDownloadTime( - time2.tv_sec-time1.tv_sec+1.0*(time2.tv_nsec-time1.tv_nsec)/1000000000); - break; - } - } - // we have downloaded the data to the host, i.e.: - current_data_on_host = true; - // the data is going to change on the host, making the device data out-dated - if(will_be_changed) current_data_on_device = false; -} - -template <typename host_type, typename dev_type, copy_mode mode> -void cCudaData<host_type, dev_type, mode> -::downloadAsync(int stream) -{ - switch(mode) - { - case x: - { - if(typeid(host_type) == typeid(dev_type)) - { - CudaWrapper_DownloadCudaDataAsync(host_data, 
dev_data_array->dev_data, nbytes, stream); - CudaWrapper_SyncStream(stream); - } - else - { - CudaWrapper_DownloadCudaDataAsync(temp_data, dev_data_array->dev_data, nbytes, stream); - CudaWrapper_SyncStream(stream); - for(unsigned i=0; i<dev_data_array->dim[0]; ++i) host_data[i] = static_cast<host_type>(temp_data[i]); - } - break; - } - - case xx: - { - if(typeid(host_type) == typeid(dev_type)) - { - CudaWrapper_DownloadCudaDataAsync(host_data, dev_data_array->dev_data, nbytes, stream); - CudaWrapper_SyncStream(stream); - } - else - { - CudaWrapper_DownloadCudaDataAsync(temp_data, dev_data_array->dev_data, nbytes, stream); - CudaWrapper_SyncStream(stream); - for(unsigned i=0; i<dev_data_array->dim[0]; ++i) host_data[i] = static_cast<host_type>(temp_data[i]); - } - break; - } - - case xy: - { - CudaWrapper_DownloadCudaDataAsync(temp_data, dev_data_array->dev_data, nbytes, stream); - CudaWrapper_SyncStream(stream); - for(unsigned i=0; i<dev_data_array->dim[0]; ++i) - { - dev_type* temp = &temp_data[i * dev_data_array->dim[1]]; - for(unsigned j=0; j<dev_data_array->dim[1]; ++j) - { - reinterpret_cast<host_type**>(host_data)[i][j] = static_cast<host_type>(temp[j]); - } - } - break; - } - - case yx: - { - CudaWrapper_DownloadCudaDataAsync(temp_data, dev_data_array->dev_data, nbytes, stream); - CudaWrapper_SyncStream(stream); - for(unsigned j=0; j<dev_data_array->dim[1]; ++j) - { - dev_type* temp = &temp_data[j*dev_data_array->dim[0]]; - for(unsigned i=0; i<dev_data_array->dim[0]; ++i) - { - reinterpret_cast<host_type**>(host_data)[i][j] = static_cast<host_type>(temp[i]); - } - } - break; - } - } -} - - -template <typename host_type, typename dev_type, copy_mode mode> -void cCudaData<host_type, dev_type, mode> -::memset_device(int value) -{ - CudaWrapper_Memset(dev_data_array->dev_data,value, nbytes); -} - -template <typename host_type, typename dev_type, copy_mode mode> -void cCudaData<host_type, dev_type, mode> -::set_buffer(void** abuffer,int* abuf_size,bool 
ais_continues) -{ - buffer = abuffer; - buf_size = abuf_size; - unsigned nbytes_buf=(nbytes/sizeof(dev_type))*sizeof(host_type); - if(buffer!=NULL) - if(not((typeid(host_type) == typeid(dev_type))&&(mode == x || mode == xx))) - { - printf("Allocate Buffer: %p %i\n",*buffer,*buf_size); - if(((*buffer)!=NULL)&&(*buf_size<nbytes_buf)) - CudaWrapper_FreeCudaData(*buffer,*buf_size); - if(*buf_size<nbytes_buf) - {*buffer=CudaWrapper_AllocCudaData(nbytes_buf);*buf_size=nbytes_buf;} - printf("Allocate Buffer2: %p %i\n",*buffer,*buf_size); - - } - is_continues=ais_continues; -} -#endif // _CUDA_DATA_H_ diff --git a/src/USER-CUDA/cuda_modify_flags.h b/src/USER-CUDA/cuda_modify_flags.h deleted file mode 100644 index e683456d27..0000000000 --- a/src/USER-CUDA/cuda_modify_flags.h +++ /dev/null @@ -1,45 +0,0 @@ -/* -*- c++ -*- ---------------------------------------------------------- - LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator - - Original Version: - http://lammps.sandia.gov, Sandia National Laboratories - Steve Plimpton, sjplimp@sandia.gov - - See the README file in the top-level LAMMPS directory. - - ----------------------------------------------------------------------- - - USER-CUDA Package and associated modifications: - https://sourceforge.net/projects/lammpscuda/ - - Christian Trott, christian.trott@tu-ilmenau.de - Lars Winterfeld, lars.winterfeld@tu-ilmenau.de - Theoretical Physics II, University of Technology Ilmenau, Germany - - See the README file in the USER-CUDA directory. - - This software is distributed under the GNU General Public License. 
-------------------------------------------------------------------------- */ - -#ifndef CUDA_MODIFY_FLAGS_H -#define CUDA_MODIFY_FLAGS_H - -#include "fix.h" - -namespace LAMMPS_NS { -namespace FixConstCuda { - static const int INITIAL_INTEGRATE_CUDA = FixConst::FIX_CONST_LAST << 0; - static const int POST_INTEGRATE_CUDA = FixConst::FIX_CONST_LAST << 1; - static const int PRE_EXCHANGE_CUDA = FixConst::FIX_CONST_LAST << 2; - static const int PRE_NEIGHBOR_CUDA = FixConst::FIX_CONST_LAST << 3; - static const int PRE_FORCE_CUDA = FixConst::FIX_CONST_LAST << 4; - static const int POST_FORCE_CUDA = FixConst::FIX_CONST_LAST << 5; - static const int FINAL_INTEGRATE_CUDA = FixConst::FIX_CONST_LAST << 6; - static const int END_OF_STEP_CUDA = FixConst::FIX_CONST_LAST << 7; - static const int THERMO_ENERGY_CUDA = FixConst::FIX_CONST_LAST << 8; - static const int MIN_POST_FORCE_CUDA = FixConst::FIX_CONST_LAST << 9; -} -} -// remember not to shift over 31 bits - -#endif // CUDA_MODIFY_FLAGS_H diff --git a/src/USER-CUDA/cuda_neigh_list.cpp b/src/USER-CUDA/cuda_neigh_list.cpp deleted file mode 100644 index 6e05cee274..0000000000 --- a/src/USER-CUDA/cuda_neigh_list.cpp +++ /dev/null @@ -1,184 +0,0 @@ -/* ---------------------------------------------------------------------- - LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator - - Original Version: - http://lammps.sandia.gov, Sandia National Laboratories - Steve Plimpton, sjplimp@sandia.gov - - See the README file in the top-level LAMMPS directory. - - ----------------------------------------------------------------------- - - USER-CUDA Package and associated modifications: - https://sourceforge.net/projects/lammpscuda/ - - Christian Trott, christian.trott@tu-ilmenau.de - Lars Winterfeld, lars.winterfeld@tu-ilmenau.de - Theoretical Physics II, University of Technology Ilmenau, Germany - - See the README file in the USER-CUDA directory. - - This software is distributed under the GNU General Public License. 
-------------------------------------------------------------------------- */ - -#include "cuda_neigh_list.h" -#include "neigh_list.h" -#include <cstring> -#include <vector> -#include <map> -#include <algorithm> -#include "user_cuda.h" -#include "atom.h" -#include "error.h" - -using namespace LAMMPS_NS; - -CudaNeighList::CudaNeighList(LAMMPS *lmp, class NeighList* neigh_list) : Pointers(lmp) -{ - cuda = lmp->cuda; - if(cuda == NULL) - error->all(FLERR,"You cannot use a /cuda class, without activating 'cuda' acceleration. Provide '-c on' as command-line argument to LAMMPS.."); - - MYDBG(printf("# CUDA: CudaNeighList::cudaNeighList() ... start\n");) - this->neigh_list = neigh_list; - neigh_list->cuda_list=this; - sneighlist.maxlocal = neigh_list->get_maxlocal(); - sneighlist.maxneighbors = 32; - sneighlist.maxcut = 0.0; - sneighlist.cutneighsq = NULL; - cu_neighbors = NULL; - cu_neighbors_border = NULL; - cu_neighbors_inner = NULL; - cu_numneigh_border = NULL; - cu_numneigh_inner = NULL; - cu_numneigh = NULL; - cu_ilist = NULL; - cu_ilist_border = NULL; - cu_inum_border = NULL; - inum_border = 0; - neighbors = NULL; - neighbors_inner = NULL; - neighbors_border = NULL; - numneigh_border = NULL; - numneigh_inner = NULL; - ilist_border = NULL; - - build_cuda = false; - sneighlist.binned_id=NULL; - sneighlist.bin_dim=new int[3]; - sneighlist.bin_dim[0]=0; - sneighlist.bin_dim[1]=0; - sneighlist.bin_dim[2]=0; - - cu_ex_type = NULL; - cu_ex1_bit = NULL; - cu_ex2_bit = NULL; - cu_ex_mol_bit = NULL; - sneighlist.nex_type=0; - sneighlist.nex_group=0; - sneighlist.nex_mol=0; - - sneighlist.bin_nmax=0; - sneighlist.bin_extraspace=0.05; - MYDBG(printf("# CUDA: CudaNeighList::cudaNeighList() ... end\n");) - -} - -CudaNeighList::~CudaNeighList() -{ - dev_free(); -} - -void CudaNeighList::dev_alloc() -{ - MYDBG( printf("# CUDA: CudaNeighList::dev_alloc() ... 
start\n"); ) - cu_ilist = new cCudaData<int , int , x> (neigh_list->ilist , & sneighlist.ilist , sneighlist.maxlocal ); - cu_numneigh = new cCudaData<int , int , x> (neigh_list->numneigh, & sneighlist.numneigh , sneighlist.maxlocal ); - neighbors = new int[atom->nmax*sneighlist.maxneighbors]; - cu_neighbors= new cCudaData<int, int, x> (neighbors , & sneighlist.neighbors, atom->nmax*sneighlist.maxneighbors ); - - if(cuda->shared_data.overlap_comm) - { - ilist_border = new int[sneighlist.maxlocal]; - numneigh_border = new int[sneighlist.maxlocal]; - numneigh_inner = new int[sneighlist.maxlocal]; - cu_inum_border = new cCudaData<int , int , x> (&inum_border , & sneighlist.inum_border , 1 ); - cu_ilist_border = new cCudaData<int , int , x> (ilist_border , & sneighlist.ilist_border , sneighlist.maxlocal ); - cu_numneigh_border = new cCudaData<int , int , x> (numneigh_border , & sneighlist.numneigh_border , sneighlist.maxlocal ); - cu_numneigh_inner = new cCudaData<int , int , x> (numneigh_inner , & sneighlist.numneigh_inner , sneighlist.maxlocal ); - neighbors_border = new int[sneighlist.maxlocal*sneighlist.maxneighbors]; - cu_neighbors_border= new cCudaData<int, int, x> (neighbors_border , & sneighlist.neighbors_border, sneighlist.maxlocal*sneighlist.maxneighbors ); - neighbors_inner = new int[sneighlist.maxlocal*sneighlist.maxneighbors]; - cu_neighbors_inner = new cCudaData<int, int, x> (neighbors_inner , & sneighlist.neighbors_inner , sneighlist.maxlocal*sneighlist.maxneighbors ); - } - cuda->shared_data.atom.update_neigh=2; - MYDBG( printf("# CUDA: CudaNeighList::dev_alloc() ... end\n"); ) -} - -void CudaNeighList::dev_free() -{ - MYDBG( printf("# CUDA: CudaNeighList::dev_free() ... 
start\n"); ) - delete cu_numneigh; - delete cu_ilist; - delete [] neighbors; - delete cu_neighbors; - - if(cuda->shared_data.overlap_comm) - { - delete [] ilist_border; - delete [] numneigh_border; - delete [] numneigh_inner; - delete [] neighbors_border; - delete [] neighbors_inner; - delete cu_inum_border; - delete cu_neighbors_border; - delete cu_neighbors_inner; - delete cu_numneigh_border; - delete cu_numneigh_inner; - delete cu_ilist_border; - } - MYDBG( printf("# CUDA: CudaNeighList::dev_free() ... end\n"); ) -} - -void CudaNeighList::grow_device() -{ - MYDBG(printf("# CUDA: CudaNeighList::grow_device() ... start\n");) - // if host has allocated more memory for atom arrays than device has, then allocate more memory on device - int new_maxlocal = neigh_list->get_maxlocal(); - if(sneighlist.maxlocal < new_maxlocal) - { - sneighlist.maxlocal = new_maxlocal; - dev_free(); - dev_alloc(); - } - - if(!cu_ilist || !cu_numneigh) dev_alloc(); - - // check, if hosts data has been allocated somewhere else - if(cu_ilist ->get_host_data() != neigh_list->ilist) cu_ilist ->set_host_data(neigh_list->ilist); - if(cu_numneigh->get_host_data() != neigh_list->numneigh) cu_numneigh->set_host_data(neigh_list->numneigh); - - MYDBG(printf("# CUDA: CudaNeighList::grow_device() ... end\n");) -} - - -void CudaNeighList::nl_upload(bool will_be_changed) -{ - //return; - MYDBG(printf("# CUDA: CudaNeighList::nl_upload() ... start\n");) - if(cu_ilist) - cu_ilist->upload(); - if(cu_numneigh) - cu_numneigh->upload(); - MYDBG(printf("# CUDA: CudaNeighList::nl_upload() ... end\n");) -} - -void CudaNeighList::nl_download(bool will_be_changed) -{ - MYDBG(printf("# CUDA: CudaNeighList::nl_download() ... start\n");) - if(cu_ilist) - cu_ilist->download(); - if(cu_numneigh) - cu_numneigh->download(); - MYDBG(printf("# CUDA: CudaNeighList::nl_download() ... 
end\n");) -} diff --git a/src/USER-CUDA/cuda_neigh_list.h b/src/USER-CUDA/cuda_neigh_list.h deleted file mode 100644 index f733cdfd61..0000000000 --- a/src/USER-CUDA/cuda_neigh_list.h +++ /dev/null @@ -1,83 +0,0 @@ -/* -*- c++ -*- ---------------------------------------------------------- - LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator - - Original Version: - http://lammps.sandia.gov, Sandia National Laboratories - Steve Plimpton, sjplimp@sandia.gov - - See the README file in the top-level LAMMPS directory. - - ----------------------------------------------------------------------- - - USER-CUDA Package and associated modifications: - https://sourceforge.net/projects/lammpscuda/ - - Christian Trott, christian.trott@tu-ilmenau.de - Lars Winterfeld, lars.winterfeld@tu-ilmenau.de - Theoretical Physics II, University of Technology Ilmenau, Germany - - See the README file in the USER-CUDA directory. - - This software is distributed under the GNU General Public License. -------------------------------------------------------------------------- */ - -#ifndef LMP_NEIGH_LIST_CUDA_H -#define LMP_NEIGH_LIST_CUDA_H - -#include "pointers.h" -#include "cuda_data.h" -#include "neigh_list.h" - -namespace LAMMPS_NS -{ - -class CudaNeighList : protected Pointers -{ - public: - cCudaData<int , int , x>* cu_ilist; - cCudaData<int , int , x>* cu_numneigh; - cCudaData<int , int , x>* cu_inum_border; - cCudaData<int , int , x>* cu_ilist_border; - cCudaData<int , int , x>* cu_numneigh_border; - cCudaData<int , int , x>* cu_numneigh_inner; - cCudaData<int , int , x>* cu_neighbors; - cCudaData<int , int , x>* cu_neighbors_border; - cCudaData<int , int , x>* cu_neighbors_inner; - cCudaData<int , int , x>* cu_ex_type; - cCudaData<int , int , x>* cu_ex1_bit; - cCudaData<int , int , x>* cu_ex2_bit; - cCudaData<int , int , x>* cu_ex_mol_bit; - - - cuda_shared_neighlist sneighlist; - - int* neighbors; - int* neighbors_inner; - int* neighbors_border; - int inum_border; - int* 
ilist_border; - int* numneigh_border; - int* numneigh_inner; - int nex_type; - int nex_group; - int nex_mol; - - bool build_cuda; - - CudaNeighList(class LAMMPS *, class NeighList* neigh_list); - ~CudaNeighList(); - void grow_device(); // will grow pages memory on device, keeping old pages. will grow lists memory on device, deleting old lists - void nl_upload(bool will_be_changed=true); - void nl_download(bool will_be_changed=true); - NeighList* neigh_list; - - void dev_alloc(); - void dev_free(); - - private: - class Cuda *cuda; -}; - -} - -#endif diff --git a/src/USER-CUDA/domain_cuda.cpp b/src/USER-CUDA/domain_cuda.cpp deleted file mode 100644 index 997a42a681..0000000000 --- a/src/USER-CUDA/domain_cuda.cpp +++ /dev/null @@ -1,345 +0,0 @@ -/* ---------------------------------------------------------------------- - LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator - http://lammps.sandia.gov, Sandia National Laboratories - Steve Plimpton, sjplimp@sandia.gov - - Copyright (2003) Sandia Corporation. Under the terms of Contract - DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains - certain rights in this software. This software is distributed under - the GNU General Public License. - - See the README file in the top-level LAMMPS directory. 
-------------------------------------------------------------------------- */ - -/* ---------------------------------------------------------------------- - Contributing author (triclinic) : Pieter in 't Veld (SNL) -------------------------------------------------------------------------- */ - -#include <mpi.h> -#include <stdlib.h> -#include <string.h> -#include <stdio.h> -#include <math.h> -#include "domain_cuda.h" -#include "style_region.h" -#include "atom.h" -#include "force.h" -#include "update.h" -#include "modify.h" -#include "fix.h" -#include "fix_deform.h" -#include "region.h" -#include "lattice.h" -#include "comm.h" -#include "memory.h" -#include "error.h" - -#include "user_cuda.h" -#include "domain_cu.h" - -using namespace LAMMPS_NS; - -#define BIG 1.0e20 -#define SMALL 1.0e-4 -#define DELTA 1 - -enum {NO_REMAP, X_REMAP, V_REMAP}; // same as fix_deform.cpp - -/* ---------------------------------------------------------------------- - default is periodic -------------------------------------------------------------------------- */ - -DomainCuda::DomainCuda(LAMMPS* lmp) : Domain(lmp) -{ - cuda = lmp->cuda; - - if(cuda == NULL) - error->all(FLERR, "You cannot use a /cuda class, without activating 'cuda' acceleration. 
Provide '-c on' as command-line argument to LAMMPS.."); -} - -/* ---------------------------------------------------------------------- */ - -void DomainCuda::init() -{ - Domain::init(); - - if(not cuda->finished_run) { - cuda->setDomainParams(); - Cuda_Domain_Init(&cuda->shared_data); - } -} - -/* ---------------------------------------------------------------------- - set global box params - assumes boxlo/hi and triclinic tilts are already set -------------------------------------------------------------------------- */ - -void DomainCuda::set_global_box() -{ - // one-time activation of CUDA - // do it here, b/c is now too late for further package commands - // activation must occur before any USER-CUDA class communicates with GPUs - - cuda->activate(); - - Domain::set_global_box(); - - if(not cuda->finished_run) { - cuda->setDomainParams(); - } -} - -/* ---------------------------------------------------------------------- - set lamda box params, only need be done one time - assumes global box is defined and proc assignment has been made by comm - for uppermost proc, insure subhi = 1.0 (in case round-off occurs) -------------------------------------------------------------------------- */ - -void DomainCuda::set_lamda_box() -{ - Domain::set_lamda_box(); - - if(not cuda->finished_run) { - cuda->setDomainParams(); - } -} - -/* ---------------------------------------------------------------------- - set local subbox params - assumes global box is defined and proc assignment has been made - for uppermost proc, insure subhi = boxhi (in case round-off occurs) -------------------------------------------------------------------------- */ - -void DomainCuda::set_local_box() -{ - Domain::set_local_box(); - - if(not cuda->finished_run) { - // cuda->setDomainParams(); - //Cuda_Domain_Init(&cuda->shared_data); - } -} - -/* ---------------------------------------------------------------------- - reset global & local boxes due to global box boundary changes - if 
shrink-wrapped, determine atom extent and reset boxlo/hi - if shrink-wrapped and triclinic, perform shrink-wrap in box coords -------------------------------------------------------------------------- */ - -void DomainCuda::reset_box() -{ - if(nonperiodic == 2) { - - // convert back to box coords for shrink-wrap operation - - if(triclinic) lamda2x(atom->nlocal); - - // compute extent of atoms on this proc - - double extent[3][2], all[3][2]; - - extent[2][0] = extent[1][0] = extent[0][0] = BIG; - extent[2][1] = extent[1][1] = extent[0][1] = -BIG; - - double** x = atom->x; - int nlocal = atom->nlocal; - - if(cuda->finished_setup && (!cuda->oncpu)) { - extent[0][0] = cuda->extent[0]; - extent[0][1] = cuda->extent[1]; - extent[1][0] = cuda->extent[2]; - extent[1][1] = cuda->extent[3]; - extent[2][0] = cuda->extent[4]; - extent[2][1] = cuda->extent[5]; - } else - for(int i = 0; i < nlocal; i++) { - extent[0][0] = MIN(extent[0][0], x[i][0]); - extent[0][1] = MAX(extent[0][1], x[i][0]); - extent[1][0] = MIN(extent[1][0], x[i][1]); - extent[1][1] = MAX(extent[1][1], x[i][1]); - extent[2][0] = MIN(extent[2][0], x[i][2]); - extent[2][1] = MAX(extent[2][1], x[i][2]); - } - - // compute extent across all procs - // flip sign of MIN to do it in one Allreduce MAX - - extent[0][0] = -extent[0][0]; - extent[1][0] = -extent[1][0]; - extent[2][0] = -extent[2][0]; - - MPI_Allreduce(extent, all, 6, MPI_DOUBLE, MPI_MAX, world); - - // in shrink-wrapped dims, set box by atom extent - // if minimum set, enforce min box size settings - - if(triclinic == 0) { - if(xperiodic == 0) { - if(boundary[0][0] == 2) boxlo[0] = -all[0][0] - small[0]; - else if(boundary[0][0] == 3) - boxlo[0] = MIN(-all[0][0] - small[0], minxlo); - - if(boundary[0][1] == 2) boxhi[0] = all[0][1] + small[0]; - else if(boundary[0][1] == 3) boxhi[0] = MAX(all[0][1] + small[0], minxhi); - - if(boxlo[0] > boxhi[0]) error->all(FLERR, "Illegal simulation box"); - } - - if(yperiodic == 0) { - if(boundary[1][0] == 2) boxlo[1] 
= -all[1][0] - small[1]; - else if(boundary[1][0] == 3) - boxlo[1] = MIN(-all[1][0] - small[1], minylo); - - if(boundary[1][1] == 2) boxhi[1] = all[1][1] + small[1]; - else if(boundary[1][1] == 3) boxhi[1] = MAX(all[1][1] + small[1], minyhi); - - if(boxlo[1] > boxhi[1]) error->all(FLERR, "Illegal simulation box"); - } - - if(zperiodic == 0) { - if(boundary[2][0] == 2) boxlo[2] = -all[2][0] - small[2]; - else if(boundary[2][0] == 3) - boxlo[2] = MIN(-all[2][0] - small[2], minzlo); - - if(boundary[2][1] == 2) boxhi[2] = all[2][1] + small[2]; - else if(boundary[2][1] == 3) boxhi[2] = MAX(all[2][1] + small[2], minzhi); - - if(boxlo[2] > boxhi[2]) error->all(FLERR, "Illegal simulation box"); - } - - } else { - double lo[3], hi[3]; - - if(xperiodic == 0) { - lo[0] = -all[0][0]; - lo[1] = 0.0; - lo[2] = 0.0; - Domain::lamda2x(lo, lo); - hi[0] = all[0][1]; - hi[1] = 0.0; - hi[2] = 0.0; - Domain::lamda2x(hi, hi); - - if(boundary[0][0] == 2) boxlo[0] = lo[0] - small[0]; - else if(boundary[0][0] == 3) boxlo[0] = MIN(lo[0] - small[0], minxlo); - - if(boundary[0][1] == 2) boxhi[0] = hi[0] + small[0]; - else if(boundary[0][1] == 3) boxhi[0] = MAX(hi[0] + small[0], minxhi); - - if(boxlo[0] > boxhi[0]) error->all(FLERR, "Illegal simulation box"); - } - - if(yperiodic == 0) { - lo[0] = 0.0; - lo[1] = -all[1][0]; - lo[2] = 0.0; - Domain::lamda2x(lo, lo); - hi[0] = 0.0; - hi[1] = all[1][1]; - hi[2] = 0.0; - Domain::lamda2x(hi, hi); - - if(boundary[1][0] == 2) boxlo[1] = lo[1] - small[1]; - else if(boundary[1][0] == 3) boxlo[1] = MIN(lo[1] - small[1], minylo); - - if(boundary[1][1] == 2) boxhi[1] = hi[1] + small[1]; - else if(boundary[1][1] == 3) boxhi[1] = MAX(hi[1] + small[1], minyhi); - - if(boxlo[1] > boxhi[1]) error->all(FLERR, "Illegal simulation box"); - - //xy *= (boxhi[1]-boxlo[1]) / yprd; - } - - if(zperiodic == 0) { - lo[0] = 0.0; - lo[1] = 0.0; - lo[2] = -all[2][0]; - Domain::lamda2x(lo, lo); - hi[0] = 0.0; - hi[1] = 0.0; - hi[2] = all[2][1]; - Domain::lamda2x(hi, hi); - - 
if(boundary[2][0] == 2) boxlo[2] = lo[2] - small[2]; - else if(boundary[2][0] == 3) boxlo[2] = MIN(lo[2] - small[2], minzlo); - - if(boundary[2][1] == 2) boxhi[2] = hi[2] + small[2]; - else if(boundary[2][1] == 3) boxhi[2] = MAX(hi[2] + small[2], minzhi); - - if(boxlo[2] > boxhi[2]) error->all(FLERR, "Illegal simulation box"); - - //xz *= (boxhi[2]-boxlo[2]) / xprd; - //yz *= (boxhi[2]-boxlo[2]) / yprd; - } - } - } - - set_global_box(); - set_local_box(); - - if(not cuda->finished_run) { - cuda->setDomainParams(); - Cuda_Domain_Init(&cuda->shared_data); - } - - // if shrink-wrapped, convert to lamda coords for new box - // must re-invoke pbc() b/c x2lamda result can be outside 0,1 due to roundoff - - if(nonperiodic == 2 && triclinic) { - x2lamda(atom->nlocal); - pbc(); - } -} - -/* ---------------------------------------------------------------------- - enforce PBC and modify box image flags for each atom - called every reneighboring and by other commands that change atoms - resulting coord must satisfy lo <= coord < hi - MAX is important since coord - prd < lo can happen when coord = hi - if fix deform, remap velocity of fix group atoms by box edge velocities - for triclinic, atoms must be in lamda coords (0-1) before pbc is called - image = 10 bits for each dimension - increment/decrement in wrap-around fashion -------------------------------------------------------------------------- */ - -void DomainCuda::pbc() -{ - if(cuda->finished_setup && (!cuda->oncpu)) { - cuda->setDomainParams(); - Cuda_Domain_PBC(&cuda->shared_data, deform_vremap, deform_groupbit, cuda->extent); - return; - } - - Domain::pbc(); -} - - -/* ---------------------------------------------------------------------- - convert triclinic 0-1 lamda coords to box coords for all N atoms - x = H lamda + x0; -------------------------------------------------------------------------- */ - -void DomainCuda::lamda2x(int n) -{ - if(cuda->finished_setup && (!cuda->oncpu)) { - 
Cuda_Domain_lamda2x(&cuda->shared_data, n); - return; - } - - Domain::lamda2x(n); -} - -/* ---------------------------------------------------------------------- - convert box coords to triclinic 0-1 lamda coords for all N atoms - lamda = H^-1 (x - x0) -------------------------------------------------------------------------- */ - -void DomainCuda::x2lamda(int n) -{ - if(cuda->finished_setup && (!cuda->oncpu)) { - Cuda_Domain_x2lamda(&cuda->shared_data, n); - return; - } - - Domain::x2lamda(n); -} diff --git a/src/USER-CUDA/domain_cuda.h b/src/USER-CUDA/domain_cuda.h deleted file mode 100644 index ede402ffba..0000000000 --- a/src/USER-CUDA/domain_cuda.h +++ /dev/null @@ -1,41 +0,0 @@ -/* -*- c++ -*- ---------------------------------------------------------- - LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator - http://lammps.sandia.gov, Sandia National Laboratories - Steve Plimpton, sjplimp@sandia.gov - - Copyright (2003) Sandia Corporation. Under the terms of Contract - DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains - certain rights in this software. This software is distributed under - the GNU General Public License. - - See the README file in the top-level LAMMPS directory. 
-------------------------------------------------------------------------- */ - -#ifndef LMP_DOMAIN_CUDA_H -#define LMP_DOMAIN_CUDA_H - -#include "pointers.h" -#include "domain.h" - -namespace LAMMPS_NS { - -class DomainCuda : public Domain { - public: - DomainCuda(class LAMMPS *); - void init(); - void set_global_box(); - void set_lamda_box(); - void set_local_box(); - void reset_box(); - void pbc(); - - virtual void lamda2x(int); - virtual void x2lamda(int); - - protected: - class Cuda *cuda; -}; - -} - -#endif diff --git a/src/USER-CUDA/fft3d_cuda.cpp b/src/USER-CUDA/fft3d_cuda.cpp deleted file mode 100644 index 42ec4b16ee..0000000000 --- a/src/USER-CUDA/fft3d_cuda.cpp +++ /dev/null @@ -1,609 +0,0 @@ -/* ---------------------------------------------------------------------- - LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator - - Original Version: - http://lammps.sandia.gov, Sandia National Laboratories - Steve Plimpton, sjplimp@sandia.gov - - See the README file in the top-level LAMMPS directory. - - ----------------------------------------------------------------------- - - USER-CUDA Package and associated modifications: - https://sourceforge.net/projects/lammpscuda/ - - Christian Trott, christian.trott@tu-ilmenau.de - Lars Winterfeld, lars.winterfeld@tu-ilmenau.de - Theoretical Physics II, University of Technology Ilmenau, Germany - - See the README file in the USER-CUDA directory. - - This software is distributed under the GNU General Public License. -------------------------------------------------------------------------- */ - -/* ---------------------------------------------------------------------- - LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator - http://lammps.sandia.gov, Sandia National Laboratories - Steve Plimpton, sjplimp@sandia.gov - - Copyright (2003) Sandia Corporation. Under the terms of Contract - DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains - certain rights in this software. 
This software is distributed under - the GNU General Public License. - - See the README file in the top-level LAMMPS directory. -------------------------------------------------------------------------- */ - -/* ---------------------------------------------------------------------- - Contributing authors: Jim Shepherd (GA Tech) added SGI SCSL support -------------------------------------------------------------------------- */ - -#include <mpi.h> -#include <cstdio> -#include <cstdlib> -#include <cmath> -#include "fft3d_cuda.h" -#include "fft3d_cuda_cu.h" -#include "remap.h" -#include <ctime> -#include "cuda_wrapper_cu.h" - -#ifdef FFT_CUFFT -#endif - -#define MIN(A,B) ((A) < (B)) ? (A) : (B) -#define MAX(A,B) ((A) > (B)) ? (A) : (B) - -/* ---------------------------------------------------------------------- - Data layout for 3d FFTs: - - data set of Nfast x Nmid x Nslow elements is owned by P procs - on input, each proc owns a subsection of the elements - on output, each proc will own a (possibly different) subsection - my subsection must not overlap with any other proc's subsection, - i.e. 
the union of all proc's input (or output) subsections must - exactly tile the global Nfast x Nmid x Nslow data set - when called from C, all subsection indices are - C-style from 0 to N-1 where N = Nfast or Nmid or Nslow - when called from F77, all subsection indices are - F77-style from 1 to N where N = Nfast or Nmid or Nslow - a proc can own 0 elements on input or output - by specifying hi index < lo index - on both input and output, data is stored contiguously on a processor - with a fast-varying, mid-varying, and slow-varying index -------------------------------------------------------------------------- */ - -/* ---------------------------------------------------------------------- - Perform 3d FFT - - Arguments: - in starting address of input data on this proc - out starting address of where output data for this proc - will be placed (can be same as in) - flag 1 for forward FFT, -1 for inverse FFT - plan plan returned by previous call to fft_3d_create_plan -------------------------------------------------------------------------- */ - -void fft_3d_cuda(FFT_DATA *in, FFT_DATA *out, int flag, struct fft_plan_3d *plan) -{ -#ifdef FFT_CUFFT - plan->iterate++; - my_times starttime,starttime2; - my_times endtime,endtime2; - - int i,total,length,offset,num; - double norm; - FFT_DATA *data,*copy; - // system specific constants - - - // pre-remap to prepare for 1st FFTs if needed - // copy = loc for remap result - int nprocs=plan->nprocs; -if(nprocs>1) -{ - if(plan->init) - my_gettime(CLOCK_REALTIME,&starttime); - if (plan->pre_plan) { - if (plan->pre_target == 0) copy = out; - else copy = plan->copy; - if(plan->init) remap_3d((double *) in, (double *) out, (double *) plan->scratch,plan->pre_plan); - data = out; - } - else - data = in; -} - cufftResult retvalc; - if(plan->init) - { - if(nprocs>1) - { - if(sizeof(FFT_CFLOAT)==sizeof(double))cudaMemcpy((void*) (plan->cudata2), (void*) data, plan->cudatasize/2,cudaMemcpyHostToDevice); - 
if(sizeof(FFT_CFLOAT)==sizeof(float)) cudaMemcpy((void*) (plan->cudata2), (void*) data, plan->cudatasize,cudaMemcpyHostToDevice); - initfftdata((double*)plan->cudata2,(FFT_CFLOAT*)plan->cudata,plan->nfast,plan->nmid,plan->nslow); - } - } - if (flag == -1) - { - retvalc=cufft(plan->plan_3d, plan->cudata, plan->cudata2,CUFFT_FORWARD); - } - else - { - retvalc=cufft(plan->plan_3d, plan->cudata, plan->cudata2,CUFFT_INVERSE); - } - if(retvalc!=CUFFT_SUCCESS) {printf("ErrorCUFFT: %i\n",retvalc);exit(EXIT_FAILURE);} - - FFTsyncthreads(); -#endif -} -/* ---------------------------------------------------------------------- - Create plan for performing a 3d FFT - - Arguments: - comm MPI communicator for the P procs which own the data - nfast,nmid,nslow size of global 3d matrix - in_ilo,in_ihi input bounds of data I own in fast index - in_jlo,in_jhi input bounds of data I own in mid index - in_klo,in_khi input bounds of data I own in slow index - out_ilo,out_ihi output bounds of data I own in fast index - out_jlo,out_jhi output bounds of data I own in mid index - out_klo,out_khi output bounds of data I own in slow index - scaled 0 = no scaling of result, 1 = scaling - permute permutation in storage order of indices on output - 0 = no permutation - 1 = permute once = mid->fast, slow->mid, fast->slow - 2 = permute twice = slow->fast, fast->mid, mid->slow - nbuf returns size of internal storage buffers used by FFT -------------------------------------------------------------------------- */ - -struct fft_plan_3d *fft_3d_create_plan_cuda( - MPI_Comm comm, int nfast, int nmid, int nslow, - int in_ilo, int in_ihi, int in_jlo, int in_jhi, - int in_klo, int in_khi, - int out_ilo, int out_ihi, int out_jlo, int out_jhi, - int out_klo, int out_khi, - int scaled, int permute, int *nbuf,bool ainit) -{ -#ifdef FFT_CUFFT - struct fft_plan_3d *plan; - int me,nprocs; - int i,num,flag,remapflag,fftflag; - int first_ilo,first_ihi,first_jlo,first_jhi,first_klo,first_khi; - int 
second_ilo,second_ihi,second_jlo,second_jhi,second_klo,second_khi; - int third_ilo,third_ihi,third_jlo,third_jhi,third_klo,third_khi; - int out_size,first_size,second_size,third_size,copy_size,scratch_size; - int np1,np2,ip1,ip2; - int list[50]; - - // system specific variables - - // query MPI info - - MPI_Comm_rank(comm,&me); - MPI_Comm_size(comm,&nprocs); - -#ifndef FFT_CUFFT - error->all(FLERR,"ERROR: Trying to use cuda fft without FFT_CUFFT set. Recompile with make option 'cufft=1'."); -#endif - // compute division of procs in 2 dimensions not on-processor - bifactor_cuda(nprocs,&np1,&np2); - ip1 = me % np1; - ip2 = me/np1; - - // in case of CUDA FFT every proc does the full FFT in order to avoid data transfers (the problem is other wise heavily bandwidth limited) - - int ip1out = ip1; - int ip2out = ip2; - int np1out = np1; - int np2out = np2; - - ip1 = 0; - ip2 = 0; - np1 = 1; - np2 = 1; - - // allocate memory for plan data struct - - plan = (struct fft_plan_3d *) malloc(sizeof(struct fft_plan_3d)); - if (plan == NULL) return NULL; - plan->init=ainit; - - // remap from initial distribution to layout needed for 1st set of 1d FFTs - // not needed if all procs own entire fast axis initially - // first indices = distribution after 1st set of FFTs - - if (in_ilo == 0 && in_ihi == nfast-1) - flag = 0; - else - flag = 1; - - if(nprocs>1)flag=1; - - MPI_Allreduce(&flag,&remapflag,1,MPI_INT,MPI_MAX,comm); - - if (remapflag == 0) { - first_ilo = in_ilo; - first_ihi = in_ihi; - first_jlo = in_jlo; - first_jhi = in_jhi; - first_klo = in_klo; - first_khi = in_khi; - plan->pre_plan = NULL; - } - else { - first_ilo = 0; - first_ihi = nfast - 1; - first_jlo = ip1*nmid/np1; - first_jhi = (ip1+1)*nmid/np1 - 1; - first_klo = ip2*nslow/np2; - first_khi = (ip2+1)*nslow/np2 - 1; - int members=2; - if(plan->init) members=1; - plan->pre_plan = - remap_3d_create_plan(comm,in_ilo,in_ihi,in_jlo,in_jhi,in_klo,in_khi, - first_ilo,first_ihi,first_jlo,first_jhi, - first_klo,first_khi, - 
members,0,0,2,0); - if (plan->pre_plan == NULL) return NULL; - } - - // 1d FFTs along fast axis - - plan->length1 = nfast; - plan->total1 = nfast * nmid * nslow; - - // remap from 1st to 2nd FFT - // choose which axis is split over np1 vs np2 to minimize communication - // second indices = distribution after 2nd set of FFTs - - second_ilo = ip1*nfast/np1; - second_ihi = (ip1+1)*nfast/np1 - 1; - second_jlo = 0; - second_jhi = nmid - 1; - second_klo = ip2*nslow/np2; - second_khi = (ip2+1)*nslow/np2 - 1; - plan->mid1_plan = - remap_3d_create_plan(comm, - first_ilo,first_ihi,first_jlo,first_jhi, - first_klo,first_khi, - second_ilo,second_ihi,second_jlo,second_jhi, - second_klo,second_khi, - 2,1,0,2,0); - if (plan->mid1_plan == NULL) return NULL; - - // 1d FFTs along mid axis - - plan->length2 = nmid; - plan->total2 = nfast * nmid * nslow; - - // remap from 2nd to 3rd FFT - // if final distribution is permute=2 with all procs owning entire slow axis - // then this remapping goes directly to final distribution - // third indices = distribution after 3rd set of FFTs - - flag=1; - - MPI_Allreduce(&flag,&remapflag,1,MPI_INT,MPI_MAX,comm); - - if (remapflag == 0) { - third_ilo = out_ilo; - third_ihi = out_ihi; - third_jlo = out_jlo; - third_jhi = out_jhi; - third_klo = out_klo; - third_khi = out_khi; - } - else { - third_ilo = ip1*nfast/np1; - third_ihi = (ip1+1)*nfast/np1 - 1; - third_jlo = ip2*nmid/np2; - third_jhi = (ip2+1)*nmid/np2 - 1; - third_klo = 0; - third_khi = nslow - 1; - } - - plan->mid2_plan = - remap_3d_create_plan(comm, - second_jlo,second_jhi,second_klo,second_khi, - second_ilo,second_ihi, - third_jlo,third_jhi,third_klo,third_khi, - third_ilo,third_ihi, - 2,1,0,2,0); - if (plan->mid2_plan == NULL) return NULL; - - // 1d FFTs along slow axis - - plan->length3 = nslow; - plan->total3 = nfast * nmid * nslow; - - // remap from 3rd FFT to final distribution - // not needed if permute = 2 and third indices = out indices on all procs - - flag=1; - - 
MPI_Allreduce(&flag,&remapflag,1,MPI_INT,MPI_MAX,comm); - - if (remapflag == 0) - plan->post_plan = NULL; - else { - plan->post_plan = - remap_3d_create_plan(comm, - third_klo,third_khi,third_ilo,third_ihi, - third_jlo,third_jhi, - out_klo,out_khi,out_ilo,out_ihi, - out_jlo,out_jhi, - 2,(permute+1)%3,0,2,0); - if (plan->post_plan == NULL) return NULL; - } - - // configure plan memory pointers and allocate work space - // out_size = amount of memory given to FFT by user - // first/second/third_size = amount of memory needed after pre,mid1,mid2 remaps - // copy_size = amount needed internally for extra copy of data - // scratch_size = amount needed internally for remap scratch space - // for each remap: - // out space used for result if big enough, else require copy buffer - // accumulate largest required remap scratch space - - out_size = (out_ihi-out_ilo+1) * (out_jhi-out_jlo+1) * (out_khi-out_klo+1); - first_size = (first_ihi-first_ilo+1) * (first_jhi-first_jlo+1) * - (first_khi-first_klo+1); - second_size = (second_ihi-second_ilo+1) * (second_jhi-second_jlo+1) * - (second_khi-second_klo+1); - third_size = (third_ihi-third_ilo+1) * (third_jhi-third_jlo+1) * - (third_khi-third_klo+1); - - plan->ihi_out=out_ihi; - plan->ilo_out=out_ilo; - plan->jhi_out=out_jhi; - plan->jlo_out=out_jlo; - plan->khi_out=out_khi; - plan->klo_out=out_klo; - - copy_size = 0; - scratch_size = 0; - - if (plan->pre_plan) { - if (first_size <= out_size) - plan->pre_target = 0; - else { - plan->pre_target = 1; - copy_size = MAX(copy_size,first_size); - } - scratch_size = MAX(scratch_size,first_size); - } - - if (plan->mid1_plan) { - if (second_size <= out_size) - plan->mid1_target = 0; - else { - plan->mid1_target = 1; - copy_size = MAX(copy_size,second_size); - } - scratch_size = MAX(scratch_size,second_size); - } - - if (plan->mid2_plan) { - if (third_size <= out_size) - plan->mid2_target = 0; - else { - plan->mid2_target = 1; - copy_size = MAX(copy_size,third_size); - } - scratch_size = 
MAX(scratch_size,third_size); - } - - if (plan->post_plan) - scratch_size = MAX(scratch_size,out_size); - - *nbuf = copy_size + scratch_size; - - if (copy_size) { - plan->copy = (FFT_DATA *) malloc(copy_size*sizeof(FFT_DATA)); - if (plan->copy == NULL) return NULL; - } - else plan->copy = NULL; - - if (scratch_size) { - plan->scratch = (FFT_DATA *) malloc(scratch_size*sizeof(FFT_DATA)); - if (plan->scratch == NULL) return NULL; - } - else plan->scratch = NULL; - - // system specific pre-computation of 1d FFT coeffs - // and scaling normalization - - cufftResult retvalc; - int nfft = (in_ihi-in_ilo+1) * (in_jhi-in_jlo+1) * - (in_khi-in_klo+1); - int nfft_brick = (out_ihi-out_ilo+1) * (out_jhi-out_jlo+1) * - (out_khi-out_klo+1); - - int nfft_both = MAX(nfft,nfft_brick); - nfft_both=nfast*nmid*nslow; - - plan->cudatasize=nfft_both*sizeof(FFT_DATA); - - //retvalc=cufftPlan1d(&(plan->plan_fast), nfast, CUFFT_PLAN,plan->total1/nfast); - //if(retvalc!=CUFFT_SUCCESS) printf("ErrorCUFFT1: %i\n",retvalc); - plan->nfast=nfast; - - //retvalc=cufftPlan1d(&(plan->plan_mid), nmid, CUFFT_PLAN,plan->total2/nmid); - //if(retvalc!=CUFFT_SUCCESS) printf("ErrorCUFFT2: %i\n",retvalc); - plan->nmid=nmid; - - //retvalc=cufftPlan1d(&(plan->plan_slow), nslow, CUFFT_PLAN,plan->total3/nslow); - //if(retvalc!=CUFFT_SUCCESS) printf("ErrorCUFFT3: %i\n",retvalc); - plan->nslow=nslow; - - retvalc=cufftPlan3d(&(plan->plan_3d), nslow,nmid,nfast, CUFFT_PLAN); - if(retvalc!=CUFFT_SUCCESS) printf("ErrorCUFFT3: %i\n",retvalc); - - plan->nprocs=nprocs; - plan->me=me; - if (scaled == 0) - plan->scaled = 0; - else { - plan->scaled = 1; - plan->norm = 1.0/(nfast*nmid*nslow); - plan->normnum = (out_ihi-out_ilo+1) * (out_jhi-out_jlo+1) * - (out_khi-out_klo+1); - } - - plan->coretime=0; - plan->iterate=0; - plan->ffttime=0; - return plan; - #endif -} - -/* ---------------------------------------------------------------------- - Destroy a 3d fft plan 
/* ----------------------------------------------------------------------
   Destroy a 3d fft plan

   Releases the remap plans, the copy/scratch buffers, the cuFFT handle
   and the plan struct itself. A NULL plan is ignored.
   Does nothing when built without FFT_CUFFT.
------------------------------------------------------------------------- */

void fft_3d_destroy_plan_cuda(struct fft_plan_3d *plan)
{
#ifdef FFT_CUFFT
  if (plan == NULL) return;

  if (plan->pre_plan) remap_3d_destroy_plan(plan->pre_plan);
  if (plan->mid1_plan) remap_3d_destroy_plan(plan->mid1_plan);
  if (plan->mid2_plan) remap_3d_destroy_plan(plan->mid2_plan);
  if (plan->post_plan) remap_3d_destroy_plan(plan->post_plan);

  // free(NULL) is a no-op, no need to test the pointers first

  free(plan->copy);
  free(plan->scratch);

  cufftDestroy(plan->plan_3d);
  free(plan);
#endif
}

/* ----------------------------------------------------------------------
   divide n into small factors, return them in list

   Arguments:
   n        number to factor
   num      counter that is incremented once per stored factor
            (it is not reset here, caller has to initialize it)
   list     receives the factors in the order they are found;
            no bounds check is done, caller must provide enough room

   Factors 2,3,5,7,11,13 are split off, smallest first; whatever is left
   and not divisible by any of them is stored as one final factor.
   n < 1 has no factorization, so nothing is stored
   (n == 0 previously recursed without end).
------------------------------------------------------------------------- */

void factor_cuda(int n, int *num, int *list)
{
  static const int small_factors[] = {2,3,5,7,11,13};
  static const int nsmall = sizeof(small_factors)/sizeof(small_factors[0]);

  if (n < 1) return;

  while (n > 1) {
    int divisor = 0;
    for (int i = 0; i < nsmall; i++) {
      if (n % small_factors[i] == 0) {
        divisor = small_factors[i];
        break;
      }
    }

    // remainder has no small factor left: store it as is and stop

    if (divisor == 0) {
      *list = n;
      (*num)++;
      return;
    }

    *list++ = divisor;
    (*num)++;
    n /= divisor;
  }
}

/* ----------------------------------------------------------------------
   divide n into 2 factors of as equal size as possible

   On return factor1 <= factor2 and factor1*factor2 == n.
   For n < 1 no such split exists; the outputs are then set to the
   trivial pair (1,n) instead of being left uninitialized.
------------------------------------------------------------------------- */

void bifactor_cuda(int n, int *factor1, int *factor2)
{
  int n1,n2,facmax;

  if (n < 1) {
    *factor1 = 1;
    *factor2 = n;
    return;
  }

  facmax = static_cast<int> (sqrt((double) n));

  // search downwards from sqrt(n); n1 = 1 always divides, so this returns

  for (n1 = facmax; n1 > 0; n1--) {
    n2 = n/n1;
    if (n1*n2 == n) {
      *factor1 = n1;
      *factor2 = n2;
      return;
    }
  }
}
---------------------------------------------------------------------- - perform just the 1d FFTs needed by a 3d FFT, no data movement - used for timing purposes - - Arguments: - in starting address of input data on this proc, all set to 0.0 - nsize size of in - flag 1 for forward FFT, -1 for inverse FFT - plan plan returned by previous call to fft_3d_create_plan -------------------------------------------------------------------------- */ - -void fft_1d_only_cuda(FFT_DATA *data, int nsize, int flag, struct fft_plan_3d *plan) -{ -#ifdef FFT_CUFFT - int i,total,length,offset,num; - double norm; - - // system specific constants - - - - // total = size of data needed in each dim - // length = length of 1d FFT in each dim - // total/length = # of 1d FFTs in each dim - // if total > nsize, limit # of 1d FFTs to available size of data - - int total1 = plan->total1; - int length1 = plan->length1; - int total2 = plan->total2; - int length2 = plan->length2; - int total3 = plan->total3; - int length3 = plan->length3; - - if (total1 > nsize) total1 = (nsize/length1) * length1; - if (total2 > nsize) total2 = (nsize/length2) * length2; - if (total3 > nsize) total3 = (nsize/length3) * length3; - - // perform 1d FFTs in each of 3 dimensions - // data is just an array of 0.0 - - - cudaMemcpy((void**) &(plan->cudata), (void*) data, plan->cudatasize,cudaMemcpyHostToDevice); - if (flag == -1) { - cufft(plan->plan_3d, plan->cudata, plan->cudata,CUFFT_FORWARD); - /*cufft(plan->plan_fast, plan->cudata, plan->cudata,CUFFT_FORWARD); - cufft(plan->plan_mid, plan->cudata, plan->cudata,CUFFT_FORWARD); - cufft(plan->plan_slow, plan->cudata, plan->cudata,CUFFT_FORWARD);*/ - } else { - cufft(plan->plan_3d, plan->cudata, plan->cudata,CUFFT_FORWARD); - /*cufft(plan->plan_fast, plan->cudata, plan->cudata,CUFFT_INVERSE); - cufft(plan->plan_mid,plan->cudata, plan->cudata,CUFFT_INVERSE); - cufft(plan->plan_slow, plan->cudata, plan->cudata,CUFFT_INVERSE);*/ - } - cudaMemcpy((void*) data, (void**) 
&(plan->cudata), plan->cudatasize,cudaMemcpyDeviceToHost); - - // scaling if required - // limit num to size of data - -#endif -} diff --git a/src/USER-CUDA/fft3d_cuda.h b/src/USER-CUDA/fft3d_cuda.h deleted file mode 100644 index 059ac977f7..0000000000 --- a/src/USER-CUDA/fft3d_cuda.h +++ /dev/null @@ -1,148 +0,0 @@ -/* -*- c++ -*- ---------------------------------------------------------- - LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator - - Original Version: - http://lammps.sandia.gov, Sandia National Laboratories - Steve Plimpton, sjplimp@sandia.gov - - See the README file in the top-level LAMMPS directory. - - ----------------------------------------------------------------------- - - USER-CUDA Package and associated modifications: - https://sourceforge.net/projects/lammpscuda/ - - Christian Trott, christian.trott@tu-ilmenau.de - Lars Winterfeld, lars.winterfeld@tu-ilmenau.de - Theoretical Physics II, University of Technology Ilmenau, Germany - - See the README file in the USER-CUDA directory. - - This software is distributed under the GNU General Public License. -------------------------------------------------------------------------- */ - -/* ---------------------------------------------------------------------- - LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator - http://lammps.sandia.gov, Sandia National Laboratories - Steve Plimpton, sjplimp@sandia.gov - - Copyright (2003) Sandia Corporation. Under the terms of Contract - DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains - certain rights in this software. This software is distributed under - the GNU General Public License. - - See the README file in the top-level LAMMPS directory. 
// User-settable FFT precision

// FFT_PRECISION_CU == 1 is single-precision complex (4-byte real, 4-byte imag)
// FFT_PRECISION_CU == 2 is double-precision complex (8-byte real, 8-byte imag)
// the macro is expected to come from cuda_precision.h (not set in this file)
#include "cuda_precision.h"
//#define FFT_PRECISION 2

// -------------------------------------------------------------------------

// Data types for single-precision complex
// FFT_DATA is defined with and without cuFFT support
// the cufft* macros map the generic names used in the .cpp file
// onto the single-precision cuFFT entry points

#if FFT_PRECISION_CU == 1

#ifdef FFT_CUFFT
#include "cuda_runtime.h"
#include "cufft.h"
typedef struct {
  float re;
  float im;
} FFT_DATA;
typedef cufftComplex cufftData;
typedef cufftReal cufftDataInit;
#define cufft cufftExecC2C
#define cufftinit cufftExecR2C
#define CUFFT_PLAN CUFFT_C2C
#define CUFFT_PLAN_INIT CUFFT_R2C
#else
typedef struct {
  float re;
  float im;
} FFT_DATA;
#endif

#endif

// -------------------------------------------------------------------------

// Data types for double-precision complex
// NOTE(review): unlike the single-precision case there is no #else branch,
// so FFT_DATA is not defined here when FFT_CUFFT is not set although the
// prototypes below use it - confirm whether another header is expected to
// provide it in that configuration

#if FFT_PRECISION_CU == 2


#ifdef FFT_CUFFT
#include "cuda_runtime.h"
#include "cufft.h"
typedef cufftDoubleComplex cufftData;
typedef cufftDoubleReal cufftDataInit;
typedef struct {
  double re;
  double im;
} FFT_DATA;
#define cufft cufftExecZ2Z
#define cufftinit cufftExecD2Z
#define CUFFT_PLAN CUFFT_Z2Z
#define CUFFT_PLAN_INIT CUFFT_D2Z
#endif

#endif

// -------------------------------------------------------------------------

// details of how to do a 3d FFT

struct fft_plan_3d {
  struct remap_plan_3d *pre_plan;       // remap from input -> 1st FFTs
  struct remap_plan_3d *mid1_plan;      // remap from 1st -> 2nd FFTs
  struct remap_plan_3d *mid2_plan;      // remap from 2nd -> 3rd FFTs
  struct remap_plan_3d *post_plan;      // remap from 3rd FFTs -> output
  FFT_DATA *copy;                   // memory for remap results (if needed)
  FFT_DATA *scratch;                // scratch space for remaps
  int total1,total2,total3;         // # of 1st,2nd,3rd FFTs (times length)
  int length1,length2,length3;      // length of 1st,2nd,3rd FFTs
  int pre_target;                   // where to put remap results
  int mid1_target,mid2_target;
  int scaled;                       // whether to scale FFT results
  int normnum;                      // # of values to rescale
  double norm;                      // normalization factor for rescaling

  double coretime;                  // timing/iteration counters,
  double ffttime;                   // zeroed when the plan is created
  int iterate;
                                    // system specific 1d FFT info

#ifdef FFT_CUFFT
  //CUdeviceptr cudata;
  cufftData* cudata;                // buffers handed to cuFFT and cudaMemcpy;
  cufftData* cudata2;               // assigned via FFT3dCuda::set_cudata
  unsigned int cudatasize;          // size of one buffer in bytes
  cufftHandle plan_fast;            // 1d plan handles, their creation is
  cufftHandle plan_mid;             // commented out in the .cpp file
  cufftHandle plan_slow;
  cufftHandle plan_3d;              // handle of the 3d cuFFT plan
  int nfast;                        // size of global 3d matrix
  int nmid;
  int nslow;
  int ihi_out,ilo_out,jhi_out,jlo_out,khi_out,klo_out;  // output bounds
  int me,nprocs;                    // rank and size in the plan communicator
#endif
  int init;                         // copy of the init flag passed at creation
};

// function prototypes

void fft_3d_destroy_plan_cuda(struct fft_plan_3d *);
void factor_cuda(int, int *, int *);
void bifactor_cuda(int, int *, int *);
void fft_1d_only_cuda(FFT_DATA *, int, int, struct fft_plan_3d *);
void fft_3d_cudaA(FFT_DATA *, FFT_DATA *, int, struct fft_plan_3d *);
void fft_3d_cuda(FFT_DATA *, FFT_DATA *, int, struct fft_plan_3d *);
struct fft_plan_3d *fft_3d_create_plan_cuda(MPI_Comm, int, int, int,
  int, int, int, int, int, int, int, int, int, int, int, int,
  int, int, int *,bool init);
- - ----------------------------------------------------------------------- - - USER-CUDA Package and associated modifications: - https://sourceforge.net/projects/lammpscuda/ - - Christian Trott, christian.trott@tu-ilmenau.de - Lars Winterfeld, lars.winterfeld@tu-ilmenau.de - Theoretical Physics II, University of Technology Ilmenau, Germany - - See the README file in the USER-CUDA directory. - - This software is distributed under the GNU General Public License. -------------------------------------------------------------------------- */ - -/* ---------------------------------------------------------------------- - LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator - http://lammps.sandia.gov, Sandia National Laboratories - Steve Plimpton, sjplimp@sandia.gov - - Copyright (2003) Sandia Corporation. Under the terms of Contract - DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains - certain rights in this software. This software is distributed under - the GNU General Public License. - - See the README file in the top-level LAMMPS directory. 
-------------------------------------------------------------------------- */ - -#include <mpi.h> -#include "fft3d_wrap_cuda.h" -#include "error.h" - -using namespace LAMMPS_NS; - -/* ---------------------------------------------------------------------- */ - -FFT3dCuda::FFT3dCuda(LAMMPS *lmp, MPI_Comm comm, int nfast, int nmid, int nslow, - int in_ilo, int in_ihi, int in_jlo, int in_jhi, - int in_klo, int in_khi, - int out_ilo, int out_ihi, int out_jlo, int out_jhi, - int out_klo, int out_khi, - int scaled, int permute, int *nbuf,bool init) : Pointers(lmp) -{ -#ifdef FFT_CUFFT - plan = fft_3d_create_plan_cuda(comm,nfast,nmid,nslow, - in_ilo,in_ihi,in_jlo,in_jhi,in_klo,in_khi, - out_ilo,out_ihi,out_jlo,out_jhi,out_klo,out_khi, - scaled,permute,nbuf,init); -#endif -#ifndef FFT_CUFFT - plan = fft_3d_create_plan(comm,nfast,nmid,nslow, - in_ilo,in_ihi,in_jlo,in_jhi,in_klo,in_khi, - out_ilo,out_ihi,out_jlo,out_jhi,out_klo,out_khi, - scaled,permute,nbuf,0); -#endif - if (plan == NULL) error->one(FLERR,"Could not create 3d FFT plan"); -} - -/* ---------------------------------------------------------------------- */ - -FFT3dCuda::~FFT3dCuda() -{ -#ifdef FFT_CUFFT - fft_3d_destroy_plan_cuda(plan); -#endif -#ifndef FFT_CUFFT - fft_3d_destroy_plan(plan); -#endif -} - -/* ---------------------------------------------------------------------- */ - -void FFT3dCuda::compute(double *in, double *out, int flag) -{ -#ifdef FFT_CUFFT - fft_3d_cuda((FFT_DATA *) in,(FFT_DATA *) out,flag,plan); -#endif -#ifndef FFT_CUFFT - fft_3d((FFT_DATA *) in,(FFT_DATA *) out,flag,plan); -#endif -} - -/* ---------------------------------------------------------------------- */ - -void FFT3dCuda::timing1d(double *in, int nsize, int flag) -{ -#ifdef FFT_CUFFT - fft_1d_only_cuda((FFT_DATA *) in,nsize,flag,plan); -#endif -#ifndef FFT_CUFFT - fft_1d_only((FFT_DATA *) in,nsize,flag,plan); -#endif -} - -#ifdef FFT_CUFFT -void FFT3dCuda::set_cudata(void* cudata,void* cudata2) -{ - - plan->cudata=(cufftData*) 
cudata; - plan->cudata2=(cufftData*) cudata2; - -} -#endif diff --git a/src/USER-CUDA/fft3d_wrap_cuda.h b/src/USER-CUDA/fft3d_wrap_cuda.h deleted file mode 100644 index cc6baa9ebe..0000000000 --- a/src/USER-CUDA/fft3d_wrap_cuda.h +++ /dev/null @@ -1,68 +0,0 @@ -/* -*- c++ -*- ---------------------------------------------------------- - LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator - - Original Version: - http://lammps.sandia.gov, Sandia National Laboratories - Steve Plimpton, sjplimp@sandia.gov - - See the README file in the top-level LAMMPS directory. - - ----------------------------------------------------------------------- - - USER-CUDA Package and associated modifications: - https://sourceforge.net/projects/lammpscuda/ - - Christian Trott, christian.trott@tu-ilmenau.de - Lars Winterfeld, lars.winterfeld@tu-ilmenau.de - Theoretical Physics II, University of Technology Ilmenau, Germany - - See the README file in the USER-CUDA directory. - - This software is distributed under the GNU General Public License. -------------------------------------------------------------------------- */ - -/* ---------------------------------------------------------------------- - LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator - http://lammps.sandia.gov, Sandia National Laboratories - Steve Plimpton, sjplimp@sandia.gov - - Copyright (2003) Sandia Corporation. Under the terms of Contract - DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains - certain rights in this software. This software is distributed under - the GNU General Public License. - - See the README file in the top-level LAMMPS directory. 
#ifndef FFT3D_WRAP_CUDA_H_
#define FFT3D_WRAP_CUDA_H_

#include "pointers.h"

// pick the plan implementation: cuFFT based if FFT_CUFFT is set,
// the regular fft3d one otherwise

#ifdef FFT_CUFFT
  #include "fft3d_cuda.h"
#endif
#ifndef FFT_CUFFT
  #include "fft3d.h"
#endif

namespace LAMMPS_NS {

// C++ wrapper owning one 3d FFT plan
// the plan is created in the constructor and destroyed in the destructor

class FFT3dCuda : protected Pointers {
 public:
  // arguments after the LAMMPS pointer are passed on to the plan creation
  // function: communicator, global grid size, input bounds, output bounds,
  // scaled, permute, nbuf and the init flag (only used with FFT_CUFFT)
  FFT3dCuda(class LAMMPS *, MPI_Comm,int,int,int,int,int,int,int,int,int,
        int,int,int,int,int,int,int,int,int *,bool);
  ~FFT3dCuda();
  // run the FFT: (in, out, flag)
  void compute(double *, double *, int);
  // run only the FFT part for timing: (in, nsize, flag)
  void timing1d(double *, int, int);

#ifdef FFT_CUFFT
  // hand the two cuFFT buffers to the plan, the plan only stores the pointers
  void set_cudata(void* cudata,void* cudata2);
#endif
 private:
  struct fft_plan_3d *plan;     // plan created in the constructor
};

}

#endif /*FFT3D_WRAP_CUDA_H_*/
-------------------------------------------------------------------------- */ - - -#include <cstring> -#include <cstdlib> -#include "fix_addforce_cuda.h" -#include "fix_addforce_cuda_cu.h" -#include "atom.h" -#include "update.h" -#include "respa.h" -#include "error.h" -#include "force.h" -#include "domain.h" -#include "user_cuda.h" -#include "memory.h" -#include "cuda_modify_flags.h" - - -using namespace LAMMPS_NS; -using namespace FixConst; -using namespace FixConstCuda; - -/* ---------------------------------------------------------------------- */ - -FixAddForceCuda::FixAddForceCuda(LAMMPS *lmp, int narg, char **arg) : - Fix(lmp, narg, arg) -{ - cuda = lmp->cuda; - if(cuda == NULL) - error->all(FLERR,"You cannot use a /cuda class, without activating 'cuda' acceleration. Provide '-c on' as command-line argument to LAMMPS.."); - - if (narg < 6) error->all(FLERR,"Illegal fix addforce/cuda command"); - - scalar_flag = 1; - vector_flag = 1; - size_vector = 3; - global_freq = 1; - extscalar = 1; - extvector = 1; - - xvalue = force->numeric(FLERR,arg[3]); - yvalue = force->numeric(FLERR,arg[4]); - zvalue = force->numeric(FLERR,arg[5]); - - // optional args - - iregion = -1; - - int iarg = 6; - while (iarg < narg) { - if (strcmp(arg[iarg],"region") == 0) { - if (iarg+2 > narg) error->all(FLERR,"Illegal fix addforce/cuda command"); - iregion = domain->find_region(arg[iarg+1]); - if (iregion == -1) error->all(FLERR,"Fix addforce/cuda region ID does not exist"); - iarg += 2; - } else error->all(FLERR,"Illegal fix addforce/cuda command"); - } - - if(iregion!=-1) error->all(FLERR,"Error: fix addforce/cuda does not currently support 'region' option"); - - force_flag = 0; - foriginal[0] = foriginal[1] = foriginal[2] = foriginal[3] = 0.0; - cu_foriginal = NULL; -} - -/* ---------------------------------------------------------------------- */ - -int FixAddForceCuda::setmask() -{ - int mask = 0; - mask |= POST_FORCE_CUDA; - mask |= THERMO_ENERGY_CUDA; - mask |= POST_FORCE_RESPA; 
- mask |= MIN_POST_FORCE_CUDA; - return mask; -} - -/* ---------------------------------------------------------------------- */ - -void FixAddForceCuda::init() -{ - if(not cu_foriginal) - cu_foriginal = new cCudaData<double, F_CFLOAT, x> (foriginal,4); - if (strstr(update->integrate_style,"respa")) - nlevels_respa = ((Respa *) update->integrate)->nlevels; -} - -/* ---------------------------------------------------------------------- */ - -void FixAddForceCuda::setup(int vflag) -{ - MYDBG( printf("# CUDA: FixAddForceCuda::setup\n"); ) - - if (strstr(update->integrate_style,"verlet")) - { - Cuda_FixAddForceCuda_Init(&cuda->shared_data); - cuda->cu_f->upload(); - post_force(vflag); - cuda->cu_f->download(); - - } - else { - ((Respa *) update->integrate)->copy_flevel_f(nlevels_respa-1); - cuda->cu_f->download(); - post_force_respa(vflag,nlevels_respa-1,0); - cuda->cu_f->upload(); - ((Respa *) update->integrate)->copy_f_flevel(nlevels_respa-1); - } - MYDBG( printf("# CUDA: FixAddForceCuda::setup done\n"); ) -} - -/* ---------------------------------------------------------------------- */ - -void FixAddForceCuda::min_setup(int vflag) -{ - post_force(vflag); -} - -/* ---------------------------------------------------------------------- */ - -void FixAddForceCuda::post_force(int vflag) -{ - MYDBG( printf("# CUDA: FixAddForceCuda::postforce start\n"); ) - force_flag = 0; - cu_foriginal->memset_device(0); - Cuda_FixAddForceCuda_PostForce(&cuda->shared_data, groupbit, xvalue, yvalue,zvalue,(F_CFLOAT*) cu_foriginal->dev_data()); - cu_foriginal->download(); -} - -/* ---------------------------------------------------------------------- */ - -void FixAddForceCuda::post_force_respa(int vflag, int ilevel, int iloop) -{ - if (ilevel == nlevels_respa-1) post_force(vflag); -} - -/* ---------------------------------------------------------------------- */ - -void FixAddForceCuda::min_post_force(int vflag) -{ - post_force(vflag); -} - -/* 
---------------------------------------------------------------------- - potential energy of added force -------------------------------------------------------------------------- */ - -double FixAddForceCuda::compute_scalar() -{ - // only sum across procs one time - - if (force_flag == 0) { - MPI_Allreduce(foriginal,foriginal_all,4,MPI_DOUBLE,MPI_SUM,world); - force_flag = 1; - } - return foriginal_all[0]; -} - -/* ---------------------------------------------------------------------- - return components of total force on fix group before force was changed -------------------------------------------------------------------------- */ - -double FixAddForceCuda::compute_vector(int n) -{ - // only sum across procs one time - - if (force_flag == 0) { - MPI_Allreduce(foriginal,foriginal_all,4,MPI_DOUBLE,MPI_SUM,world); - force_flag = 1; - } - return foriginal_all[n+1]; -} diff --git a/src/USER-CUDA/fix_addforce_cuda.h b/src/USER-CUDA/fix_addforce_cuda.h deleted file mode 100644 index 043cae6d21..0000000000 --- a/src/USER-CUDA/fix_addforce_cuda.h +++ /dev/null @@ -1,64 +0,0 @@ -/* -*- c++ -*- ---------------------------------------------------------- - LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator - - Original Version: - http://lammps.sandia.gov, Sandia National Laboratories - Steve Plimpton, sjplimp@sandia.gov - - See the README file in the top-level LAMMPS directory. - - ----------------------------------------------------------------------- - - USER-CUDA Package and associated modifications: - https://sourceforge.net/projects/lammpscuda/ - - Christian Trott, christian.trott@tu-ilmenau.de - Lars Winterfeld, lars.winterfeld@tu-ilmenau.de - Theoretical Physics II, University of Technology Ilmenau, Germany - - See the README file in the USER-CUDA directory. - - This software is distributed under the GNU General Public License. 
#ifdef FIX_CLASS

// registers the class under the style name "addforce/cuda"
FixStyle(addforce/cuda,FixAddForceCuda)

#else

#ifndef LMP_FIX_ADD_FORCE_CUDA_H
#define LMP_FIX_ADD_FORCE_CUDA_H

#include "fix.h"
#include "cuda_data.h"

namespace LAMMPS_NS {

// fix that adds a constant force (xvalue,yvalue,zvalue) to the atoms of
// its group, with the work done by the Cuda_FixAddForceCuda_* functions

class FixAddForceCuda : public Fix {
 public:
  FixAddForceCuda(class LAMMPS *, int, char **);
  int setmask();
  void init();
  void setup(int);
  void min_setup(int);
  void post_force(int);
  void post_force_respa(int, int, int);
  void min_post_force(int);
  double compute_scalar();
  double compute_vector(int);

 private:
  class Cuda *cuda;                 // taken from lmp->cuda, must not be NULL
  int iregion;                      // region index, -1 = none (regions are
                                    // rejected by the constructor)
  double xvalue,yvalue,zvalue;      // force components to add
  double foriginal[4],foriginal_all[4];   // per-proc values and their sum
                                          // over all procs; [0] is returned
                                          // by compute_scalar, [1..3] by
                                          // compute_vector
  // wrapper around foriginal, created in init()
  // NOTE(review): no destructor is declared, so this object is never freed
  cCudaData<double     , F_CFLOAT   , x>* cu_foriginal;
  int force_flag;                   // 1 if foriginal_all is up to date
  int nlevels_respa;                // only set when rRESPA is used
};

}

#endif
#endif
-------------------------------------------------------------------------- */ - - -#include <mpi.h> -#include <cstring> -#include <cstdlib> -#include "fix_aveforce_cuda.h" -#include "fix_aveforce_cuda_cu.h" -#include "atom.h" -#include "update.h" -#include "respa.h" -#include "domain.h" -#include "user_cuda.h" -#include "cuda_modify_flags.h" -#include "variable.h" -#include "input.h" -#include "modify.h" -#include "atom_masks.h" -#include "error.h" -#include "force.h" - - -using namespace LAMMPS_NS; -using namespace FixConst; -using namespace FixConstCuda; - -enum{NONE,CONSTANT,EQUAL}; - -/* ---------------------------------------------------------------------- */ - -FixAveForceCuda::FixAveForceCuda(LAMMPS *lmp, int narg, char **arg) : - Fix(lmp, narg, arg) -{ - cuda = lmp->cuda; - if(cuda == NULL) - error->all(FLERR,"You cannot use a /cuda class, without activating 'cuda' acceleration. Provide '-c on' as command-line argument to LAMMPS.."); - - if (narg != 6) error->all(FLERR,"Illegal fix aveforce command"); - - vector_flag = 1; - size_vector = 3; - global_freq = 1; - extvector = 1; - - xstr = ystr = zstr = NULL; - xvalue = yvalue = zvalue = 0; - - if (strstr(arg[3],"v_") == arg[3]) { - int n = strlen(&arg[3][2]) + 1; - xstr = new char[n]; - strcpy(xstr,&arg[3][2]); - } else if (strcmp(arg[3],"NULL") == 0) { - xstyle = NONE; - } else { - xvalue = force->numeric(FLERR,arg[3]); - xstyle = CONSTANT; - } - if (strstr(arg[4],"v_") == arg[4]) { - int n = strlen(&arg[4][2]) + 1; - ystr = new char[n]; - strcpy(ystr,&arg[4][2]); - } else if (strcmp(arg[4],"NULL") == 0) { - ystyle = NONE; - } else { - yvalue = force->numeric(FLERR,arg[4]); - ystyle = CONSTANT; - } - if (strstr(arg[5],"v_") == arg[5]) { - int n = strlen(&arg[5][2]) + 1; - zstr = new char[n]; - strcpy(zstr,&arg[5][2]); - } else if (strcmp(arg[5],"NULL") == 0) { - zstyle = NONE; - } else { - zvalue = force->numeric(FLERR,arg[5]); - zstyle = CONSTANT; - } - - // optional args - - iregion = -1; - - int iarg = 6; 
- while (iarg < narg) { - if (strcmp(arg[iarg],"region") == 0) { - if (iarg+2 > narg) error->all(FLERR,"Illegal fix aveforce command"); - iregion = domain->find_region(arg[iarg+1]); - if (iregion == -1) error->all(FLERR,"Fix aveforce region ID does not exist"); - iarg += 2; - } else error->all(FLERR,"Illegal fix aveforce command"); - - } - - if(iregion!=-1) error->all(FLERR,"Error: fix aveforce/cuda does not currently support 'region' option"); - - foriginal_all[0] = foriginal_all[1] = foriginal_all[2] = foriginal_all[3] = 0.0; - foriginal[0] = foriginal[1] = foriginal[2] = foriginal[3] = 0.0; - cu_foriginal = NULL; - -} - -FixAveForceCuda::~FixAveForceCuda() -{ - delete [] xstr; - delete [] ystr; - delete [] zstr; -} - -/* ---------------------------------------------------------------------- */ - -int FixAveForceCuda::setmask() -{ - int mask = 0; - mask |= POST_FORCE_CUDA; - mask |= POST_FORCE_RESPA; - mask |= MIN_POST_FORCE_CUDA; - return mask; -} - -/* ---------------------------------------------------------------------- */ - -void FixAveForceCuda::init() -{ - if(not cu_foriginal) - cu_foriginal = new cCudaData<double, F_CFLOAT, x> (foriginal,4); - - if (xstr) { - xvar = input->variable->find(xstr); - if (xvar < 0) - error->all(FLERR,"Variable name for fix aveforce does not exist"); - if (input->variable->equalstyle(xvar)) xstyle = EQUAL; - else error->all(FLERR,"Variable for fix aveforce is invalid style"); - } - if (ystr) { - yvar = input->variable->find(ystr); - if (yvar < 0) - error->all(FLERR,"Variable name for fix aveforce does not exist"); - if (input->variable->equalstyle(yvar)) ystyle = EQUAL; - else error->all(FLERR,"Variable for fix aveforce is invalid style"); - } - if (zstr) { - zvar = input->variable->find(zstr); - if (zvar < 0) - error->all(FLERR,"Variable name for fix aveforce does not exist"); - if (input->variable->equalstyle(zvar)) zstyle = EQUAL; - else error->all(FLERR,"Variable for fix aveforce is invalid style"); - } - - if (xstyle == 
EQUAL || ystyle == EQUAL || zstyle == EQUAL) varflag = EQUAL; - else varflag = CONSTANT; - -} - -/* ---------------------------------------------------------------------- */ - -void FixAveForceCuda::setup(int vflag) -{ - if (strstr(update->integrate_style,"verlet")) - { - Cuda_FixAveForceCuda_Init(&cuda->shared_data); - cuda->cu_f->upload(); - post_force(vflag); - cuda->cu_f->download(); - - } - else - { - } -} - -/* ---------------------------------------------------------------------- */ - -void FixAveForceCuda::min_setup(int vflag) -{ - post_force(vflag); -} - -/* ---------------------------------------------------------------------- */ - -void FixAveForceCuda::post_force(int vflag) -{ - // sum forces on participating atoms - - cu_foriginal->memset_device(0); - Cuda_FixAveForceCuda_PostForce_FOrg(&cuda->shared_data, groupbit,(F_CFLOAT*) cu_foriginal->dev_data()); - cu_foriginal->download(); - - // average the force on participating atoms - // add in requested amount - - MPI_Allreduce(foriginal,foriginal_all,4,MPI_DOUBLE,MPI_SUM,world); - int ncount = static_cast<int> (foriginal_all[3]); - if (ncount == 0) return; - - if (varflag == EQUAL) { - unsigned int datamask = EMPTY_MASK; - if (xstyle == EQUAL) datamask &= input->variable->data_mask(xstr); - if (ystyle == EQUAL) datamask &= input->variable->data_mask(ystr); - if (zstyle == EQUAL) datamask &= input->variable->data_mask(zstr); - - cuda->download(datamask); - modify->clearstep_compute(); - if (xstyle == EQUAL) xvalue = input->variable->compute_equal(xvar); - if (ystyle == EQUAL) yvalue = input->variable->compute_equal(yvar); - if (zstyle == EQUAL) zvalue = input->variable->compute_equal(zvar); - modify->addstep_compute(update->ntimestep + 1); - } - - double fave[3]; - fave[0] = foriginal_all[0]/ncount + xvalue; - fave[1] = foriginal_all[1]/ncount + yvalue; - fave[2] = foriginal_all[2]/ncount + zvalue; - - // set force of all participating atoms to same value - // only for active dimensions - - 
Cuda_FixAveForceCuda_PostForce_Set(&cuda->shared_data, groupbit,!(xstyle==NONE),!(ystyle==NONE),!(zstyle==NONE),fave[0],fave[1],fave[2]); -} - -/* ---------------------------------------------------------------------- */ - -void FixAveForceCuda::post_force_respa(int vflag, int ilevel, int iloop) -{ - -} - -/* ---------------------------------------------------------------------- */ - -void FixAveForceCuda::min_post_force(int vflag) -{ - post_force(vflag); -} - -/* ---------------------------------------------------------------------- - return components of total force on fix group before force was changed -------------------------------------------------------------------------- */ - -double FixAveForceCuda::compute_vector(int n) -{ - return foriginal_all[n]; -} diff --git a/src/USER-CUDA/fix_aveforce_cuda.h b/src/USER-CUDA/fix_aveforce_cuda.h deleted file mode 100644 index c22e702ee2..0000000000 --- a/src/USER-CUDA/fix_aveforce_cuda.h +++ /dev/null @@ -1,68 +0,0 @@ -/* -*- c++ -*- ---------------------------------------------------------- - LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator - - Original Version: - http://lammps.sandia.gov, Sandia National Laboratories - Steve Plimpton, sjplimp@sandia.gov - - See the README file in the top-level LAMMPS directory. - - ----------------------------------------------------------------------- - - USER-CUDA Package and associated modifications: - https://sourceforge.net/projects/lammpscuda/ - - Christian Trott, christian.trott@tu-ilmenau.de - Lars Winterfeld, lars.winterfeld@tu-ilmenau.de - Theoretical Physics II, University of Technology Ilmenau, Germany - - See the README file in the USER-CUDA directory. - - This software is distributed under the GNU General Public License. 
-------------------------------------------------------------------------- */ - -#ifdef FIX_CLASS - -FixStyle(aveforce/cuda,FixAveForceCuda) - -#else - - -#ifndef LMP_FIX_AVE_FORCE_CUDA_H -#define LMP_FIX_AVE_FORCE_CUDA_H - -#include "fix.h" -#include "cuda_data.h" - -namespace LAMMPS_NS { - -class FixAveForceCuda : public Fix { - public: - FixAveForceCuda(class LAMMPS *, int, char **); - ~FixAveForceCuda(); - int setmask(); - void init(); - void setup(int); - void min_setup(int); - void post_force(int); - void post_force_respa(int, int, int); - void min_post_force(int); - double compute_vector(int); - - private: - class Cuda *cuda; - char *xstr,*ystr,*zstr; - int iregion; - double xvalue,yvalue,zvalue; - double foriginal_all[4]; - double foriginal[4]; - cCudaData<double , F_CFLOAT , x>* cu_foriginal; - int varflag; - int xvar,yvar,zvar,xstyle,ystyle,zstyle; - -}; - -} - -#endif -#endif diff --git a/src/USER-CUDA/fix_enforce2d_cuda.cpp b/src/USER-CUDA/fix_enforce2d_cuda.cpp deleted file mode 100644 index d10edf1cce..0000000000 --- a/src/USER-CUDA/fix_enforce2d_cuda.cpp +++ /dev/null @@ -1,171 +0,0 @@ -/* ---------------------------------------------------------------------- - LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator - - Original Version: - http://lammps.sandia.gov, Sandia National Laboratories - Steve Plimpton, sjplimp@sandia.gov - - See the README file in the top-level LAMMPS directory. - - ----------------------------------------------------------------------- - - USER-CUDA Package and associated modifications: - https://sourceforge.net/projects/lammpscuda/ - - Christian Trott, christian.trott@tu-ilmenau.de - Lars Winterfeld, lars.winterfeld@tu-ilmenau.de - Theoretical Physics II, University of Technology Ilmenau, Germany - - See the README file in the USER-CUDA directory. - - This software is distributed under the GNU General Public License. 
-------------------------------------------------------------------------- */ - -/* ---------------------------------------------------------------------- - LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator - http://lammps.sandia.gov, Sandia National Laboratories - Steve Plimpton, sjplimp@sandia.gov - - Copyright (2003) Sandia Corporation. Under the terms of Contract - DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains - certain rights in this software. This software is distributed under - the GNU General Public License. - - See the README file in the top-level LAMMPS directory. -------------------------------------------------------------------------- */ - -#include <cstring> -#include "fix_enforce2d_cuda.h" -#include "fix_enforce2d_cuda_cu.h" -#include "atom.h" -#include "update.h" -#include "domain.h" -#include "respa.h" -#include "error.h" -#include "user_cuda.h" -#include "cuda_modify_flags.h" - -using namespace LAMMPS_NS; -using namespace FixConst; -using namespace FixConstCuda; - -/* ---------------------------------------------------------------------- */ - -FixEnforce2DCuda::FixEnforce2DCuda(LAMMPS *lmp, int narg, char **arg) : - Fix(lmp, narg, arg) -{ - cuda = lmp->cuda; - if(cuda == NULL) - error->all(FLERR,"You cannot use a /cuda class, without activating 'cuda' acceleration. 
Provide '-c on' as command-line argument to LAMMPS.."); - - if (narg != 3) error->all(FLERR,"Illegal fix enforce2d command"); -} - -/* ---------------------------------------------------------------------- */ - -int FixEnforce2DCuda::setmask() -{ - int mask = 0; - mask |= POST_FORCE_CUDA; - mask |= POST_FORCE_RESPA; - mask |= MIN_POST_FORCE_CUDA; - return mask; -} - -/* ---------------------------------------------------------------------- */ - -void FixEnforce2DCuda::init() -{ - if (domain->dimension == 3) - error->all(FLERR,"Cannot use fix enforce2d/cuda with 3d simulation"); - if (atom->omega_flag) - error->warning(FLERR,"Enforce2d/cuda does not support omega_flag on gpu yet. Will be handled on cpu."); - - if (atom->angmom_flag) - error->warning(FLERR,"Enforce2d/cuda does not support angmom_flag (angular momentum) on gpu yet. Will be handled on cpu."); - - if (atom->torque_flag) - error->warning(FLERR,"Enforce2d/cuda does not support torque_flag on gpu yet. Will be handled on cpu."); -} - -/* ---------------------------------------------------------------------- */ - -void FixEnforce2DCuda::setup(int vflag) -{ - if (strstr(update->integrate_style,"verlet")) - { - Cuda_FixEnforce2dCuda_Init(&cuda->shared_data); - cuda->cu_f->upload(); - cuda->cu_v->upload(); - post_force(vflag); - cuda->cu_f->download(); - cuda->cu_v->download(); - } - else { - int nlevels_respa = ((Respa *) update->integrate)->nlevels; - for (int ilevel = 0; ilevel < nlevels_respa; ilevel++) { - ((Respa *) update->integrate)->copy_flevel_f(ilevel); - post_force_respa(vflag,ilevel,0); - ((Respa *) update->integrate)->copy_f_flevel(ilevel); - } - } -} - -/* ---------------------------------------------------------------------- */ - -void FixEnforce2DCuda::min_setup(int vflag) -{ - post_force(vflag); -} - -/* ---------------------------------------------------------------------- */ - -void FixEnforce2DCuda::post_force(int vflag) -{ - Cuda_FixEnforce2dCuda_PostForce(&cuda->shared_data, groupbit); - 
- int *mask = atom->mask; - int nlocal = atom->nlocal; - if (igroup == atom->firstgroup) nlocal = atom->nfirst; - - if (atom->omega_flag) { - double **omega = atom->omega; - for (int i = 0; i < nlocal; i++) - if (mask[i] & groupbit) { - omega[i][0] = 0.0; - omega[i][1] = 0.0; - } - } - - if (atom->angmom_flag) { - double **angmom = atom->angmom; - for (int i = 0; i < nlocal; i++) - if (mask[i] & groupbit) { - angmom[i][0] = 0.0; - angmom[i][1] = 0.0; - } - } - - if (atom->torque_flag) { - double **torque = atom->torque; - for (int i = 0; i < nlocal; i++) - if (mask[i] & groupbit) { - torque[i][0] = 0.0; - torque[i][1] = 0.0; - } - } -} - -/* ---------------------------------------------------------------------- */ - -void FixEnforce2DCuda::post_force_respa(int vflag, int ilevel, int iloop) -{ - post_force(vflag); -} - -/* ---------------------------------------------------------------------- */ - -void FixEnforce2DCuda::min_post_force(int vflag) -{ - post_force(vflag); -} diff --git a/src/USER-CUDA/fix_enforce2d_cuda.h b/src/USER-CUDA/fix_enforce2d_cuda.h deleted file mode 100644 index 63bf289d9e..0000000000 --- a/src/USER-CUDA/fix_enforce2d_cuda.h +++ /dev/null @@ -1,55 +0,0 @@ -/* -*- c++ -*- ---------------------------------------------------------- - LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator - - Original Version: - http://lammps.sandia.gov, Sandia National Laboratories - Steve Plimpton, sjplimp@sandia.gov - - See the README file in the top-level LAMMPS directory. - - ----------------------------------------------------------------------- - - USER-CUDA Package and associated modifications: - https://sourceforge.net/projects/lammpscuda/ - - Christian Trott, christian.trott@tu-ilmenau.de - Lars Winterfeld, lars.winterfeld@tu-ilmenau.de - Theoretical Physics II, University of Technology Ilmenau, Germany - - See the README file in the USER-CUDA directory. - - This software is distributed under the GNU General Public License. 
-------------------------------------------------------------------------- */ - -#ifdef FIX_CLASS - -FixStyle(enforce2d/cuda,FixEnforce2DCuda) - -#else - -#ifndef LMP_FIX_ENFORCE2D_CUDA_H -#define LMP_FIX_ENFORCE2D_CUDA_H - -#include "fix.h" - -namespace LAMMPS_NS { - -class FixEnforce2DCuda : public Fix { - public: - FixEnforce2DCuda(class LAMMPS *, int, char **); - int setmask(); - void init(); - void setup(int); - void min_setup(int); - void post_force(int); - void post_force_respa(int, int, int); - void min_post_force(int); - - private: - class Cuda *cuda; -}; - -} - -#endif -#endif diff --git a/src/USER-CUDA/fix_freeze_cuda.cpp b/src/USER-CUDA/fix_freeze_cuda.cpp deleted file mode 100644 index c4a04af564..0000000000 --- a/src/USER-CUDA/fix_freeze_cuda.cpp +++ /dev/null @@ -1,137 +0,0 @@ -/* ---------------------------------------------------------------------- - LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator - - Original Version: - http://lammps.sandia.gov, Sandia National Laboratories - Steve Plimpton, sjplimp@sandia.gov - - See the README file in the top-level LAMMPS directory. - - ----------------------------------------------------------------------- - - USER-CUDA Package and associated modifications: - https://sourceforge.net/projects/lammpscuda/ - - Christian Trott, christian.trott@tu-ilmenau.de - Lars Winterfeld, lars.winterfeld@tu-ilmenau.de - Theoretical Physics II, University of Technology Ilmenau, Germany - - See the README file in the USER-CUDA directory. - - This software is distributed under the GNU General Public License. 
-------------------------------------------------------------------------- */ -#include <cstring> -#include <cstdlib> -#include "fix_freeze_cuda.h" -#include "fix_freeze_cuda_cu.h" -#include "atom.h" -#include "update.h" -#include "respa.h" -#include "error.h" -#include "user_cuda.h" -#include "memory.h" -#include "modify.h" -#include "cuda_modify_flags.h" - - -using namespace LAMMPS_NS; -using namespace FixConst; -using namespace FixConstCuda; - -/* ---------------------------------------------------------------------- */ - -FixFreezeCuda::FixFreezeCuda(LAMMPS *lmp, int narg, char **arg) : - Fix(lmp, narg, arg) -{ - cuda = lmp->cuda; - if(cuda == NULL) - error->all(FLERR,"You cannot use a /cuda class, without activating 'cuda' acceleration. Provide '-c on' as command-line argument to LAMMPS.."); - - if (narg != 3) error->all(FLERR,"Illegal fix freeze command"); - - if (!atom->torque_flag) - error->all(FLERR,"Fix freeze requires atom attribute torque"); - - vector_flag = 1; - size_vector = 3; - global_freq = 1; - extvector = 1; - - - - force_flag = 0; - foriginal[0] = foriginal[1] = foriginal[2] = 0.0; - cu_foriginal=NULL; -} - -/* ---------------------------------------------------------------------- */ - -int FixFreezeCuda::setmask() -{ - int mask = 0; - mask |= POST_FORCE_CUDA; - mask |= THERMO_ENERGY_CUDA; - return mask; -} - -/* ---------------------------------------------------------------------- */ - -void FixFreezeCuda::init() -{ - if(not cu_foriginal) - cu_foriginal = new cCudaData<double, F_CFLOAT, x> (foriginal,3); - int count = 0; - for (int i = 0; i < modify->nfix; i++) - if (strcmp(modify->fix[i]->style,"freeze") == 0) count++; - if (count > 1) error->all(FLERR,"More than one fix freeze"); -} - -/* ---------------------------------------------------------------------- */ - -void FixFreezeCuda::setup(int vflag) -{ - MYDBG( printf("# CUDA: FixFreezeCuda::setup\n"); ) - - if (strstr(update->integrate_style,"verlet")) - { - 
Cuda_FixFreezeCuda_Init(&cuda->shared_data); - cuda->cu_f->upload(); - post_force(vflag); - cuda->cu_f->download(); - - } - - MYDBG( printf("# CUDA: FixFreezeCuda::setup done\n"); ) -} - -/* ---------------------------------------------------------------------- */ - -/* ---------------------------------------------------------------------- */ - -void FixFreezeCuda::post_force(int vflag) -{ - MYDBG( printf("# CUDA: FixFreezeCuda::postforce start\n"); ) - force_flag = 0; - cu_foriginal->memset_device(0); - Cuda_FixFreezeCuda_PostForce(&cuda->shared_data, groupbit, (F_CFLOAT*) cu_foriginal->dev_data()); - cu_foriginal->download(); -} - -/* ---------------------------------------------------------------------- */ - - - -/* ---------------------------------------------------------------------- - return components of total force on fix group before force was changed -------------------------------------------------------------------------- */ - -double FixFreezeCuda::compute_vector(int n) -{ - // only sum across procs one time - - if (force_flag == 0) { - MPI_Allreduce(foriginal,foriginal_all,3,MPI_DOUBLE,MPI_SUM,world); - force_flag = 1; - } - return foriginal_all[n+1]; -} diff --git a/src/USER-CUDA/fix_freeze_cuda.h b/src/USER-CUDA/fix_freeze_cuda.h deleted file mode 100644 index 9f6a1a99f0..0000000000 --- a/src/USER-CUDA/fix_freeze_cuda.h +++ /dev/null @@ -1,57 +0,0 @@ -/* -*- c++ -*- ---------------------------------------------------------- - LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator - - Original Version: - http://lammps.sandia.gov, Sandia National Laboratories - Steve Plimpton, sjplimp@sandia.gov - - See the README file in the top-level LAMMPS directory. 
- - ----------------------------------------------------------------------- - - USER-CUDA Package and associated modifications: - https://sourceforge.net/projects/lammpscuda/ - - Christian Trott, christian.trott@tu-ilmenau.de - Lars Winterfeld, lars.winterfeld@tu-ilmenau.de - Theoretical Physics II, University of Technology Ilmenau, Germany - - See the README file in the USER-CUDA directory. - - This software is distributed under the GNU General Public License. -------------------------------------------------------------------------- */ - -#ifdef FIX_CLASS - -FixStyle(freeze/cuda,FixFreezeCuda) - -#else - -#ifndef LMP_FIX_FREEZE_CUDA_H -#define LMP_FIX_FREEZE_CUDA_H - -#include "fix.h" -#include "cuda_data.h" - -namespace LAMMPS_NS { - -class FixFreezeCuda : public Fix { - public: - FixFreezeCuda(class LAMMPS *, int, char **); - int setmask(); - void init(); - void setup(int); - void post_force(int); - double compute_vector(int); - - private: - class Cuda *cuda; - double foriginal[3],foriginal_all[3]; - cCudaData<double , F_CFLOAT , x>* cu_foriginal; - int force_flag; -}; - -} - -#endif -#endif diff --git a/src/USER-CUDA/fix_gravity_cuda.cpp b/src/USER-CUDA/fix_gravity_cuda.cpp deleted file mode 100644 index 34107ed593..0000000000 --- a/src/USER-CUDA/fix_gravity_cuda.cpp +++ /dev/null @@ -1,180 +0,0 @@ -/* ---------------------------------------------------------------------- - LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator - - Original Version: - http://lammps.sandia.gov, Sandia National Laboratories - Steve Plimpton, sjplimp@sandia.gov - - See the README file in the top-level LAMMPS directory. 
- - ----------------------------------------------------------------------- - - USER-CUDA Package and associated modifications: - https://sourceforge.net/projects/lammpscuda/ - - Christian Trott, christian.trott@tu-ilmenau.de - Lars Winterfeld, lars.winterfeld@tu-ilmenau.de - Theoretical Physics II, University of Technology Ilmenau, Germany - - See the README file in the USER-CUDA directory. - - This software is distributed under the GNU General Public License. -------------------------------------------------------------------------- */ -#include <cmath> -#include <cstdio> -#include <cstring> -#include <cstdlib> -#include "fix_gravity_cuda.h" -#include "fix_gravity_cuda_cu.h" -#include "atom.h" -#include "update.h" -#include "domain.h" -#include "respa.h" -#include "user_cuda.h" -#include "cuda_modify_flags.h" -#include "math_const.h" -#include "error.h" -#include "force.h" - -using namespace LAMMPS_NS; -using namespace FixConst; -using namespace FixConstCuda; -using namespace MathConst; - -enum{CHUTE,SPHERICAL,GRADIENT,VECTOR}; - -/* ---------------------------------------------------------------------- */ - -FixGravityCuda::FixGravityCuda(LAMMPS *lmp, int narg, char **arg) : - Fix(lmp, narg, arg) -{ - cuda = lmp->cuda; - if(cuda == NULL) - error->all(FLERR,"You cannot use a /cuda class, without activating 'cuda' acceleration. 
Provide '-c on' as command-line argument to LAMMPS.."); - - if (narg < 5) error->all(FLERR,"Illegal fix gravity command"); - - magnitude = force->numeric(FLERR,arg[3]); - - if (strcmp(arg[4],"chute") == 0) { - if (narg != 6) error->all(FLERR,"Illegal fix gravity command"); - style = CHUTE; - phi = 0.0; - theta = 180.0 - force->numeric(FLERR,arg[5]); - } else if (strcmp(arg[4],"spherical") == 0) { - if (narg != 7) error->all(FLERR,"Illegal fix gravity command"); - style = SPHERICAL; - phi = force->numeric(FLERR,arg[5]); - theta = force->numeric(FLERR,arg[6]); - } else if (strcmp(arg[4],"gradient") == 0) { - if (narg != 9) error->all(FLERR,"Illegal fix gravity command"); - style = GRADIENT; - phi = force->numeric(FLERR,arg[5]); - theta = force->numeric(FLERR,arg[6]); - phigrad = force->numeric(FLERR,arg[7]); - thetagrad = force->numeric(FLERR,arg[8]); - } else if (strcmp(arg[4],"vector") == 0) { - if (narg != 8) error->all(FLERR,"Illegal fix gravity command"); - style = VECTOR; - xdir = force->numeric(FLERR,arg[5]); - ydir = force->numeric(FLERR,arg[6]); - zdir = force->numeric(FLERR,arg[7]); - } else error->all(FLERR,"Illegal fix gravity command"); - - degree2rad = MY_PI/180.0; - - if (style == CHUTE || style == SPHERICAL || style == GRADIENT) { - if (domain->dimension == 3) { - xgrav = sin(degree2rad * theta) * cos(degree2rad * phi); - ygrav = sin(degree2rad * theta) * sin(degree2rad * phi); - zgrav = cos(degree2rad * theta); - } else { - xgrav = sin(degree2rad * theta); - ygrav = cos(degree2rad * theta); - zgrav = 0.0; - } - } else if (style == VECTOR) { - if (domain->dimension == 3) { - double length = sqrt(xdir*xdir + ydir*ydir + zdir*zdir); - xgrav = xdir/length; - ygrav = ydir/length; - zgrav = zdir/length; - } else { - double length = sqrt(xdir*xdir + ydir*ydir); - xgrav = xdir/length; - ygrav = ydir/length; - zgrav = 0.0; - } - } - - time_origin = update->ntimestep; -} - -/* ---------------------------------------------------------------------- */ - -int 
FixGravityCuda::setmask() -{ - int mask = 0; - mask |= POST_FORCE_CUDA; - return mask; -} - -/* ---------------------------------------------------------------------- */ - -void FixGravityCuda::init() -{ - dt = update->dt; - - xacc = magnitude*xgrav; - yacc = magnitude*ygrav; - zacc = magnitude*zgrav; -} - -/* ---------------------------------------------------------------------- */ - -void FixGravityCuda::setup(int vflag) -{ - MYDBG( printf("# CUDA: FixGravityCuda::setup\n"); ) - - if (strstr(update->integrate_style,"verlet")) - { - Cuda_FixGravityCuda_Init(&cuda->shared_data); - cuda->cu_f->upload(); - post_force(vflag); - cuda->cu_f->download(); - - } - else { - } - MYDBG( printf("# CUDA: FixGravityCuda::setup done\n"); ) -} - -/* ---------------------------------------------------------------------- */ - -void FixGravityCuda::post_force(int vflag) -{ - // update direction of gravity vector if gradient style - - if (style == GRADIENT) { - if (domain->dimension == 3) { - double phi_current = degree2rad * - (phi + (update->ntimestep - time_origin)*dt*phigrad*360.0); - double theta_current = degree2rad * - (theta + (update->ntimestep - time_origin)*dt*thetagrad*360.0); - xgrav = sin(theta_current) * cos(phi_current); - ygrav = sin(theta_current) * sin(phi_current); - zgrav = cos(theta_current); - } else { - double theta_current = degree2rad * - (theta + (update->ntimestep - time_origin)*dt*thetagrad*360.0); - xgrav = sin(theta_current); - ygrav = cos(theta_current); - } - xacc = magnitude*xgrav; - yacc = magnitude*ygrav; - zacc = magnitude*zgrav; - } - - MYDBG( printf("# CUDA: FixGravityCuda::postforce start\n"); ) - Cuda_FixGravityCuda_PostForce(&cuda->shared_data, groupbit, xacc,yacc,zacc); -} diff --git a/src/USER-CUDA/fix_gravity_cuda.h b/src/USER-CUDA/fix_gravity_cuda.h deleted file mode 100644 index 98d2586660..0000000000 --- a/src/USER-CUDA/fix_gravity_cuda.h +++ /dev/null @@ -1,60 +0,0 @@ -/* -*- c++ -*- 
---------------------------------------------------------- - LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator - - Original Version: - http://lammps.sandia.gov, Sandia National Laboratories - Steve Plimpton, sjplimp@sandia.gov - - See the README file in the top-level LAMMPS directory. - - ----------------------------------------------------------------------- - - USER-CUDA Package and associated modifications: - https://sourceforge.net/projects/lammpscuda/ - - Christian Trott, christian.trott@tu-ilmenau.de - Lars Winterfeld, lars.winterfeld@tu-ilmenau.de - Theoretical Physics II, University of Technology Ilmenau, Germany - - See the README file in the USER-CUDA directory. - - This software is distributed under the GNU General Public License. -------------------------------------------------------------------------- */ - -#ifdef FIX_CLASS - -FixStyle(gravity/cuda,FixGravityCuda) - -#else - -#ifndef LMP_FIX_GRAVITY_CUDA_H -#define LMP_FIX_GRAVITY_CUDA_H - -#include "fix.h" -#include "cuda_data.h" - -namespace LAMMPS_NS { - -class FixGravityCuda : public Fix { - public: - FixGravityCuda(class LAMMPS *, int, char **); - int setmask(); - void init(); - void setup(int); - void post_force(int); - - private: - class Cuda *cuda; - int style; - double magnitude,dt; - double phi,theta,phigrad,thetagrad; - double xdir,ydir,zdir; - double xgrav,ygrav,zgrav,xacc,yacc,zacc; - double degree2rad; - int time_origin; -}; - -} - -#endif -#endif diff --git a/src/USER-CUDA/fix_nh_cuda.cpp b/src/USER-CUDA/fix_nh_cuda.cpp deleted file mode 100644 index 1a5092a68f..0000000000 --- a/src/USER-CUDA/fix_nh_cuda.cpp +++ /dev/null @@ -1,2072 +0,0 @@ -/* ---------------------------------------------------------------------- - LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator - http://lammps.sandia.gov, Sandia National Laboratories - Steve Plimpton, sjplimp@sandia.gov - - Copyright (2003) Sandia Corporation. 
Under the terms of Contract - DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains - certain rights in this software. This software is distributed under - the GNU General Public License. - - See the README file in the top-level LAMMPS directory. -------------------------------------------------------------------------- */ - -/* ---------------------------------------------------------------------- - Contributing authors: Mark Stevens (SNL), Aidan Thompson (SNL) -------------------------------------------------------------------------- */ - -#include <cstring> -#include <cstdlib> -#include <cmath> -#include "fix_nh_cuda.h" -#include "atom.h" -#include "force.h" -#include "comm.h" -#include "modify.h" -#include "fix_deform.h" -#include "compute.h" -#include "kspace.h" -#include "update.h" -#include "respa.h" -#include "domain.h" -#include "memory.h" -#include "error.h" -#include "math_extra.h" -#include "user_cuda.h" -#include "fix_nh_cuda_cu.h" -#include "cuda_modify_flags.h" - -using namespace LAMMPS_NS; -using namespace FixConst; -using namespace FixConstCuda; - -enum{NOBIAS,BIAS}; -enum{NONE,XYZ,XY,YZ,XZ}; -enum{ISO,ANISO,TRICLINIC}; - -/* ---------------------------------------------------------------------- - NVT,NPH,NPT integrators for improved Nose-Hoover equations of motion - ---------------------------------------------------------------------- */ - -FixNHCuda::FixNHCuda(LAMMPS *lmp, int narg, char **arg) : Fix(lmp, narg, arg) -{ - cuda = lmp->cuda; - if(cuda == NULL) - error->all(FLERR,"You cannot use a /cuda class, without activating 'cuda' acceleration. 
Provide '-c on' as command-line argument to LAMMPS.."); - - if (narg < 4) error->all(FLERR,"Illegal fix nvt/npt/nph command"); - - restart_global = 1; - time_integrate = 1; - scalar_flag = 1; - vector_flag = 1; - global_freq = 1; - extscalar = 1; - extvector = 0; - - triggerneighsq = -1; - // default values - - pcouple = NONE; - drag = 0.0; - allremap = 1; - mtchain = mpchain = 3; - nc_tchain = nc_pchain = 1; - mtk_flag = 1; - deviatoric_flag = 0; - nreset_h0 = 0; - - // Used by FixNVTSllod to preserve non-default value - - mtchain_default_flag = 1; - - tstat_flag = 0; - double t_period = 0.0; - - double p_period[6]; - for (int i = 0; i < 6; i++) { - p_start[i] = p_stop[i] = p_period[i] = 0.0; - p_flag[i] = 0; - } - - // process keywords - - dimension = domain->dimension; - - int iarg = 3; - - while (iarg < narg) { - if (strcmp(arg[iarg],"temp") == 0) { - if (iarg+4 > narg) error->all(FLERR,"Illegal fix nvt/npt/nph command"); - tstat_flag = 1; - t_start = force->numeric(FLERR,arg[iarg+1]); - t_stop = force->numeric(FLERR,arg[iarg+2]); - t_period = force->numeric(FLERR,arg[iarg+3]); - if (t_start < 0.0 || t_stop <= 0.0) - error->all(FLERR,"Target T for fix nvt/npt/nph cannot be 0.0"); - iarg += 4; - - } else if (strcmp(arg[iarg],"iso") == 0) { - if (iarg+4 > narg) error->all(FLERR,"Illegal fix nvt/npt/nph command"); - pcouple = XYZ; - p_start[0] = p_start[1] = p_start[2] = force->numeric(FLERR,arg[iarg+1]); - p_stop[0] = p_stop[1] = p_stop[2] = force->numeric(FLERR,arg[iarg+2]); - p_period[0] = p_period[1] = p_period[2] = force->numeric(FLERR,arg[iarg+3]); - p_flag[0] = p_flag[1] = p_flag[2] = 1; - if (dimension == 2) { - p_start[2] = p_stop[2] = p_period[2] = 0.0; - p_flag[2] = 0; - } - iarg += 4; - } else if (strcmp(arg[iarg],"aniso") == 0) { - if (iarg+4 > narg) error->all(FLERR,"Illegal fix nvt/npt/nph command"); - pcouple = NONE; - p_start[0] = p_start[1] = p_start[2] = force->numeric(FLERR,arg[iarg+1]); - p_stop[0] = p_stop[1] = p_stop[2] = 
force->numeric(FLERR,arg[iarg+2]); - p_period[0] = p_period[1] = p_period[2] = force->numeric(FLERR,arg[iarg+3]); - p_flag[0] = p_flag[1] = p_flag[2] = 1; - if (dimension == 2) { - p_start[2] = p_stop[2] = p_period[2] = 0.0; - p_flag[2] = 0; - } - iarg += 4; - } else if (strcmp(arg[iarg],"tri") == 0) { - if (iarg+4 > narg) error->all(FLERR,"Illegal fix nvt/npt/nph command"); - pcouple = NONE; - p_start[0] = p_start[1] = p_start[2] = force->numeric(FLERR,arg[iarg+1]); - p_stop[0] = p_stop[1] = p_stop[2] = force->numeric(FLERR,arg[iarg+2]); - p_period[0] = p_period[1] = p_period[2] = force->numeric(FLERR,arg[iarg+3]); - p_flag[0] = p_flag[1] = p_flag[2] = 1; - p_start[3] = p_start[4] = p_start[5] = 0.0; - p_stop[3] = p_stop[4] = p_stop[5] = 0.0; - p_period[3] = p_period[4] = p_period[5] = force->numeric(FLERR,arg[iarg+3]); - p_flag[3] = p_flag[4] = p_flag[5] = 1; - if (dimension == 2) { - p_start[2] = p_stop[2] = p_period[2] = 0.0; - p_flag[2] = 0; - p_start[3] = p_stop[3] = p_period[3] = 0.0; - p_flag[3] = 0; - p_start[4] = p_stop[4] = p_period[4] = 0.0; - p_flag[4] = 0; - } - iarg += 4; - - } else if (strcmp(arg[iarg],"x") == 0) { - if (iarg+4 > narg) error->all(FLERR,"Illegal fix nvt/npt/nph command"); - p_start[0] = force->numeric(FLERR,arg[iarg+1]); - p_stop[0] = force->numeric(FLERR,arg[iarg+2]); - p_period[0] = force->numeric(FLERR,arg[iarg+3]); - p_flag[0] = 1; - deviatoric_flag = 1; - iarg += 4; - } else if (strcmp(arg[iarg],"y") == 0) { - if (iarg+4 > narg) error->all(FLERR,"Illegal fix nvt/npt/nph command"); - p_start[1] = force->numeric(FLERR,arg[iarg+1]); - p_stop[1] = force->numeric(FLERR,arg[iarg+2]); - p_period[1] = force->numeric(FLERR,arg[iarg+3]); - p_flag[1] = 1; - deviatoric_flag = 1; - iarg += 4; - } else if (strcmp(arg[iarg],"z") == 0) { - if (iarg+4 > narg) error->all(FLERR,"Illegal fix nvt/npt/nph command"); - p_start[2] = force->numeric(FLERR,arg[iarg+1]); - p_stop[2] = force->numeric(FLERR,arg[iarg+2]); - p_period[2] = 
force->numeric(FLERR,arg[iarg+3]); - p_flag[2] = 1; - deviatoric_flag = 1; - iarg += 4; - if (dimension == 2) - error->all(FLERR,"Invalid fix nvt/npt/nph command for a 2d simulation"); - - } else if (strcmp(arg[iarg],"yz") == 0) { - if (iarg+4 > narg) error->all(FLERR,"Illegal fix nvt/npt/nph command"); - p_start[3] = force->numeric(FLERR,arg[iarg+1]); - p_stop[3] = force->numeric(FLERR,arg[iarg+2]); - p_period[3] = force->numeric(FLERR,arg[iarg+3]); - p_flag[3] = 1; - deviatoric_flag = 1; - iarg += 4; - if (dimension == 2) - error->all(FLERR,"Invalid fix nvt/npt/nph command for a 2d simulation"); - } else if (strcmp(arg[iarg],"xz") == 0) { - if (iarg+4 > narg) error->all(FLERR,"Illegal fix nvt/npt/nph command"); - p_start[4] = force->numeric(FLERR,arg[iarg+1]); - p_stop[4] = force->numeric(FLERR,arg[iarg+2]); - p_period[4] = force->numeric(FLERR,arg[iarg+3]); - p_flag[4] = 1; - deviatoric_flag = 1; - iarg += 4; - if (dimension == 2) - error->all(FLERR,"Invalid fix nvt/npt/nph command for a 2d simulation"); - } else if (strcmp(arg[iarg],"xy") == 0) { - if (iarg+4 > narg) error->all(FLERR,"Illegal fix nvt/npt/nph command"); - p_start[5] = force->numeric(FLERR,arg[iarg+1]); - p_stop[5] = force->numeric(FLERR,arg[iarg+2]); - p_period[5] = force->numeric(FLERR,arg[iarg+3]); - p_flag[5] = 1; - deviatoric_flag = 1; - iarg += 4; - - } else if (strcmp(arg[iarg],"couple") == 0) { - if (iarg+2 > narg) error->all(FLERR,"Illegal fix nvt/npt/nph command"); - if (strcmp(arg[iarg+1],"xyz") == 0) pcouple = XYZ; - else if (strcmp(arg[iarg+1],"xy") == 0) pcouple = XY; - else if (strcmp(arg[iarg+1],"yz") == 0) pcouple = YZ; - else if (strcmp(arg[iarg+1],"xz") == 0) pcouple = XZ; - else if (strcmp(arg[iarg+1],"none") == 0) pcouple = NONE; - else error->all(FLERR,"Illegal fix nvt/npt/nph command"); - iarg += 2; - - } else if (strcmp(arg[iarg],"drag") == 0) { - if (iarg+2 > narg) error->all(FLERR,"Illegal fix nvt/npt/nph command"); - drag = force->numeric(FLERR,arg[iarg+1]); - if (drag 
< 0.0) error->all(FLERR,"Illegal fix nvt/npt/nph command"); - iarg += 2; - } else if (strcmp(arg[iarg],"dilate") == 0) { - if (iarg+2 > narg) error->all(FLERR,"Illegal fix nvt/npt/nph command"); - if (strcmp(arg[iarg+1],"all") == 0) allremap = 1; - else if (strcmp(arg[iarg+1],"partial") == 0) allremap = 0; - else error->all(FLERR,"Illegal fix nvt/npt/nph command"); - iarg += 2; - } else if (strcmp(arg[iarg],"tchain") == 0) { - if (iarg+2 > narg) error->all(FLERR,"Illegal fix nvt/npt/nph command"); - mtchain = force->inumeric(FLERR,arg[iarg+1]); - if (mtchain < 1) error->all(FLERR,"Illegal fix nvt/npt/nph command"); - iarg += 2; - } else if (strcmp(arg[iarg],"pchain") == 0) { - if (iarg+2 > narg) error->all(FLERR,"Illegal fix nvt/npt/nph command"); - mpchain = force->inumeric(FLERR,arg[iarg+1]); - if (mpchain < 0) error->all(FLERR,"Illegal fix nvt/npt/nph command"); - iarg += 2; - } else if (strcmp(arg[iarg],"mtk") == 0) { - if (iarg+2 > narg) error->all(FLERR,"Illegal fix nvt/npt/nph command"); - if (strcmp(arg[iarg+1],"yes") == 0) mtk_flag = 1; - else if (strcmp(arg[iarg+1],"no") == 0) mtk_flag = 0; - else error->all(FLERR,"Illegal fix nvt/npt/nph command"); - iarg += 2; - } else if (strcmp(arg[iarg],"tloop") == 0) { - if (iarg+2 > narg) error->all(FLERR,"Illegal fix nvt/npt/nph command"); - nc_tchain = force->inumeric(FLERR,arg[iarg+1]); - if (nc_tchain < 0) error->all(FLERR,"Illegal fix nvt/npt/nph command"); - iarg += 2; - } else if (strcmp(arg[iarg],"ploop") == 0) { - if (iarg+2 > narg) error->all(FLERR,"Illegal fix nvt/npt/nph command"); - nc_pchain = force->inumeric(FLERR,arg[iarg+1]); - if (nc_pchain < 0) error->all(FLERR,"Illegal fix nvt/npt/nph command"); - iarg += 2; - } else if (strcmp(arg[iarg],"nreset") == 0) { - if (iarg+2 > narg) error->all(FLERR,"Illegal fix nvt/npt/nph command"); - nreset_h0 = force->inumeric(FLERR,arg[iarg+1]); - if (nreset_h0 < 0) error->all(FLERR,"Illegal fix nvt/npt/nph command"); - iarg += 2; - } else 
error->all(FLERR,"Illegal fix nvt/npt/nph command"); - } - - // error checks - - if (dimension == 2 && (p_flag[2] || p_flag[3] || p_flag[4])) - error->all(FLERR,"Invalid fix nvt/npt/nph command for a 2d simulation"); - if (dimension == 2 && (pcouple == YZ || pcouple == XZ)) - error->all(FLERR,"Invalid fix nvt/npt/nph command for a 2d simulation"); - - if (pcouple == XYZ && (p_flag[0] == 0 || p_flag[1] == 0)) - error->all(FLERR,"Invalid fix nvt/npt/nph command pressure settings"); - if (pcouple == XYZ && dimension == 3 && p_flag[2] == 0) - error->all(FLERR,"Invalid fix nvt/npt/nph command pressure settings"); - if (pcouple == XY && (p_flag[0] == 0 || p_flag[1] == 0)) - error->all(FLERR,"Invalid fix nvt/npt/nph command pressure settings"); - if (pcouple == YZ && (p_flag[1] == 0 || p_flag[2] == 0)) - error->all(FLERR,"Invalid fix nvt/npt/nph command pressure settings"); - if (pcouple == XZ && (p_flag[0] == 0 || p_flag[2] == 0)) - error->all(FLERR,"Invalid fix nvt/npt/nph command pressure settings"); - - if (p_flag[0] && domain->xperiodic == 0) - error->all(FLERR,"Cannot use fix nvt/npt/nph on a non-periodic dimension"); - if (p_flag[1] && domain->yperiodic == 0) - error->all(FLERR,"Cannot use fix nvt/npt/nph on a non-periodic dimension"); - if (p_flag[2] && domain->zperiodic == 0) - error->all(FLERR,"Cannot use fix nvt/npt/nph on a non-periodic dimension"); - if (p_flag[3] && domain->zperiodic == 0) - error->all(FLERR,"Cannot use fix nvt/npt/nph on a 2nd non-periodic dimension"); - if (p_flag[4] && domain->zperiodic == 0) - error->all(FLERR,"Cannot use fix nvt/npt/nph on a 2nd non-periodic dimension"); - if (p_flag[5] && domain->yperiodic == 0) - error->all(FLERR,"Cannot use fix nvt/npt/nph on a 2nd non-periodic dimension"); - - if (!domain->triclinic && (p_flag[3] || p_flag[4] || p_flag[5])) - error->all(FLERR,"Can not specify Pxy/Pxz/Pyz in " - "fix nvt/npt/nph with non-triclinic box"); - - if (pcouple == XYZ && dimension == 3 && - (p_start[0] != p_start[1] || 
p_start[0] != p_start[2] || - p_stop[0] != p_stop[1] || p_stop[0] != p_stop[2] || - p_period[0] != p_period[1] || p_period[0] != p_period[2])) - error->all(FLERR,"Invalid fix nvt/npt/nph pressure settings"); - if (pcouple == XYZ && dimension == 2 && - (p_start[0] != p_start[1] || p_stop[0] != p_stop[1] || - p_period[0] != p_period[1])) - error->all(FLERR,"Invalid fix nvt/npt/nph pressure settings"); - if (pcouple == XY && - (p_start[0] != p_start[1] || p_stop[0] != p_stop[1] || - p_period[0] != p_period[1])) - error->all(FLERR,"Invalid fix nvt/npt/nph pressure settings"); - if (pcouple == YZ && - (p_start[1] != p_start[2] || p_stop[1] != p_stop[2] || - p_period[1] != p_period[2])) - error->all(FLERR,"Invalid fix nvt/npt/nph pressure settings"); - if (pcouple == XZ && - (p_start[0] != p_start[2] || p_stop[0] != p_stop[2] || - p_period[0] != p_period[2])) - error->all(FLERR,"Invalid fix nvt/npt/nph pressure settings"); - - if ((tstat_flag && t_period <= 0.0) || - (p_flag[0] && p_period[0] <= 0.0) || - (p_flag[1] && p_period[1] <= 0.0) || - (p_flag[2] && p_period[2] <= 0.0) || - (p_flag[3] && p_period[3] <= 0.0) || - (p_flag[4] && p_period[4] <= 0.0) || - (p_flag[5] && p_period[5] <= 0.0)) - error->all(FLERR,"Fix nvt/npt/nph damping parameters must be > 0.0"); - - // set pstat_flag and box change variables - - pstat_flag = 0; - for (int i = 0; i < 6; i++) - if (p_flag[i]) pstat_flag = 1; - - if (pstat_flag) { - if (p_flag[0] || p_flag[1] || p_flag[2]) box_change_size = 1; - if (p_flag[3] || p_flag[4] || p_flag[5]) box_change_shape = 1; - no_change_box = 1; - if (allremap == 0) restart_pbc = 1; - } - - // pstyle = TRICLINIC if any off-diagonal term is controlled -> 6 dof - // else pstyle = ISO if XYZ coupling or XY coupling in 2d -> 1 dof - // else pstyle = ANISO -> 3 dof - - if (p_flag[3] || p_flag[4] || p_flag[5]) pstyle = TRICLINIC; - else if (pcouple == XYZ || (dimension == 2 && pcouple == XY)) pstyle = ISO; - else pstyle = ANISO; - - // convert input periods to 
frequencies - - t_freq = 0.0; - p_freq[0] = p_freq[1] = p_freq[2] = p_freq[3] = p_freq[4] = p_freq[5] = 0.0; - - if (tstat_flag) t_freq = 1.0 / t_period; - if (p_flag[0]) p_freq[0] = 1.0 / p_period[0]; - if (p_flag[1]) p_freq[1] = 1.0 / p_period[1]; - if (p_flag[2]) p_freq[2] = 1.0 / p_period[2]; - if (p_flag[3]) p_freq[3] = 1.0 / p_period[3]; - if (p_flag[4]) p_freq[4] = 1.0 / p_period[4]; - if (p_flag[5]) p_freq[5] = 1.0 / p_period[5]; - - // Nose/Hoover temp and pressure init - - size_vector = 0; - - if (tstat_flag) { - int ich; - eta = new double[mtchain]; - - // add one extra dummy thermostat, set to zero - - eta_dot = new double[mtchain+1]; - eta_dot[mtchain] = 0.0; - eta_dotdot = new double[mtchain]; - for (ich = 0; ich < mtchain; ich++) { - eta[ich] = eta_dot[ich] = eta_dotdot[ich] = 0.0; - } - eta_mass = new double[mtchain]; - size_vector += 2*2*mtchain; - } - - if (pstat_flag) { - omega[0] = omega[1] = omega[2] = 0.0; - omega_dot[0] = omega_dot[1] = omega_dot[2] = 0.0; - omega_mass[0] = omega_mass[1] = omega_mass[2] = 0.0; - omega[3] = omega[4] = omega[5] = 0.0; - omega_dot[3] = omega_dot[4] = omega_dot[5] = 0.0; - omega_mass[3] = omega_mass[4] = omega_mass[5] = 0.0; - if (pstyle == ISO) size_vector += 2*2*1; - else if (pstyle == ANISO) size_vector += 2*2*3; - else if (pstyle == TRICLINIC) size_vector += 2*2*6; - - if (mpchain) { - int ich; - etap = new double[mpchain]; - - // add one extra dummy thermostat, set to zero - - etap_dot = new double[mpchain+1]; - etap_dot[mpchain] = 0.0; - etap_dotdot = new double[mpchain]; - for (ich = 0; ich < mpchain; ich++) { - etap[ich] = etap_dot[ich] = - etap_dotdot[ich] = 0.0; - } - etap_mass = new double[mpchain]; - size_vector += 2*2*mpchain; - } - - if (deviatoric_flag) size_vector += 1; - } - - nrigid = 0; - rfix = NULL; - - // initialize vol0,t0 to zero to signal uninitialized - // values then assigned in init(), if necessary - - vol0 = t0 = 0.0; -} - -/* 
---------------------------------------------------------------------- */ - -FixNHCuda::~FixNHCuda() -{ - delete [] rfix; - - // delete temperature and pressure if fix created them - - if (tflag) modify->delete_compute(id_temp); - delete [] id_temp; - - if (tstat_flag) { - delete [] eta; - delete [] eta_dot; - delete [] eta_dotdot; - delete [] eta_mass; - } - - if (pstat_flag) { - if (pflag) modify->delete_compute(id_press); - delete [] id_press; - if (mpchain) { - delete [] etap; - delete [] etap_dot; - delete [] etap_dotdot; - delete [] etap_mass; - } - } -} - -/* ---------------------------------------------------------------------- */ - -int FixNHCuda::setmask() -{ - int mask = 0; - mask |= INITIAL_INTEGRATE_CUDA; - mask |= FINAL_INTEGRATE_CUDA; - mask |= THERMO_ENERGY_CUDA; - //mask |= INITIAL_INTEGRATE_RESPA; - //mask |= FINAL_INTEGRATE_RESPA; - return mask; -} - -/* ---------------------------------------------------------------------- */ - -void FixNHCuda::init() -{ - // insure no conflict with fix deform - - if (pstat_flag) - for (int i = 0; i < modify->nfix; i++) - if (strcmp(modify->fix[i]->style,"deform") == 0) { - int *dimflag = ((FixDeform *) modify->fix[i])->dimflag; - if ((p_flag[0] && dimflag[0]) || (p_flag[1] && dimflag[1]) || - (p_flag[2] && dimflag[2]) || (p_flag[3] && dimflag[3]) || - (p_flag[4] && dimflag[4]) || (p_flag[5] && dimflag[5])) - error->all(FLERR,"Cannot use fix npt and fix deform on " - "same component of stress tensor"); - } - - // set temperature and pressure ptrs - - int icompute = modify->find_compute(id_temp); - if (icompute < 0) - error->all(FLERR,"Temperature ID for fix nvt/nph/npt does not exist"); - temperature = modify->compute[icompute]; - - if (temperature->tempbias) which = BIAS; - else which = NOBIAS; - - if (pstat_flag) { - icompute = modify->find_compute(id_press); - if (icompute < 0) error->all(FLERR,"Pressure ID for fix npt/nph does not exist"); - pressure = modify->compute[icompute]; - } - - // set timesteps and 
frequencies - - dtv = update->dt; - dtf = 0.5 * update->dt * force->ftm2v; - dthalf = 0.5 * update->dt; - dt4 = 0.25 * update->dt; - dt8 = 0.125 * update->dt; - dto = dthalf; - - p_freq_max = 0.0; - if (pstat_flag) { - p_freq_max = MAX(p_freq[0],p_freq[1]); - p_freq_max = MAX(p_freq_max,p_freq[2]); - if (pstyle == TRICLINIC) { - p_freq_max = MAX(p_freq_max,p_freq[3]); - p_freq_max = MAX(p_freq_max,p_freq[4]); - p_freq_max = MAX(p_freq_max,p_freq[5]); - } - pdrag_factor = 1.0 - (update->dt * p_freq_max * drag / nc_pchain); - } - - if (tstat_flag) - tdrag_factor = 1.0 - (update->dt * t_freq * drag / nc_tchain); - - // tally the number of dimensions that are barostatted - // also compute the initial volume and reference cell - // set initial volume and reference cell, if not already done - - if (pstat_flag) { - pdim = p_flag[0] + p_flag[1] + p_flag[2]; - if (vol0 == 0.0) { - if (dimension == 3) vol0 = domain->xprd * domain->yprd * domain->zprd; - else vol0 = domain->xprd * domain->yprd; - h0_inv[0] = domain->h_inv[0]; - h0_inv[1] = domain->h_inv[1]; - h0_inv[2] = domain->h_inv[2]; - h0_inv[3] = domain->h_inv[3]; - h0_inv[4] = domain->h_inv[4]; - h0_inv[5] = domain->h_inv[5]; - } - } - - boltz = force->boltz; - nktv2p = force->nktv2p; - - if (force->kspace) kspace_flag = 1; - else kspace_flag = 0; - - if (strcmp(update->integrate_style,"respa") == 0) { - nlevels_respa = ((Respa *) update->integrate)->nlevels; - step_respa = ((Respa *) update->integrate)->step; - dto = 0.5*step_respa[0]; - } - - // detect if any rigid fixes exist so rigid bodies move when box is remapped - // rfix[] = indices to each fix rigid - - delete [] rfix; - nrigid = 0; - rfix = NULL; - - for (int i = 0; i < modify->nfix; i++) - if (modify->fix[i]->rigid_flag) nrigid++; - if (nrigid) { - rfix = new int[nrigid]; - nrigid = 0; - for (int i = 0; i < modify->nfix; i++) - if (modify->fix[i]->rigid_flag) rfix[nrigid++] = i; - } - triggerneighsq= cuda->shared_data.atom.triggerneighsq; - 
cuda->neighbor_decide_by_integrator=1; - Cuda_FixNHCuda_Init(&cuda->shared_data,dtv,dtf); - -} - -/* ---------------------------------------------------------------------- - compute T,P before integrator starts -------------------------------------------------------------------------- */ - -void FixNHCuda::setup(int vflag) -{ - // initialize some quantities that were not available earlier - - //if (mtk_flag) mtk_factor = 1.0 + 1.0/atom->natoms; - //else mtk_factor = 1.0; - tdof = temperature->dof; - - // t_target is used by compute_scalar(), even for NPH - - if (tstat_flag) t_target = t_start; - else if (pstat_flag) { - - // t0 = initial value for piston mass and energy conservation - // cannot be done in init() b/c temperature cannot be called there - // is b/c Modify::init() inits computes after fixes due to dof dependence - // guesstimate a unit-dependent t0 if actual T = 0.0 - // if it was read in from a restart file, leave it be - - if (t0 == 0.0) { - t0 = temperature->compute_scalar(); - if (t0 == 0.0) { - if (strcmp(update->unit_style,"lj") == 0) t0 = 1.0; - else t0 = 300.0; - } - } - t_target = t0; - } - - if (pstat_flag) compute_press_target(); - - t_current = temperature->compute_scalar(); - if (pstat_flag) { - if (pstyle == ISO) double tmp = pressure->compute_scalar(); - else pressure->compute_vector(); - couple(); - pressure->addstep(update->ntimestep+1); - } - - // initial forces on thermostat variables - - if (tstat_flag) { - eta_mass[0] = tdof * boltz * t_target / (t_freq*t_freq); - for (int ich = 1; ich < mtchain; ich++) - eta_mass[ich] = boltz * t_target / (t_freq*t_freq); - for (int ich = 1; ich < mtchain; ich++) { - eta_dotdot[ich] = (eta_mass[ich-1]*eta_dot[ich-1]*eta_dot[ich-1] - - boltz*t_target) / eta_mass[ich]; - } - } - - if (pstat_flag) { - double kt = boltz * t_target; - double nkt = atom->natoms * kt; - - for (int i = 0; i < 3; i++) - if (p_flag[i]) - omega_mass[i] = nkt/(p_freq[i]*p_freq[i]); - - if (pstyle == TRICLINIC) { - for (int i 
= 3; i < 6; i++) - if (p_flag[i]) omega_mass[i] = nkt/(p_freq[i]*p_freq[i]); - } - - // initial forces on barostat thermostat variables - - if (mpchain) { - etap_mass[0] = boltz * t_target / (p_freq_max*p_freq_max); - for (int ich = 1; ich < mpchain; ich++) - etap_mass[ich] = boltz * t_target / (p_freq_max*p_freq_max); - for (int ich = 1; ich < mpchain; ich++) - etap_dotdot[ich] = - (etap_mass[ich-1]*etap_dot[ich-1]*etap_dot[ich-1] - - boltz*t_target) / etap_mass[ich]; - } - - // compute appropriately coupled elements of mvv_current - - //if (mtk_flag) couple_ke(); - } -} - -/* ---------------------------------------------------------------------- - 1st half of Verlet update -------------------------------------------------------------------------- */ - -void FixNHCuda::initial_integrate(int vflag) -{ - if(!temperature->cudable) cuda->downloadAll(); - - if(triggerneighsq!=cuda->shared_data.atom.triggerneighsq) - { - triggerneighsq= cuda->shared_data.atom.triggerneighsq; - Cuda_FixNHCuda_Init(&cuda->shared_data,dtv,dtf); - } - - // update eta_press_dot - - if (pstat_flag && mpchain) nhc_press_integrate(); - - // update eta_dot - - if (tstat_flag) { - double delta = update->ntimestep - update->beginstep; - delta /= update->endstep - update->beginstep; - t_target = t_start + delta * (t_stop-t_start); - eta_mass[0] = tdof * boltz * t_target / (t_freq*t_freq); - for (int ich = 1; ich < mtchain; ich++) - eta_mass[ich] = boltz * t_target / (t_freq*t_freq); - nhc_temp_integrate(); - } - - // need to recompute pressure to account for change in KE - // t_current is up-to-date, but compute_temperature is not - // compute appropriately coupled elements of mvv_current - - if (pstat_flag) { - if (pstyle == ISO) { - temperature->compute_scalar(); - double tmp = pressure->compute_scalar(); - } else { - temperature->compute_vector(); - pressure->compute_vector(); - } - couple(); - pressure->addstep(update->ntimestep+1); - //if (mtk_flag) couple_ke(); - } - - if(which==NOBIAS) - { - 
if (pstat_flag) { - compute_press_target(); - nh_omega_dot(); - factor[0] = exp(-dt4*(omega_dot[0]+mtk_term2)); - factor[1] = exp(-dt4*(omega_dot[1]+mtk_term2)); - factor[2] = exp(-dt4*(omega_dot[2]+mtk_term2)); - Cuda_FixNHCuda_nh_v_press_and_nve_v_NoBias(&cuda->shared_data, groupbit, factor,(igroup == atom->firstgroup)?atom->nfirst:atom->nlocal,(pstyle == TRICLINIC)?1:0); - } - else - Cuda_FixNHCuda_nve_v(&cuda->shared_data,groupbit,(igroup == atom->firstgroup)?atom->nfirst:atom->nlocal); - } - else if(which==BIAS) - { - if(pstat_flag) - { - compute_press_target(); - nh_omega_dot(); - factor[0] = exp(-dt4*(omega_dot[0]+mtk_term2)); - factor[1] = exp(-dt4*(omega_dot[1]+mtk_term2)); - factor[2] = exp(-dt4*(omega_dot[2]+mtk_term2)); - if(!temperature->cudable) - { - nh_v_press(); - cuda->cu_v->upload(); - } - else - { - int groupbit_org=temperature->groupbit; - temperature->groupbit=groupbit; - temperature->remove_bias_all(); - Cuda_FixNHCuda_nh_v_press(&cuda->shared_data, groupbit, factor,(igroup == atom->firstgroup)?atom->nfirst:atom->nlocal,(pstyle == TRICLINIC)?1:0); - temperature->restore_bias_all(); - temperature->groupbit=groupbit_org; - } - } - Cuda_FixNHCuda_nve_v(&cuda->shared_data,groupbit,(igroup == atom->firstgroup)?atom->nfirst:atom->nlocal); - } - - // remap simulation box by 1/2 step - - if (pstat_flag) remap(); - - Cuda_FixNHCuda_nve_x(&cuda->shared_data,groupbit,(igroup == atom->firstgroup)?atom->nfirst:atom->nlocal); - - // remap simulation box by 1/2 step - // redo KSpace coeffs since volume has changed - - if (pstat_flag) { - remap(); - if (kspace_flag) force->kspace->setup(); - } -} - -/* ---------------------------------------------------------------------- - 2nd half of Verlet update -------------------------------------------------------------------------- */ - -void FixNHCuda::final_integrate() -{ - if(!temperature->cudable) cuda->downloadAll(); - - if(which==NOBIAS) - { - if(pstat_flag) - { - factor[0] = exp(-dt4*(omega_dot[0]+mtk_term2)); 
- factor[1] = exp(-dt4*(omega_dot[1]+mtk_term2)); - factor[2] = exp(-dt4*(omega_dot[2]+mtk_term2)); - - Cuda_FixNHCuda_nve_v_and_nh_v_press_NoBias(&cuda->shared_data, groupbit, factor,(igroup == atom->firstgroup)?atom->nfirst:atom->nlocal,(pstyle == TRICLINIC)?1:0); - } - else - Cuda_FixNHCuda_nve_v(&cuda->shared_data,groupbit,(igroup == atom->firstgroup)?atom->nfirst:atom->nlocal); - } - else if(which==BIAS) - { - Cuda_FixNHCuda_nve_v(&cuda->shared_data,groupbit,(igroup == atom->firstgroup)?atom->nfirst:atom->nlocal); - - if(pstat_flag) - { - factor[0] = exp(-dt4*(omega_dot[0]+mtk_term2)); - factor[1] = exp(-dt4*(omega_dot[1]+mtk_term2)); - factor[2] = exp(-dt4*(omega_dot[2]+mtk_term2)); - if(!temperature->cudable) - { - cuda->cu_v->download(); - nh_v_press(); - cuda->cu_v->upload(); - } - else - { - int groupbit_org=temperature->groupbit; - temperature->groupbit=groupbit; - temperature->remove_bias_all(); - Cuda_FixNHCuda_nh_v_press(&cuda->shared_data, groupbit, factor,(igroup == atom->firstgroup)?atom->nfirst:atom->nlocal,(pstyle == TRICLINIC)?1:0); - temperature->restore_bias_all(); - temperature->groupbit=groupbit_org; - } - } - } - // compute new T,P - // compute appropriately coupled elements of mvv_current - - if(!temperature->cudable) cuda->cu_v->download(); - t_current = temperature->compute_scalar(); - if (pstat_flag) { - if (pstyle == ISO) double tmp = pressure->compute_scalar(); - else pressure->compute_vector(); - couple(); - pressure->addstep(update->ntimestep+1); - } - - if (pstat_flag) nh_omega_dot(); - - // update eta_dot - // update eta_press_dot - - if (tstat_flag) nhc_temp_integrate(); - if (pstat_flag && mpchain) nhc_press_integrate(); -} - -/* ---------------------------------------------------------------------- */ - -void FixNHCuda::initial_integrate_respa(int vflag, int ilevel, int iloop) -{ - // set timesteps by level - - dtv = step_respa[ilevel]; - dtf = 0.5 * step_respa[ilevel] * force->ftm2v; - dthalf = 0.5 * step_respa[ilevel]; - - // 
outermost level - update eta_dot and omega_dot, apply to v, remap box - // all other levels - NVE update of v - // x,v updates only performed for atoms in group - - if (ilevel == nlevels_respa-1) { - - // update eta_press_dot - - if (pstat_flag && mpchain) nhc_press_integrate(); - - // update eta_dot - - if (tstat_flag) { - double delta = update->ntimestep - update->beginstep; - delta /= update->endstep - update->beginstep; - t_target = t_start + delta * (t_stop-t_start); - eta_mass[0] = tdof * boltz * t_target / (t_freq*t_freq); - for (int ich = 1; ich < mtchain; ich++) - eta_mass[ich] = boltz * t_target / (t_freq*t_freq); - nhc_temp_integrate(); - } - - // recompute pressure to account for change in KE - // t_current is up-to-date, but compute_temperature is not - // compute appropriately coupled elements of mvv_current - - if (pstat_flag) { - if (pstyle == ISO) { - temperature->compute_scalar(); - double tmp = pressure->compute_scalar(); - } else { - temperature->compute_vector(); - pressure->compute_vector(); - } - couple(); - pressure->addstep(update->ntimestep+1); - if (mtk_flag) couple_ke(); - } - - if (pstat_flag) { - compute_press_target(); - nh_omega_dot(); - nh_v_press(); - } - - nve_v(); - - } else nve_v(); - - // innermost level - also update x only for atoms in group - // if barostat, perform 1/2 step remap before and after - - if (ilevel == 0) { - if (pstat_flag) remap(); - nve_x(); - if (pstat_flag) remap(); - } - - // if barostat, redo KSpace coeffs at outermost level, - // since volume has changed - - if (ilevel == nlevels_respa-1 && kspace_flag && pstat_flag) - force->kspace->setup(); -} - -/* ---------------------------------------------------------------------- */ - -void FixNHCuda::final_integrate_respa(int ilevel, int iloop) -{ - // set timesteps by level - - dtf = 0.5 * step_respa[ilevel] * force->ftm2v; - dthalf = 0.5 * step_respa[ilevel]; - - // outermost level - update eta_dot and omega_dot, apply via final_integrate - // all other levels 
- NVE update of v - - if (ilevel == nlevels_respa-1) final_integrate(); - else nve_v(); -} - -/* ---------------------------------------------------------------------- */ - -void FixNHCuda::couple() -{ - double *tensor = pressure->vector; - - if (pstyle == ISO) - p_current[0] = p_current[1] = p_current[2] = pressure->scalar; - else if (pcouple == XYZ) { - double ave = 1.0/3.0 * (tensor[0] + tensor[1] + tensor[2]); - p_current[0] = p_current[1] = p_current[2] = ave; - } else if (pcouple == XY) { - double ave = 0.5 * (tensor[0] + tensor[1]); - p_current[0] = p_current[1] = ave; - p_current[2] = tensor[2]; - } else if (pcouple == YZ) { - double ave = 0.5 * (tensor[1] + tensor[2]); - p_current[1] = p_current[2] = ave; - p_current[0] = tensor[0]; - } else if (pcouple == XZ) { - double ave = 0.5 * (tensor[0] + tensor[2]); - p_current[0] = p_current[2] = ave; - p_current[1] = tensor[1]; - } else { - p_current[0] = tensor[0]; - p_current[1] = tensor[1]; - p_current[2] = tensor[2]; - } - - // switch order from xy-xz-yz to Voigt - - if (pstyle == TRICLINIC) { - p_current[3] = tensor[5]; - p_current[4] = tensor[4]; - p_current[5] = tensor[3]; - } -} - -/* ---------------------------------------------------------------------- */ - -void FixNHCuda::couple_ke() -{ - double *tensor = temperature->vector; - if (pstyle == ISO) - mvv_current[0] = mvv_current[1] = mvv_current[2] = - tdof * boltz * t_current/dimension; - else if (pcouple == XYZ) { - double ave = 1.0/3.0 * (tensor[0] + tensor[1] + tensor[2]); - mvv_current[0] = mvv_current[1] = mvv_current[2] = ave; - } else if (pcouple == XY) { - double ave = 0.5 * (tensor[0] + tensor[1]); - mvv_current[0] = mvv_current[1] = ave; - mvv_current[2] = tensor[2]; - } else if (pcouple == YZ) { - double ave = 0.5 * (tensor[1] + tensor[2]); - mvv_current[1] = mvv_current[2] = ave; - mvv_current[0] = tensor[0]; - } else if (pcouple == XZ) { - double ave = 0.5 * (tensor[0] + tensor[2]); - mvv_current[0] = mvv_current[2] = ave; - mvv_current[1] 
= tensor[1]; - } else { - mvv_current[0] = tensor[0]; - mvv_current[1] = tensor[1]; - mvv_current[2] = tensor[2]; - } -} - -/* ---------------------------------------------------------------------- - change box size - remap all atoms or fix group atoms depending on allremap flag - if rigid bodies exist, scale rigid body centers-of-mass -------------------------------------------------------------------------- */ - -void FixNHCuda::remap() -{ - int i; - double oldlo,oldhi,ctr; - - double **x = atom->x; - int *mask = atom->mask; - int nlocal = atom->nlocal; - double *h = domain->h; - - // omega is not used, except for book-keeping - - for (int i = 0; i < 6; i++) omega[i] += dto*omega_dot[i]; - - // convert pertinent atoms and rigid bodies to lamda coords - if (allremap) domain->x2lamda(nlocal); - else { - for (i = 0; i < nlocal; i++) - if (mask[i] & groupbit) - domain->x2lamda(x[i],x[i]); - } - - if (nrigid) - for (i = 0; i < nrigid; i++) - modify->fix[rfix[i]]->deform(0); - - // reset global and local box to new size/shape - - // This operation corresponds to applying the - // translate and scale operations - // corresponding to the solution of the following ODE: - // - // h_dot = omega_dot * h - // - // where h_dot, omega_dot and h are all upper-triangular - // 3x3 tensors. In Voigt notation, the elements of the - // RHS product tensor are: - // h_dot = [0*0, 1*1, 2*2, 1*3+3*2, 0*4+5*3+4*2, 0*5+5*1] - // - // Ordering of operations preserves time symmetry. 
- - double dto2 = dto/2.0; - double dto4 = dto/4.0; - double dto8 = dto/8.0; - - if (pstyle == TRICLINIC) { - - h[4] *= exp(dto8*omega_dot[0]); - h[4] += dto4*(omega_dot[5]*h[3]+omega_dot[4]*h[2]); - h[4] *= exp(dto8*omega_dot[0]); - - h[3] *= exp(dto4*omega_dot[1]); - h[3] += dto2*(omega_dot[3]*h[2]); - h[3] *= exp(dto4*omega_dot[1]); - - h[5] *= exp(dto4*omega_dot[0]); - h[5] += dto2*(omega_dot[5]*h[1]); - h[5] *= exp(dto4*omega_dot[0]); - - h[4] *= exp(dto8*omega_dot[0]); - h[4] += dto4*(omega_dot[5]*h[3]+omega_dot[4]*h[2]); - h[4] *= exp(dto8*omega_dot[0]); - - } - - for (i = 0; i < 3; i++) { - if (p_flag[i]) { - oldlo = domain->boxlo[i]; - oldhi = domain->boxhi[i]; - ctr = 0.5 * (oldlo + oldhi); - domain->boxlo[i] = (oldlo-ctr)*exp(dto*omega_dot[i]) + ctr; - domain->boxhi[i] = (oldhi-ctr)*exp(dto*omega_dot[i]) + ctr; - } - } - - if (pstyle == TRICLINIC) { - - h[4] *= exp(dto8*omega_dot[0]); - h[4] += dto4*(omega_dot[5]*h[3]+omega_dot[4]*h[2]); - h[4] *= exp(dto8*omega_dot[0]); - - h[3] *= exp(dto4*omega_dot[1]); - h[3] += dto2*(omega_dot[3]*h[2]); - h[3] *= exp(dto4*omega_dot[1]); - - h[5] *= exp(dto4*omega_dot[0]); - h[5] += dto2*(omega_dot[5]*h[1]); - h[5] *= exp(dto4*omega_dot[0]); - - h[4] *= exp(dto8*omega_dot[0]); - h[4] += dto4*(omega_dot[5]*h[3]+omega_dot[4]*h[2]); - h[4] *= exp(dto8*omega_dot[0]); - - domain->yz = h[3]; - domain->xz = h[4]; - domain->xy = h[5]; - - if (domain->yz < -0.5*domain->yprd || domain->yz > 0.5*domain->yprd || - domain->xz < -0.5*domain->xprd || domain->xz > 0.5*domain->xprd || - domain->xy < -0.5*domain->xprd || domain->xy > 0.5*domain->xprd) - error->all(FLERR,"Fix npt/nph has tilted box too far - " - "box flips are not yet implemented"); - } - - domain->set_global_box(); - domain->set_local_box(); - - // convert pertinent atoms and rigid bodies back to box coords - - if (allremap) domain->lamda2x(nlocal); - else { - for (i = 0; i < nlocal; i++) - if (mask[i] & groupbit) - domain->lamda2x(x[i],x[i]); - } - - if (nrigid) - 
for (i = 0; i < nrigid; i++) - modify->fix[rfix[i]]->deform(1); -} - -/* ---------------------------------------------------------------------- - pack entire state of Fix into one write -------------------------------------------------------------------------- */ - -void FixNHCuda::write_restart(FILE *fp) -{ - int nsize = 2; - if (tstat_flag) nsize += 1 + 2*mtchain; - if (pstat_flag) { - nsize += 16 + 2*mpchain; - if (deviatoric_flag) nsize += 6; - } - - double* list = (double *) memory->smalloc(nsize*sizeof(double),"nh:list"); - - int n = 0; - - list[n++] = tstat_flag; - if (tstat_flag) { - list[n++] = mtchain; - for (int ich = 0; ich < mtchain; ich++) - list[n++] = eta[ich]; - for (int ich = 0; ich < mtchain; ich++) - list[n++] = eta_dot[ich]; - } - - list[n++] = pstat_flag; - if (pstat_flag) { - list[n++] = omega[0]; - list[n++] = omega[1]; - list[n++] = omega[2]; - list[n++] = omega[3]; - list[n++] = omega[4]; - list[n++] = omega[5]; - list[n++] = omega_dot[0]; - list[n++] = omega_dot[1]; - list[n++] = omega_dot[2]; - list[n++] = omega_dot[3]; - list[n++] = omega_dot[4]; - list[n++] = omega_dot[5]; - list[n++] = vol0; - list[n++] = t0; - list[n++] = mpchain; - if (mpchain) { - for (int ich = 0; ich < mpchain; ich++) - list[n++] = etap[ich]; - for (int ich = 0; ich < mpchain; ich++) - list[n++] = etap_dot[ich]; - } - - list[n++] = deviatoric_flag; - if (deviatoric_flag) { - list[n++] = h0_inv[0]; - list[n++] = h0_inv[1]; - list[n++] = h0_inv[2]; - list[n++] = h0_inv[3]; - list[n++] = h0_inv[4]; - list[n++] = h0_inv[5]; - } - } - - if (comm->me == 0) { - int size = nsize * sizeof(double); - fwrite(&size,sizeof(int),1,fp); - fwrite(list,sizeof(double),nsize,fp); - } - - memory->sfree(list); -} - -/* ---------------------------------------------------------------------- - use state info from restart file to restart the Fix -------------------------------------------------------------------------- */ - -void FixNHCuda::restart(char *buf) -{ - int n = 0; - double 
*list = (double *) buf; - int flag = static_cast<int> (list[n++]); - if (flag) { - int m = static_cast<int> (list[n++]); - if (tstat_flag && m == mtchain) { - for (int ich = 0; ich < mtchain; ich++) - eta[ich] = list[n++]; - for (int ich = 0; ich < mtchain; ich++) - eta_dot[ich] = list[n++]; - } else n += 2*m; - } - flag = static_cast<int> (list[n++]); - if (flag) { - omega[0] = list[n++]; - omega[1] = list[n++]; - omega[2] = list[n++]; - omega[3] = list[n++]; - omega[4] = list[n++]; - omega[5] = list[n++]; - omega_dot[0] = list[n++]; - omega_dot[1] = list[n++]; - omega_dot[2] = list[n++]; - omega_dot[3] = list[n++]; - omega_dot[4] = list[n++]; - omega_dot[5] = list[n++]; - vol0 = list[n++]; - t0 = list[n++]; - int m = static_cast<int> (list[n++]); - if (pstat_flag && m == mpchain) { - for (int ich = 0; ich < mpchain; ich++) - etap[ich] = list[n++]; - for (int ich = 0; ich < mpchain; ich++) - etap_dot[ich] = list[n++]; - } else n+=2*m; - flag = static_cast<int> (list[n++]); - if (flag) { - h0_inv[0] = list[n++]; - h0_inv[1] = list[n++]; - h0_inv[2] = list[n++]; - h0_inv[3] = list[n++]; - h0_inv[4] = list[n++]; - h0_inv[5] = list[n++]; - } - } -} - -/* ---------------------------------------------------------------------- */ - -int FixNHCuda::modify_param(int narg, char **arg) -{ - if (strcmp(arg[0],"temp") == 0) { - if (narg < 2) error->all(FLERR,"Illegal fix_modify command"); - if (tflag) { - modify->delete_compute(id_temp); - tflag = 0; - } - delete [] id_temp; - int n = strlen(arg[1]) + 1; - id_temp = new char[n]; - strcpy(id_temp,arg[1]); - - int icompute = modify->find_compute(arg[1]); - if (icompute < 0) error->all(FLERR,"Could not find fix_modify temperature ID"); - temperature = modify->compute[icompute]; - - if (temperature->tempflag == 0) - error->all(FLERR,"Fix_modify temperature ID does not compute temperature"); - if (temperature->igroup != 0 && comm->me == 0) - error->warning(FLERR,"Temperature for fix modify is not for group all"); - - // reset 
id_temp of pressure to new temperature ID - - if (pstat_flag) { - icompute = modify->find_compute(id_press); - if (icompute < 0) - error->all(FLERR,"Pressure ID for fix modify does not exist"); - modify->compute[icompute]->reset_extra_compute_fix(id_temp); - } - - return 2; - - } else if (strcmp(arg[0],"press") == 0) { - if (narg < 2) error->all(FLERR,"Illegal fix_modify command"); - if (!pstat_flag) error->all(FLERR,"Illegal fix_modify command"); - if (pflag) { - modify->delete_compute(id_press); - pflag = 0; - } - delete [] id_press; - int n = strlen(arg[1]) + 1; - id_press = new char[n]; - strcpy(id_press,arg[1]); - - int icompute = modify->find_compute(arg[1]); - if (icompute < 0) error->all(FLERR,"Could not find fix_modify pressure ID"); - pressure = modify->compute[icompute]; - - if (pressure->pressflag == 0) - error->all(FLERR,"Fix_modify pressure ID does not compute pressure"); - return 2; - } - - return 0; -} - -/* ---------------------------------------------------------------------- */ - -double FixNHCuda::compute_scalar() -{ - int i; - double volume; - double energy; - double kt = boltz * t_target; - double lkt = tdof * kt; - double lkt_press = kt; - int ich; - if (dimension == 3) volume = domain->xprd * domain->yprd * domain->zprd; - else volume = domain->xprd * domain->yprd; - - energy = 0.0; - - // thermostat chain energy is equivalent to Eq. (2) in - // Martyna, Tuckerman, Tobias, Klein, Mol Phys, 87, 1117 - // Sum(0.5*p_eta_k^2/Q_k,k=1,M) + L*k*T*eta_1 + Sum(k*T*eta_k,k=2,M), - // where L = tdof - // M = mtchain - // p_eta_k = Q_k*eta_dot[k-1] - // Q_1 = L*k*T/t_freq^2 - // Q_k = k*T/t_freq^2, k > 1 - - if (tstat_flag) { - energy += lkt * eta[0] + 0.5*eta_mass[0]*eta_dot[0]*eta_dot[0]; - for (ich = 1; ich < mtchain; ich++) - energy += kt * eta[ich] + 0.5*eta_mass[ich]*eta_dot[ich]*eta_dot[ich]; - } - - // barostat energy is equivalent to Eq. 
(8) in - // Martyna, Tuckerman, Tobias, Klein, Mol Phys, 87, 1117 - // Sum(0.5*p_omega^2/W + P*V), - // where N = natoms - // p_omega = W*omega_dot - // W = N*k*T/p_freq^2 - // sum is over barostatted dimensions - - if (pstat_flag) { - for (i = 0; i < 3; i++) - if (p_flag[i]) - energy += 0.5*omega_dot[i]*omega_dot[i]*omega_mass[i] + - p_hydro*(volume-vol0) / (pdim*nktv2p); - - if (pstyle == TRICLINIC) { - for (i = 3; i < 6; i++) - if (p_flag[i]) - energy += 0.5*omega_dot[i]*omega_dot[i]*omega_mass[i]; - } - - // extra contributions from thermostat chain for barostat - - if (mpchain) { - energy += lkt_press * etap[0] + 0.5*etap_mass[0]*etap_dot[0]*etap_dot[0]; - for (ich = 1; ich < mpchain; ich++) - energy += kt * etap[ich] + - 0.5*etap_mass[ich]*etap_dot[ich]*etap_dot[ich]; - } - - // extra contribution from strain energy - - if (deviatoric_flag) energy += compute_strain_energy(); - } - - return energy; -} - -/* ---------------------------------------------------------------------- - return a single element of the following vectors, in this order: - eta[tchain], eta_dot[tchain], omega[ndof], omega_dot[ndof] - etap[pchain], etap_dot[pchain], PE_eta[tchain], KE_eta_dot[tchain] - PE_omega[ndof], KE_omega_dot[ndof], PE_etap[pchain], KE_etap_dot[pchain] - PE_strain[1] - if no thermostat exists, related quantities are omitted from the list - if no barostat exists, related quantities are omitted from the list - ndof = 1,3,6 degrees of freedom for pstyle = ISO,ANISO,TRI -------------------------------------------------------------------------- */ - -double FixNHCuda::compute_vector(int n) -{ - int ilen; - - if (tstat_flag) { - ilen = mtchain; - if (n < ilen) return eta[n]; - n -= ilen; - ilen = mtchain; - if (n < ilen) return eta_dot[n]; - n -= ilen; - } - - if (pstat_flag) { - if (pstyle == ISO) { - ilen = 1; - if (n < ilen) return omega[n]; - n -= ilen; - } else if (pstyle == ANISO) { - ilen = 3; - if (n < ilen) return omega[n]; - n -= ilen; - } else { - ilen = 6; - if 
(n < ilen) return omega[n]; - n -= ilen; - } - - if (pstyle == ISO) { - ilen = 1; - if (n < ilen) return omega_dot[n]; - n -= ilen; - } else if (pstyle == ANISO) { - ilen = 3; - if (n < ilen) return omega_dot[n]; - n -= ilen; - } else { - ilen = 6; - if (n < ilen) return omega_dot[n]; - n -= ilen; - } - - if (mpchain) { - ilen = mpchain; - if (n < ilen) return etap[n]; - n -= ilen; - ilen = mpchain; - if (n < ilen) return etap_dot[n]; - n -= ilen; - } - } - - double volume; - double kt = boltz * t_target; - double lkt = tdof * kt; - double lkt_press = kt; - int ich; - if (dimension == 3) volume = domain->xprd * domain->yprd * domain->zprd; - else volume = domain->xprd * domain->yprd; - - if (tstat_flag) { - ilen = mtchain; - if (n < ilen) { - ich = n; - if (ich == 0) - return lkt * eta[0]; - else - return kt * eta[ich]; - } - n -= ilen; - ilen = mtchain; - if (n < ilen) { - ich = n; - if (ich == 0) - return 0.5*eta_mass[0]*eta_dot[0]*eta_dot[0]; - else - return 0.5*eta_mass[ich]*eta_dot[ich]*eta_dot[ich]; - } - n -= ilen; - } - - if (pstat_flag) { - if (pstyle == ISO) { - ilen = 1; - if (n < ilen) - return p_hydro*(volume-vol0) / nktv2p; - n -= ilen; - } else if (pstyle == ANISO) { - ilen = 3; - if (n < ilen) - if (p_flag[n]) - return p_hydro*(volume-vol0) / (pdim*nktv2p); - else - return 0.0; - n -= ilen; - } else { - ilen = 6; - if (n < ilen) - if (n > 2) return 0.0; - else if (p_flag[n]) - return p_hydro*(volume-vol0) / (pdim*nktv2p); - else - return 0.0; - n -= ilen; - } - - if (pstyle == ISO) { - ilen = 1; - if (n < ilen) - return pdim*0.5*omega_dot[n]*omega_dot[n]*omega_mass[n]; - n -= ilen; - } else if (pstyle == ANISO) { - ilen = 3; - if (n < ilen) - if (p_flag[n]) - return 0.5*omega_dot[n]*omega_dot[n]*omega_mass[n]; - else return 0.0; - n -= ilen; - } else { - ilen = 6; - if (n < ilen) - if (p_flag[n]) - return 0.5*omega_dot[n]*omega_dot[n]*omega_mass[n]; - else return 0.0; - n -= ilen; - } - - if (mpchain) { - ilen = mpchain; - if (n < ilen) { - ich = n; 
- if (ich == 0) return lkt_press * etap[0]; - else return kt * etap[ich]; - } - n -= ilen; - ilen = mpchain; - if (n < ilen) { - ich = n; - if (ich == 0) - return 0.5*etap_mass[0]*etap_dot[0]*etap_dot[0]; - else - return 0.5*etap_mass[ich]*etap_dot[ich]*etap_dot[ich]; - } - n -= ilen; - } - - if (deviatoric_flag) { - ilen = 1; - if (n < ilen) - return compute_strain_energy(); - n -= ilen; - } - } - - return 0.0; -} - -/* ---------------------------------------------------------------------- */ - -void FixNHCuda::reset_dt() -{ - dtv = update->dt; - dtf = 0.5 * update->dt * force->ftm2v; - dthalf = 0.5 * update->dt; - dt4 = 0.25 * update->dt; - dt8 = 0.125 * update->dt; - dto = dthalf; - - // If using respa, then remap is performed in innermost level - - if (strcmp(update->integrate_style,"respa") == 0) - dto = 0.5*step_respa[0]; - - p_freq_max = 0.0; - if (pstat_flag) { - p_freq_max = MAX(p_freq[0],p_freq[1]); - p_freq_max = MAX(p_freq_max,p_freq[2]); - if (pstyle == TRICLINIC) { - p_freq_max = MAX(p_freq_max,p_freq[3]); - p_freq_max = MAX(p_freq_max,p_freq[4]); - p_freq_max = MAX(p_freq_max,p_freq[5]); - } - pdrag_factor = 1.0 - (update->dt * p_freq_max * drag / nc_pchain); - } - - if (tstat_flag) - tdrag_factor = 1.0 - (update->dt * t_freq * drag / nc_tchain); -} - -/* ---------------------------------------------------------------------- - perform half-step update of chain thermostat variables -------------------------------------------------------------------------- */ - -void FixNHCuda::nhc_temp_integrate() -{ - int ich; - double expfac; - - double lkt = tdof * boltz * t_target; - double kecurrent = tdof * boltz * t_current; - eta_dotdot[0] = (kecurrent - lkt)/eta_mass[0]; - - double ncfac = 1.0/nc_tchain; - for (int iloop = 0; iloop < nc_tchain; iloop++) { - - for (ich = mtchain-1; ich > 0; ich--) { - expfac = exp(-ncfac*dt8*eta_dot[ich+1]); - eta_dot[ich] *= expfac; - eta_dot[ich] += eta_dotdot[ich] * ncfac*dt4; - eta_dot[ich] *= tdrag_factor; - eta_dot[ich] 
*= expfac; - } - - expfac = exp(-ncfac*dt8*eta_dot[1]); - eta_dot[0] *= expfac; - eta_dot[0] += eta_dotdot[0] * ncfac*dt4; - eta_dot[0] *= tdrag_factor; - eta_dot[0] *= expfac; - - factor_eta = exp(-ncfac*dthalf*eta_dot[0]); - if(which==NOBIAS) - Cuda_FixNHCuda_nh_v_temp(&cuda->shared_data,groupbit,factor_eta,(igroup == atom->firstgroup)?atom->nfirst:atom->nlocal); - else if(which==BIAS) - { - if(!temperature->cudable) - { - cuda->downloadAll(); - nh_v_temp(); - cuda->cu_v->upload(); - } - else - { - int groupbit_org=temperature->groupbit; - temperature->groupbit=groupbit; - temperature->remove_bias_all(); - Cuda_FixNHCuda_nh_v_temp(&cuda->shared_data,groupbit,factor_eta,(igroup == atom->firstgroup)?atom->nfirst:atom->nlocal); - temperature->restore_bias_all(); - temperature->groupbit=groupbit_org; - } - - } - // rescale temperature due to velocity scaling - // should not be necessary to explicitly recompute the temperature - - t_current *= factor_eta*factor_eta; - kecurrent = tdof * boltz * t_current; - eta_dotdot[0] = (kecurrent - lkt)/eta_mass[0]; - - for (ich = 0; ich < mtchain; ich++) - eta[ich] += ncfac*dthalf*eta_dot[ich]; - - eta_dot[0] *= expfac; - eta_dot[0] += eta_dotdot[0] * ncfac*dt4; - eta_dot[0] *= expfac; - - for (ich = 1; ich < mtchain; ich++) { - expfac = exp(-ncfac*dt8*eta_dot[ich+1]); - eta_dot[ich] *= expfac; - eta_dotdot[ich] = (eta_mass[ich-1]*eta_dot[ich-1]*eta_dot[ich-1] - - boltz * t_target)/eta_mass[ich]; - eta_dot[ich] += eta_dotdot[ich] * ncfac*dt4; - eta_dot[ich] *= expfac; - } - } -} - -/* ---------------------------------------------------------------------- - perform half-step update of chain thermostat variables for barostat - scale barostat velocities -------------------------------------------------------------------------- */ - -void FixNHCuda::nhc_press_integrate() -{ - int ich,i; - double expfac,factor_etap,kecurrent; - double kt = boltz * t_target; - double lkt_press = kt; - - kecurrent = 0.0; - for (i = 0; i < 3; i++) - if 
(p_flag[i]) kecurrent += omega_mass[i]*omega_dot[i]*omega_dot[i]; - - if (pstyle == TRICLINIC) { - for (i = 3; i < 6; i++) - if (p_flag[i]) kecurrent += omega_mass[i]*omega_dot[i]*omega_dot[i]; - } - - etap_dotdot[0] = (kecurrent - lkt_press)/etap_mass[0]; - - double ncfac = 1.0/nc_pchain; - for (int iloop = 0; iloop < nc_pchain; iloop++) { - - for (ich = mpchain-1; ich > 0; ich--) { - expfac = exp(-ncfac*dt8*etap_dot[ich+1]); - etap_dot[ich] *= expfac; - etap_dot[ich] += etap_dotdot[ich] * ncfac*dt4; - etap_dot[ich] *= pdrag_factor; - etap_dot[ich] *= expfac; - } - - expfac = exp(-ncfac*dt8*etap_dot[1]); - etap_dot[0] *= expfac; - etap_dot[0] += etap_dotdot[0] * ncfac*dt4; - etap_dot[0] *= pdrag_factor; - etap_dot[0] *= expfac; - - for (ich = 0; ich < mpchain; ich++) - etap[ich] += ncfac*dthalf*etap_dot[ich]; - - factor_etap = exp(-ncfac*dthalf*etap_dot[0]); - for (i = 0; i < 3; i++) - if (p_flag[i]) omega_dot[i] *= factor_etap; - - if (pstyle == TRICLINIC) { - for (i = 3; i < 6; i++) - if (p_flag[i]) omega_dot[i] *= factor_etap; - } - - kecurrent = 0.0; - for (i = 0; i < 3; i++) - if (p_flag[i]) kecurrent += omega_mass[i]*omega_dot[i]*omega_dot[i]; - - if (pstyle == TRICLINIC) { - for (i = 3; i < 6; i++) - if (p_flag[i]) kecurrent += omega_mass[i]*omega_dot[i]*omega_dot[i]; - } - - etap_dotdot[0] = (kecurrent - lkt_press)/etap_mass[0]; - - etap_dot[0] *= expfac; - etap_dot[0] += etap_dotdot[0] * ncfac*dt4; - etap_dot[0] *= expfac; - - for (ich = 1; ich < mpchain; ich++) { - expfac = exp(-ncfac*dt8*etap_dot[ich+1]); - etap_dot[ich] *= expfac; - etap_dotdot[ich] = - (etap_mass[ich-1]*etap_dot[ich-1]*etap_dot[ich-1] - boltz*t_target) / - etap_mass[ich]; - etap_dot[ich] += etap_dotdot[ich] * ncfac*dt4; - etap_dot[ich] *= expfac; - } - } -} - -/* ---------------------------------------------------------------------- - perform half-step barostat scaling of velocities ------------------------------------------------------------------------*/ - -void 
FixNHCuda::nh_v_press() -{ - double factor[3]; - double **v = atom->v; - int *mask = atom->mask; - int nlocal = atom->nlocal; - if (igroup == atom->firstgroup) nlocal = atom->nfirst; - - factor[0] = exp(-dt4*(omega_dot[0]+mtk_term2)); - factor[1] = exp(-dt4*(omega_dot[1]+mtk_term2)); - factor[2] = exp(-dt4*(omega_dot[2]+mtk_term2)); - - if (which == NOBIAS) { - for (int i = 0; i < nlocal; i++) { - if (mask[i] & groupbit) { - v[i][0] *= factor[0]; - v[i][1] *= factor[1]; - v[i][2] *= factor[2]; - if (pstyle == TRICLINIC) { - v[i][0] += -dthalf*(v[i][1]*omega_dot[5] + v[i][2]*omega_dot[4]); - v[i][1] += -dthalf*v[i][2]*omega_dot[3]; - } - v[i][0] *= factor[0]; - v[i][1] *= factor[1]; - v[i][2] *= factor[2]; - } - } - } else if (which == BIAS) { - for (int i = 0; i < nlocal; i++) { - if (mask[i] & groupbit) { - temperature->remove_bias(i,v[i]); - v[i][0] *= factor[0]; - v[i][1] *= factor[1]; - v[i][2] *= factor[2]; - if (pstyle == TRICLINIC) { - v[i][0] += -dthalf*(v[i][1]*omega_dot[5] + v[i][2]*omega_dot[4]); - v[i][1] += -dthalf*v[i][2]*omega_dot[3]; - } - v[i][0] *= factor[0]; - v[i][1] *= factor[1]; - v[i][2] *= factor[2]; - temperature->restore_bias(i,v[i]); - } - } - } -} - -/* ---------------------------------------------------------------------- - perform half-step update of velocities ------------------------------------------------------------------------*/ - -void FixNHCuda::nve_v() -{ - double dtfm; - double **v = atom->v; - double **f = atom->f; - double *rmass = atom->rmass; - double *mass = atom->mass; - int *type = atom->type; - int *mask = atom->mask; - int nlocal = atom->nlocal; - if (igroup == atom->firstgroup) nlocal = atom->nfirst; - - if (rmass) { - for (int i = 0; i < nlocal; i++) { - if (mask[i] & groupbit) { - dtfm = dtf / rmass[i]; - v[i][0] += dtfm*f[i][0]; - v[i][1] += dtfm*f[i][1]; - v[i][2] += dtfm*f[i][2]; - } - } - } else { - for (int i = 0; i < nlocal; i++) { - if (mask[i] & groupbit) { - dtfm = dtf / mass[type[i]]; - v[i][0] += 
dtfm*f[i][0]; - v[i][1] += dtfm*f[i][1]; - v[i][2] += dtfm*f[i][2]; - } - } - } -} - -/* ---------------------------------------------------------------------- - perform full-step update of positions ------------------------------------------------------------------------*/ - -void FixNHCuda::nve_x() -{ - double **x = atom->x; - double **v = atom->v; - int *mask = atom->mask; - int nlocal = atom->nlocal; - if (igroup == atom->firstgroup) nlocal = atom->nfirst; - - // x update by full step only for atoms in group - - for (int i = 0; i < nlocal; i++) { - if (mask[i] & groupbit) { - x[i][0] += dtv * v[i][0]; - x[i][1] += dtv * v[i][1]; - x[i][2] += dtv * v[i][2]; - } - } -} - -/* ---------------------------------------------------------------------- - perform half-step thermostat scaling of velocities ------------------------------------------------------------------------*/ - -void FixNHCuda::nh_v_temp() -{ - double **v = atom->v; - int *mask = atom->mask; - int nlocal = atom->nlocal; - if (igroup == atom->firstgroup) nlocal = atom->nfirst; - - if (which == NOBIAS) { - for (int i = 0; i < nlocal; i++) { - if (mask[i] & groupbit) { - v[i][0] *= factor_eta; - v[i][1] *= factor_eta; - v[i][2] *= factor_eta; - } - } - } else if (which == BIAS) { - for (int i = 0; i < nlocal; i++) { - if (mask[i] & groupbit) { - temperature->remove_bias(i,v[i]); - v[i][0] *= factor_eta; - v[i][1] *= factor_eta; - v[i][2] *= factor_eta; - temperature->restore_bias(i,v[i]); - } - } - } -} - -/* ---------------------------------------------------------------------- - compute sigma tensor - needed whenever p_target or h0_inv changes ------------------------------------------------------------------------*/ - -void FixNHCuda::compute_sigma() -{ - // if nreset_h0 > 0, reset vol0 and h0_inv - // every nreset_h0 timesteps - - if (nreset_h0 > 0) { - int delta = update->ntimestep - update->beginstep; - if (delta % nreset_h0 == 0) { - if (dimension == 3) vol0 = domain->xprd * domain->yprd * 
domain->zprd; - else vol0 = domain->xprd * domain->yprd; - h0_inv[0] = domain->h_inv[0]; - h0_inv[1] = domain->h_inv[1]; - h0_inv[2] = domain->h_inv[2]; - h0_inv[3] = domain->h_inv[3]; - h0_inv[4] = domain->h_inv[4]; - h0_inv[5] = domain->h_inv[5]; - } - } - - // generate upper-triangular half of - // sigma = vol0*h0inv*(p_target-p_hydro)*h0inv^t - // units of sigma are are PV/L^2 e.g. atm.A - // - // [ 0 5 4 ] [ 0 5 4 ] [ 0 5 4 ] [ 0 - - ] - // [ 5 1 3 ] = [ - 1 3 ] [ 5 1 3 ] [ 5 1 - ] - // [ 4 3 2 ] [ - - 2 ] [ 4 3 2 ] [ 4 3 2 ] - - sigma[0] = - vol0*(h0_inv[0]*((p_target[0]-p_hydro)*h0_inv[0] + - p_target[5]*h0_inv[5]+p_target[4]*h0_inv[4]) + - h0_inv[5]*(p_target[5]*h0_inv[0] + - (p_target[1]-p_hydro)*h0_inv[5]+p_target[3]*h0_inv[4]) + - h0_inv[4]*(p_target[4]*h0_inv[0]+p_target[3]*h0_inv[5] + - (p_target[2]-p_hydro)*h0_inv[4])); - sigma[1] = - vol0*(h0_inv[1]*((p_target[1]-p_hydro)*h0_inv[1] + - p_target[3]*h0_inv[3]) + - h0_inv[3]*(p_target[3]*h0_inv[1] + - (p_target[2]-p_hydro)*h0_inv[3])); - sigma[2] = - vol0*(h0_inv[2]*((p_target[2]-p_hydro)*h0_inv[2])); - sigma[3] = - vol0*(h0_inv[1]*(p_target[3]*h0_inv[2]) + - h0_inv[3]*((p_target[2]-p_hydro)*h0_inv[2])); - sigma[4] = - vol0*(h0_inv[0]*(p_target[4]*h0_inv[2]) + - h0_inv[5]*(p_target[3]*h0_inv[2]) + - h0_inv[4]*((p_target[2]-p_hydro)*h0_inv[2])); - sigma[5] = - vol0*(h0_inv[0]*(p_target[5]*h0_inv[1]+p_target[4]*h0_inv[3]) + - h0_inv[5]*((p_target[1]-p_hydro)*h0_inv[1]+p_target[3]*h0_inv[3]) + - h0_inv[4]*(p_target[3]*h0_inv[1]+(p_target[2]-p_hydro)*h0_inv[3])); -} - -/* ---------------------------------------------------------------------- - compute strain energy ------------------------------------------------------------------------*/ - -double FixNHCuda::compute_strain_energy() -{ - // compute strain energy = 0.5*Tr(sigma*h*h^t) in energy units - - double* h = domain->h; - double d0,d1,d2; - - d0 = - sigma[0]*(h[0]*h[0]+h[5]*h[5]+h[4]*h[4]) + - sigma[5]*( h[1]*h[5]+h[3]*h[4]) + - sigma[4]*( h[2]*h[4]); 
- d1 = - sigma[5]*( h[5]*h[1]+h[4]*h[3]) + - sigma[1]*( h[1]*h[1]+h[3]*h[3]) + - sigma[3]*( h[2]*h[3]); - d2 = - sigma[4]*( h[4]*h[2]) + - sigma[3]*( h[3]*h[2]) + - sigma[2]*( h[2]*h[2]); - - double energy = 0.5*(d0+d1+d2)/nktv2p; - return energy; -} - -/* ---------------------------------------------------------------------- - compute deviatoric barostat force = h*sigma*h^t ------------------------------------------------------------------------*/ - -void FixNHCuda::compute_deviatoric() -{ - // generate upper-triangular part of h*sigma*h^t - // units of fdev are are PV, e.g. atm*A^3 - // [ 0 5 4 ] [ 0 5 4 ] [ 0 5 4 ] [ 0 - - ] - // [ 5 1 3 ] = [ - 1 3 ] [ 5 1 3 ] [ 5 1 - ] - // [ 4 3 2 ] [ - - 2 ] [ 4 3 2 ] [ 4 3 2 ] - - double* h = domain->h; - - fdev[0] = - h[0]*(sigma[0]*h[0]+sigma[5]*h[5]+sigma[4]*h[4]) + - h[5]*(sigma[5]*h[0]+sigma[1]*h[5]+sigma[3]*h[4]) + - h[4]*(sigma[4]*h[0]+sigma[3]*h[5]+sigma[2]*h[4]); - fdev[1] = - h[1]*( sigma[1]*h[1]+sigma[3]*h[3]) + - h[3]*( sigma[3]*h[1]+sigma[2]*h[3]); - fdev[2] = - h[2]*( sigma[2]*h[2]); - fdev[3] = - h[1]*( sigma[3]*h[2]) + - h[3]*( sigma[2]*h[2]); - fdev[4] = - h[0]*( sigma[4]*h[2]) + - h[5]*( sigma[3]*h[2]) + - h[4]*( sigma[2]*h[2]); - fdev[5] = - h[0]*( sigma[5]*h[1]+sigma[4]*h[3]) + - h[5]*( sigma[1]*h[1]+sigma[3]*h[3]) + - h[4]*( sigma[3]*h[1]+sigma[2]*h[3]); -} - -/* ---------------------------------------------------------------------- - compute hydrostatic target pressure ------------------------------------------------------------------------*/ - -void FixNHCuda::compute_press_target() -{ - double delta = update->ntimestep - update->beginstep; - if (update->endstep > update->beginstep) - delta /= update->endstep - update->beginstep; - else delta = 0.0; - - p_hydro = 0.0; - for (int i = 0; i < 3; i++) - if (p_flag[i]) { - p_target[i] = p_start[i] + delta * (p_stop[i]-p_start[i]); - p_hydro += p_target[i]; - } - p_hydro /= pdim; - - if (pstyle == TRICLINIC) - for (int i = 3; i < 6; i++) - p_target[i] = 
p_start[i] + delta * (p_stop[i]-p_start[i]); - - // if deviatoric, recompute sigma each time p_target changes - - if (deviatoric_flag) compute_sigma(); -} - -/* ---------------------------------------------------------------------- - update omega_dot, omega, dilation ------------------------------------------------------------------------*/ - -void FixNHCuda::nh_omega_dot() -{ - double f_omega,volume; - - if (dimension == 3) volume = domain->xprd*domain->yprd*domain->zprd; - else volume = domain->xprd*domain->yprd; - - if (deviatoric_flag) compute_deviatoric(); - - mtk_term1 = 0.0; - if (mtk_flag) - if (pstyle == ISO) { - mtk_term1 = tdof * boltz * t_current; - mtk_term1 /= pdim * atom->natoms; - } else { - double *mvv_current = temperature->vector; - for (int i = 0; i < 3; i++) - if (p_flag[i]) - mtk_term1 += mvv_current[i]; - mtk_term1 /= pdim * atom->natoms; - } - - for (int i = 0; i < 3; i++) - if (p_flag[i]) { - f_omega = (p_current[i]-p_hydro)*volume / - (omega_mass[i] * nktv2p) + mtk_term1 / omega_mass[i]; - if (deviatoric_flag) f_omega -= fdev[i]/(omega_mass[i] * nktv2p); - omega_dot[i] += f_omega*dthalf; - omega_dot[i] *= pdrag_factor; - } - - mtk_term2 = 0.0; - if (mtk_flag) { - for (int i = 0; i < 3; i++) - if (p_flag[i]) - mtk_term2 += omega_dot[i]; - mtk_term2 /= pdim * atom->natoms; - } - - if (pstyle == TRICLINIC) { - for (int i = 3; i < 6; i++) { - if (p_flag[i]) { - f_omega = p_current[i]*volume/(omega_mass[i] * nktv2p); - if (deviatoric_flag) - f_omega -= fdev[i]/(omega_mass[i] * nktv2p); - omega_dot[i] += f_omega*dthalf; - omega_dot[i] *= pdrag_factor; - } - } - } -} diff --git a/src/USER-CUDA/fix_nh_cuda.h b/src/USER-CUDA/fix_nh_cuda.h deleted file mode 100644 index 3cb97873c0..0000000000 --- a/src/USER-CUDA/fix_nh_cuda.h +++ /dev/null @@ -1,126 +0,0 @@ -/* -*- c++ -*- ---------------------------------------------------------- - LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator - http://lammps.sandia.gov, Sandia National 
Laboratories - Steve Plimpton, sjplimp@sandia.gov - - Copyright (2003) Sandia Corporation. Under the terms of Contract - DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains - certain rights in this software. This software is distributed under - the GNU General Public License. - - See the README file in the top-level LAMMPS directory. -------------------------------------------------------------------------- */ - -#ifndef LMP_FIX_NH_CUDA_H -#define LMP_FIX_NH_CUDA_H - -#include "fix.h" -#include "cuda_precision.h" - -namespace LAMMPS_NS { - -class FixNHCuda : public Fix { - public: - FixNHCuda(class LAMMPS *, int, char **); - virtual ~FixNHCuda(); - int setmask(); - virtual void init(); - void setup(int); - virtual void initial_integrate(int); - virtual void final_integrate(); - void initial_integrate_respa(int, int, int); - void final_integrate_respa(int, int); - double compute_scalar(); - double compute_vector(int); - void write_restart(FILE *); - void restart(char *); - int modify_param(int, char **); - void reset_dt(); - - protected: - class Cuda *cuda; - int dimension,which; - double dtv,dtf,dthalf,dt4,dt8,dto; - double boltz,nktv2p,tdof; - double vol0,t0; - - double t_start,t_stop; - double t_current,t_target; - double t_freq; - - int tstat_flag; // 1 if control T - int pstat_flag; // 1 if control P - - int pstyle,pcouple,allremap; - int p_flag[6]; // 1 if control P on this dim, 0 if not - double p_start[6],p_stop[6]; - double p_freq[6],p_target[6]; - double omega[6],omega_dot[6]; - double omega_mass[6]; - double p_current[6],dilation[6]; - double drag,tdrag_factor; // drag factor on particle thermostat - double pdrag_factor; // drag factor on barostat - double factor[6]; // velocity scaling due to barostat - int kspace_flag; // 1 if KSpace invoked, 0 if not - int nrigid; // number of rigid fixes - int *rfix; // indices of rigid fixes - - int nlevels_respa; - double *step_respa; - - char *id_temp,*id_press; - class Compute 
*temperature,*pressure; - int tflag,pflag; - - double *eta,*eta_dot; // chain thermostat for particles - double *eta_dotdot; - double *eta_mass; - int mtchain; // length of chain - - double *etap; // chain thermostat for barostat - double *etap_dot; - double *etap_dotdot; - double *etap_mass; - int mpchain; // length of chain - - int mtk_flag; // 0 if using Hoover barostat - double mtk_term1,mtk_term2; - int mtchain_default_flag; - int pdim; // number of barostatted dims - double mvv_current[3]; // diagonal of KE tensor - double mtk_factor; // MTK factor - double p_freq_max; // maximum barostat frequency - - double p_hydro; // hydrostatic target pressure - - int nc_tchain,nc_pchain; - double factor_eta; - double sigma[6]; // scaled target stress - double fdev[6]; // deviatoric force on barostat - int deviatoric_flag; // 0 if target stress tensor is hydrostatic - double h0_inv[6]; // h_inv of reference (zero strain) box - int nreset_h0; // interval for resetting h0 - - void couple(); - void couple_ke(); - void remap(); - void nhc_temp_integrate(); - void nhc_press_integrate(); - - virtual void nve_x(); // may be overwritten by child classes - virtual void nve_v(); - virtual void nh_v_press(); - virtual void nh_v_temp(); - - void compute_sigma(); - void compute_deviatoric(); - double compute_strain_energy(); - void compute_press_target(); - void nh_omega_dot(); - - X_CFLOAT triggerneighsq; -}; - -} - -#endif diff --git a/src/USER-CUDA/fix_npt_cuda.cpp b/src/USER-CUDA/fix_npt_cuda.cpp deleted file mode 100644 index ea2dd1fe04..0000000000 --- a/src/USER-CUDA/fix_npt_cuda.cpp +++ /dev/null @@ -1,75 +0,0 @@ -/* ---------------------------------------------------------------------- - LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator - http://lammps.sandia.gov, Sandia National Laboratories - Steve Plimpton, sjplimp@sandia.gov - - Copyright (2003) Sandia Corporation. Under the terms of Contract - DE-AC04-94AL85000 with Sandia Corporation, the U.S. 
Government retains - certain rights in this software. This software is distributed under - the GNU General Public License. - - See the README file in the top-level LAMMPS directory. -------------------------------------------------------------------------- */ - -#include <cstring> -#include "fix_npt_cuda.h" -#include "modify.h" -#include "error.h" - -#include "cuda_modify_flags.h" - -using namespace LAMMPS_NS; -using namespace FixConst; -using namespace FixConstCuda; - -/* ---------------------------------------------------------------------- */ - -FixNPTCuda::FixNPTCuda(LAMMPS *lmp, int narg, char **arg) : - FixNHCuda(lmp, narg, arg) -{ - cuda = lmp->cuda; - if(cuda == NULL) - error->all(FLERR,"You cannot use a /cuda class, without activating 'cuda' acceleration. Provide '-c on' as command-line argument to LAMMPS.."); - - if (!tstat_flag) - error->all(FLERR,"Temperature control must be used with fix npt"); - if (!pstat_flag) - error->all(FLERR,"Pressure control must be used with fix npt"); - - // create a new compute temp style - // id = fix-ID + temp - // compute group = all since pressure is always global (group all) - // and thus its KE/temperature contribution should use group all - - int n = strlen(id) + 6; - id_temp = new char[n]; - strcpy(id_temp,id); - strcat(id_temp,"_temp"); - - char **newarg = new char*[3]; - newarg[0] = id_temp; - newarg[1] = (char *) "all"; - newarg[2] = (char *) "temp/cuda"; - - modify->add_compute(3,newarg); - delete [] newarg; - tflag = 1; - - // create a new compute pressure style - // id = fix-ID + press, compute group = all - // pass id_temp as 4th arg to pressure constructor - - n = strlen(id) + 7; - id_press = new char[n]; - strcpy(id_press,id); - strcat(id_press,"_press"); - - newarg = new char*[4]; - newarg[0] = id_press; - newarg[1] = (char *) "all"; - newarg[2] = (char *) "pressure/cuda"; - newarg[3] = id_temp; - modify->add_compute(4,newarg); - delete [] newarg; - pflag = 1; -} diff --git a/src/USER-CUDA/fix_npt_cuda.h 
b/src/USER-CUDA/fix_npt_cuda.h deleted file mode 100644 index e10efb6a9a..0000000000 --- a/src/USER-CUDA/fix_npt_cuda.h +++ /dev/null @@ -1,36 +0,0 @@ -/* -*- c++ -*- ---------------------------------------------------------- - LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator - http://lammps.sandia.gov, Sandia National Laboratories - Steve Plimpton, sjplimp@sandia.gov - - Copyright (2003) Sandia Corporation. Under the terms of Contract - DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains - certain rights in this software. This software is distributed under - the GNU General Public License. - - See the README file in the top-level LAMMPS directory. -------------------------------------------------------------------------- */ - -#ifdef FIX_CLASS - -FixStyle(npt/cuda,FixNPTCuda) - -#else - -#ifndef LMP_FIX_NPTCuda_H -#define LMP_FIX_NPTCuda_H - -#include "fix_nh_cuda.h" - -namespace LAMMPS_NS { - -class FixNPTCuda : public FixNHCuda { - public: - FixNPTCuda(class LAMMPS *, int, char **); - ~FixNPTCuda() {} -}; - -} - -#endif -#endif diff --git a/src/USER-CUDA/fix_nve_cuda.cpp b/src/USER-CUDA/fix_nve_cuda.cpp deleted file mode 100644 index fbe85b176f..0000000000 --- a/src/USER-CUDA/fix_nve_cuda.cpp +++ /dev/null @@ -1,157 +0,0 @@ -/* ---------------------------------------------------------------------- - LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator - - Original Version: - http://lammps.sandia.gov, Sandia National Laboratories - Steve Plimpton, sjplimp@sandia.gov - - See the README file in the top-level LAMMPS directory. - - ----------------------------------------------------------------------- - - USER-CUDA Package and associated modifications: - https://sourceforge.net/projects/lammpscuda/ - - Christian Trott, christian.trott@tu-ilmenau.de - Lars Winterfeld, lars.winterfeld@tu-ilmenau.de - Theoretical Physics II, University of Technology Ilmenau, Germany - - See the README file in the USER-CUDA directory. 
- - This software is distributed under the GNU General Public License. -------------------------------------------------------------------------- */ - -/* ---------------------------------------------------------------------- - LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator - http://lammps.sandia.gov, Sandia National Laboratories - Steve Plimpton, sjplimp@sandia.gov - - Copyright (2003) Sandia Corporation. Under the terms of Contract - DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains - certain rights in this software. This software is distributed under - the GNU General Public License. - - See the README file in the top-level LAMMPS directory. -------------------------------------------------------------------------- */ - -#include <cstdio> -#include <cstring> -#include "fix_nve_cuda.h" -#include "fix_nve_cuda_cu.h" -#include "atom.h" -#include "force.h" -#include "update.h" -#include "respa.h" -#include "error.h" -#include "user_cuda.h" -#include "cuda_modify_flags.h" - -using namespace LAMMPS_NS; -using namespace FixConst; -using namespace FixConstCuda; - -/* ---------------------------------------------------------------------- */ - -FixNVECuda::FixNVECuda(LAMMPS *lmp, int narg, char **arg) : - Fix(lmp, narg, arg) -{ - cuda = lmp->cuda; - - if(cuda == NULL) - error->all(FLERR,"You cannot use a /cuda class, without activating 'cuda' acceleration. 
Provide '-c on' as command-line argument to LAMMPS.."); - - if (strcmp(style,"nve/sphere") != 0 && narg < 3) - error->all(FLERR,"Illegal fix nve command"); - - time_integrate = 1; -} - -/* ---------------------------------------------------------------------- */ - -int FixNVECuda::setmask() -{ - int mask = 0; - mask |= INITIAL_INTEGRATE_CUDA; - mask |= FINAL_INTEGRATE_CUDA; - // mask |= INITIAL_INTEGRATE_RESPA_CUDA; - // mask |= FINAL_INTEGRATE_RESPA_CUDA; - return mask; -} - -/* ---------------------------------------------------------------------- */ - -void FixNVECuda::init() -{ - dtv = update->dt; - dtf = 0.5 * update->dt * force->ftm2v; - - if (strstr(update->integrate_style,"respa")) - step_respa = ((Respa *) update->integrate)->step; - - triggerneighsq= cuda->shared_data.atom.triggerneighsq; - cuda->neighbor_decide_by_integrator=1; - Cuda_FixNVECuda_Init(&cuda->shared_data,dtv,dtf); - -} - -/* ---------------------------------------------------------------------- - allow for both per-type and per-atom mass -------------------------------------------------------------------------- */ - -void FixNVECuda::initial_integrate(int vflag) -{ - if(triggerneighsq!=cuda->shared_data.atom.triggerneighsq) - { - triggerneighsq= cuda->shared_data.atom.triggerneighsq; - Cuda_FixNVECuda_Init(&cuda->shared_data,dtv,dtf); - } - int nlocal = atom->nlocal; - if(igroup == atom->firstgroup) nlocal = atom->nfirst; - - Cuda_FixNVECuda_InitialIntegrate(& cuda->shared_data, groupbit,nlocal); -} - -/* ---------------------------------------------------------------------- */ - -void FixNVECuda::final_integrate() -{ - int nlocal = atom->nlocal; - if(igroup == atom->firstgroup) nlocal = atom->nfirst; - - Cuda_FixNVECuda_FinalIntegrate(& cuda->shared_data, groupbit,nlocal); -} - -/* ---------------------------------------------------------------------- */ - -void FixNVECuda::initial_integrate_respa(int vflag, int ilevel, int flag) -{ - //this point should not be reached yet since RESPA is 
not supported - if (flag) return; // only used by NPT,NPH - - dtv = step_respa[ilevel]; - dtf = 0.5 * step_respa[ilevel] * force->ftm2v; - - // innermost level - NVE update of v and x - // all other levels - NVE update of v - - if(ilevel == 0) initial_integrate(vflag); - else final_integrate(); -} - -/* ---------------------------------------------------------------------- */ - -void FixNVECuda::final_integrate_respa(int ilevel, int iloop) -{ - //this point should not be reached yet since RESPA is not supported - dtf = 0.5 * step_respa[ilevel] * force->ftm2v; - final_integrate(); -} - -/* ---------------------------------------------------------------------- */ - -void FixNVECuda::reset_dt() -{ - dtv = update->dt; - dtf = 0.5 * update->dt * force->ftm2v; - Cuda_FixNVECuda_Init(&cuda->shared_data,dtv,dtf); -} diff --git a/src/USER-CUDA/fix_nve_cuda.h b/src/USER-CUDA/fix_nve_cuda.h deleted file mode 100644 index 090d327db5..0000000000 --- a/src/USER-CUDA/fix_nve_cuda.h +++ /dev/null @@ -1,63 +0,0 @@ -/* -*- c++ -*- ---------------------------------------------------------- - LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator - - Original Version: - http://lammps.sandia.gov, Sandia National Laboratories - Steve Plimpton, sjplimp@sandia.gov - - See the README file in the top-level LAMMPS directory. - - ----------------------------------------------------------------------- - - USER-CUDA Package and associated modifications: - https://sourceforge.net/projects/lammpscuda/ - - Christian Trott, christian.trott@tu-ilmenau.de - Lars Winterfeld, lars.winterfeld@tu-ilmenau.de - Theoretical Physics II, University of Technology Ilmenau, Germany - - See the README file in the USER-CUDA directory. - - This software is distributed under the GNU General Public License. 
-------------------------------------------------------------------------- */ - -#ifdef FIX_CLASS - -FixStyle(nve/cuda,FixNVECuda) - -#else - -#ifndef LMP_FIX_NVE_CUDA_H -#define LMP_FIX_NVE_CUDA_H - -#include "fix.h" -#include "cuda_precision.h" - -namespace LAMMPS_NS { - -class FixNVECuda : public Fix -{ - public: - FixNVECuda(class LAMMPS *, int, char **); - int setmask(); - virtual void init(); - virtual void initial_integrate(int); - virtual void final_integrate(); - void initial_integrate_respa(int, int, int); - void final_integrate_respa(int, int); - void reset_dt(); - - X_CFLOAT triggerneighsq; - - protected: - class Cuda *cuda; - double dtv, dtf; - double *step_respa; - int mass_require; - -}; - -} - -#endif -#endif diff --git a/src/USER-CUDA/fix_nvt_cuda.cpp b/src/USER-CUDA/fix_nvt_cuda.cpp deleted file mode 100644 index e1380b0005..0000000000 --- a/src/USER-CUDA/fix_nvt_cuda.cpp +++ /dev/null @@ -1,52 +0,0 @@ -/* ---------------------------------------------------------------------- - LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator - http://lammps.sandia.gov, Sandia National Laboratories - Steve Plimpton, sjplimp@sandia.gov - - Copyright (2003) Sandia Corporation. Under the terms of Contract - DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains - certain rights in this software. This software is distributed under - the GNU General Public License. - - See the README file in the top-level LAMMPS directory. 
-------------------------------------------------------------------------- */ - -#include <cstring> -#include "fix_nvt_cuda.h" -#include "group.h" -#include "modify.h" -#include "error.h" - -#include "cuda_modify_flags.h" - -using namespace LAMMPS_NS; -using namespace FixConst; -using namespace FixConstCuda; - -/* ---------------------------------------------------------------------- */ - -FixNVTCuda::FixNVTCuda(LAMMPS *lmp, int narg, char **arg) : - FixNHCuda(lmp, narg, arg) -{ - if (!tstat_flag) - error->all(FLERR,"Temperature control must be used with fix nvt"); - if (pstat_flag) - error->all(FLERR,"Pressure control can not be used with fix nvt"); - - // create a new compute temp style - // id = fix-ID + temp - - int n = strlen(id) + 6; - id_temp = new char[n]; - strcpy(id_temp,id); - strcat(id_temp,"_temp"); - - char **newarg = new char*[3]; - newarg[0] = id_temp; - newarg[1] = group->names[igroup]; - newarg[2] = (char *) "temp/cuda"; - - modify->add_compute(3,newarg); - delete [] newarg; - tflag = 1; -} diff --git a/src/USER-CUDA/fix_nvt_cuda.h b/src/USER-CUDA/fix_nvt_cuda.h deleted file mode 100644 index 65f38e05d6..0000000000 --- a/src/USER-CUDA/fix_nvt_cuda.h +++ /dev/null @@ -1,36 +0,0 @@ -/* -*- c++ -*- ---------------------------------------------------------- - LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator - http://lammps.sandia.gov, Sandia National Laboratories - Steve Plimpton, sjplimp@sandia.gov - - Copyright (2003) Sandia Corporation. Under the terms of Contract - DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains - certain rights in this software. This software is distributed under - the GNU General Public License. - - See the README file in the top-level LAMMPS directory. 
-------------------------------------------------------------------------- */ - -#ifdef FIX_CLASS - -FixStyle(nvt/cuda,FixNVTCuda) - -#else - -#ifndef LMP_FIX_NVTCuda_H -#define LMP_FIX_NVTCuda_H - -#include "fix_nh_cuda.h" - -namespace LAMMPS_NS { - -class FixNVTCuda : public FixNHCuda { - public: - FixNVTCuda(class LAMMPS *, int, char **); - ~FixNVTCuda() {} -}; - -} - -#endif -#endif diff --git a/src/USER-CUDA/fix_set_force_cuda.cpp b/src/USER-CUDA/fix_set_force_cuda.cpp deleted file mode 100644 index b7000a5548..0000000000 --- a/src/USER-CUDA/fix_set_force_cuda.cpp +++ /dev/null @@ -1,184 +0,0 @@ -/* ---------------------------------------------------------------------- - LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator - - Original Version: - http://lammps.sandia.gov, Sandia National Laboratories - Steve Plimpton, sjplimp@sandia.gov - - See the README file in the top-level LAMMPS directory. - - ----------------------------------------------------------------------- - - USER-CUDA Package and associated modifications: - https://sourceforge.net/projects/lammpscuda/ - - Christian Trott, christian.trott@tu-ilmenau.de - Lars Winterfeld, lars.winterfeld@tu-ilmenau.de - Theoretical Physics II, University of Technology Ilmenau, Germany - - See the README file in the USER-CUDA directory. - - This software is distributed under the GNU General Public License. 
-------------------------------------------------------------------------- */ -#include <cstring> -#include <cstdlib> -#include "fix_set_force_cuda.h" -#include "fix_set_force_cuda_cu.h" -#include "atom.h" -#include "update.h" -#include "respa.h" -#include "error.h" -#include "force.h" -#include "user_cuda.h" -#include "memory.h" -#include "cuda_modify_flags.h" - - -using namespace LAMMPS_NS; -using namespace FixConst; -using namespace FixConstCuda; - -/* ---------------------------------------------------------------------- */ - -FixSetForceCuda::FixSetForceCuda(LAMMPS *lmp, int narg, char **arg) : - Fix(lmp, narg, arg) -{ - cuda = lmp->cuda; - if(cuda == NULL) - error->all(FLERR,"You cannot use a /cuda class, without activating 'cuda' acceleration. Provide '-c on' as command-line argument to LAMMPS.."); - - if (narg != 6) error->all(FLERR,"Illegal fix setforce/cuda command"); - - vector_flag = 1; - size_vector = 3; - global_freq = 1; - extvector = 1; - - flagx = flagy = flagz = 1; - if (strcmp(arg[3],"NULL") == 0) flagx = 0; - else xvalue = force->numeric(FLERR,arg[3]); - if (strcmp(arg[4],"NULL") == 0) flagy = 0; - else yvalue = force->numeric(FLERR,arg[4]); - if (strcmp(arg[5],"NULL") == 0) flagz = 0; - else zvalue = force->numeric(FLERR,arg[5]); - - force_flag = 0; - foriginal[0] = foriginal[1] = foriginal[2] = 0.0; - cu_foriginal=NULL; -} - -/* ---------------------------------------------------------------------- */ - -int FixSetForceCuda::setmask() -{ - int mask = 0; - mask |= POST_FORCE_CUDA; - mask |= THERMO_ENERGY_CUDA; - mask |= POST_FORCE_RESPA; - mask |= MIN_POST_FORCE_CUDA; - return mask; -} - -/* ---------------------------------------------------------------------- */ - -void FixSetForceCuda::init() -{ - if(not cu_foriginal) - cu_foriginal = new cCudaData<double, F_CFLOAT, x> (foriginal,3); - if (strstr(update->integrate_style,"respa")) - nlevels_respa = ((Respa *) update->integrate)->nlevels; -} - -/* 
---------------------------------------------------------------------- */ - -void FixSetForceCuda::setup(int vflag) -{ - MYDBG( printf("# CUDA: FixSetForceCuda::setup\n"); ) - - if (strstr(update->integrate_style,"verlet")) - { - Cuda_FixSetForceCuda_Init(&cuda->shared_data); - cuda->cu_f->upload(); - post_force(vflag); - cuda->cu_f->download(); - - } - else { - ((Respa *) update->integrate)->copy_flevel_f(nlevels_respa-1); - cuda->cu_f->download(); - post_force_respa(vflag,nlevels_respa-1,0); - cuda->cu_f->upload(); - ((Respa *) update->integrate)->copy_f_flevel(nlevels_respa-1); - } - MYDBG( printf("# CUDA: FixSetForceCuda::setup done\n"); ) -} - -/* ---------------------------------------------------------------------- */ - -void FixSetForceCuda::min_setup(int vflag) -{ - post_force(vflag); -} - -/* ---------------------------------------------------------------------- */ - -void FixSetForceCuda::post_force(int vflag) -{ - MYDBG( printf("# CUDA: FixSetForceCuda::postforce start\n"); ) - force_flag = 0; - cu_foriginal->memset_device(0); - Cuda_FixSetForceCuda_PostForce(&cuda->shared_data, groupbit, xvalue, yvalue,zvalue,(F_CFLOAT*) cu_foriginal->dev_data(),flagx,flagy,flagz); - cu_foriginal->download(); -} - -/* ---------------------------------------------------------------------- */ - -void FixSetForceCuda::post_force_respa(int vflag, int ilevel, int iloop) -{ - if (ilevel == nlevels_respa-1) post_force(vflag); - else { - cuda->cu_f->download(); - cuda->cu_mask->download(); - - double **f = atom->f; - int *mask = atom->mask; - int nlocal = atom->nlocal; - - foriginal[0] = foriginal[1] = foriginal[2] = 0.0; - force_flag = 0; - - for (int i = 0; i < nlocal; i++) - if (mask[i] & groupbit) { - foriginal[0] += f[i][0]; - foriginal[1] += f[i][1]; - foriginal[2] += f[i][2]; - if (flagx) f[i][0] = 0.0; - if (flagy) f[i][1] = 0.0; - if (flagz) f[i][2] = 0.0; - } - cuda->cu_f->upload(); - } -} - -/* ---------------------------------------------------------------------- 
*/ - -void FixSetForceCuda::min_post_force(int vflag) -{ - post_force(vflag); -} - - -/* ---------------------------------------------------------------------- - return components of total force on fix group before force was changed -------------------------------------------------------------------------- */ - -double FixSetForceCuda::compute_vector(int n) -{ - // only sum across procs one time - - if (force_flag == 0) { - MPI_Allreduce(foriginal,foriginal_all,3,MPI_DOUBLE,MPI_SUM,world); - force_flag = 1; - } - return foriginal_all[n+1]; -} diff --git a/src/USER-CUDA/fix_set_force_cuda.h b/src/USER-CUDA/fix_set_force_cuda.h deleted file mode 100644 index a195aec0ec..0000000000 --- a/src/USER-CUDA/fix_set_force_cuda.h +++ /dev/null @@ -1,63 +0,0 @@ -/* -*- c++ -*- ---------------------------------------------------------- - LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator - - Original Version: - http://lammps.sandia.gov, Sandia National Laboratories - Steve Plimpton, sjplimp@sandia.gov - - See the README file in the top-level LAMMPS directory. - - ----------------------------------------------------------------------- - - USER-CUDA Package and associated modifications: - https://sourceforge.net/projects/lammpscuda/ - - Christian Trott, christian.trott@tu-ilmenau.de - Lars Winterfeld, lars.winterfeld@tu-ilmenau.de - Theoretical Physics II, University of Technology Ilmenau, Germany - - See the README file in the USER-CUDA directory. - - This software is distributed under the GNU General Public License. 
-------------------------------------------------------------------------- */ - -#ifdef FIX_CLASS - -FixStyle(setforce/cuda,FixSetForceCuda) - -#else - -#ifndef LMP_FIX_SET_FORCE_CUDA_H -#define LMP_FIX_SET_FORCE_CUDA_H - -#include "fix.h" -#include "cuda_data.h" - -namespace LAMMPS_NS { - -class FixSetForceCuda : public Fix { - public: - FixSetForceCuda(class LAMMPS *, int, char **); - int setmask(); - void init(); - void setup(int); - void min_setup(int); - void post_force(int); - void post_force_respa(int, int, int); - void min_post_force(int); - double compute_vector(int); - - private: - class Cuda *cuda; - int flagx,flagy,flagz; - double xvalue,yvalue,zvalue; - double foriginal[3],foriginal_all[3]; - cCudaData<double , F_CFLOAT , x>* cu_foriginal; - int force_flag; - int nlevels_respa; -}; - -} - -#endif -#endif diff --git a/src/USER-CUDA/fix_shake_cuda.cpp b/src/USER-CUDA/fix_shake_cuda.cpp deleted file mode 100644 index 92274d1d46..0000000000 --- a/src/USER-CUDA/fix_shake_cuda.cpp +++ /dev/null @@ -1,2885 +0,0 @@ -/* ---------------------------------------------------------------------- - LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator - http://lammps.sandia.gov, Sandia National Laboratories - Steve Plimpton, sjplimp@sandia.gov - - Copyright (2003) Sandia Corporation. Under the terms of Contract - DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains - certain rights in this software. This software is distributed under - the GNU General Public License. - - See the README file in the top-level LAMMPS directory. 
-------------------------------------------------------------------------- */ - -#include <mpi.h> -#include <cmath> -#include <cstdlib> -#include <cstring> -#include <cstdio> -#include <ctime> -#include "fix_shake_cuda.h" -#include "fix_shake_cuda_cu.h" -#include "atom.h" -#include "update.h" -#include "respa.h" -#include "modify.h" -#include "domain.h" -#include "force.h" -#include "bond.h" -#include "angle.h" -#include "comm.h" -#include "group.h" -#include "fix_respa.h" -#include "memory.h" -#include "error.h" -#include "user_cuda.h" -#include "cuda_modify_flags.h" -#include "math_const.h" - -using namespace LAMMPS_NS; -using namespace FixConst; -using namespace FixConstCuda; -using namespace MathConst; - -#define BIG 1.0e20 -#define MASSDELTA 0.1 - -/* ---------------------------------------------------------------------- */ - -FixShakeCuda::FixShakeCuda(LAMMPS* lmp, int narg, char** arg) : - Fix(lmp, narg, arg) -{ - cuda = lmp->cuda; - - if(cuda == NULL) - error->all(FLERR, "You cannot use a /cuda class, without activating 'cuda' acceleration. Provide '-c on' as command-line argument to LAMMPS.."); - - if(atom->map_style != 1) - error->all(FLERR, "Fix shake/cuda needs atom map style array. 
In particular it does not currently work with hash-tables."); - - MPI_Comm_rank(world, &me); - MPI_Comm_size(world, &nprocs); - neighbor_step = true; - - virial_flag = 1; - create_attribute = 1; - dof_flag = 1; - - // error check - - if(atom->molecular == 0) - error->all(FLERR, "Cannot use fix shake with non-molecular system"); - - // perform initial allocation of atom-based arrays - // register with Atom class - - shake_flag = NULL; - shake_atom = shake_type = NULL; - xshake = NULL; - cu_shake_flag = NULL; - cu_shake_atom = NULL; - cu_shake_type = NULL; - cu_xshake = NULL; - cu_list = NULL; - cu_bond_distance = NULL; - cu_angle_distance = NULL; - cu_virial = new cCudaData<double , ENERGY_CFLOAT , xx >(virial, 6); - grow_arrays(atom->nmax); - atom->add_callback(0); - - // set comm size needed by this fix - - comm_forward = 3; - - // parse SHAKE args - - if(narg < 8) error->all(FLERR, "Illegal fix shake command"); - - tolerance = force->numeric(FLERR,arg[3]); - max_iter = force->inumeric(FLERR,arg[4]); - output_every = force->inumeric(FLERR,arg[5]); - - // parse SHAKE args for bond and angle types - // will be used by find_clusters - // store args for "b" "a" "t" as flags in (1:n) list for fast access - // store args for "m" in list of length nmass for looping over - // for "m" verify that atom masses have been set - - bond_flag = new int[atom->nbondtypes + 1]; - - for(int i = 1; i <= atom->nbondtypes; i++) bond_flag[i] = 0; - - angle_flag = new int[atom->nangletypes + 1]; - - for(int i = 1; i <= atom->nangletypes; i++) angle_flag[i] = 0; - - type_flag = new int[atom->ntypes + 1]; - - for(int i = 1; i <= atom->ntypes; i++) type_flag[i] = 0; - - mass_list = new double[atom->ntypes]; - nmass = 0; - - char mode = '\0'; - int next = 6; - - while(next < narg) { - - if(strcmp(arg[next], "b") == 0) mode = 'b'; - else if(strcmp(arg[next], "a") == 0) mode = 'a'; - else if(strcmp(arg[next], "t") == 0) mode = 't'; - else if(strcmp(arg[next], "m") == 0) { - mode = 'm'; - 
atom->check_mass(); - - } else if(mode == 'b') { - int i = force->inumeric(FLERR,arg[next]); - - if(i < 1 || i > atom->nbondtypes) - error->all(FLERR, "Invalid bond type index for fix shake"); - - bond_flag[i] = 1; - - } else if(mode == 'a') { - int i = force->inumeric(FLERR,arg[next]); - - if(i < 1 || i > atom->nangletypes) - error->all(FLERR, "Invalid angle type index for fix shake"); - - angle_flag[i] = 1; - - } else if(mode == 't') { - int i = force->inumeric(FLERR,arg[next]); - - if(i < 1 || i > atom->ntypes) - error->all(FLERR, "Invalid atom type index for fix shake"); - - type_flag[i] = 1; - - } else if(mode == 'm') { - double massone = force->numeric(FLERR,arg[next]); - - if(massone == 0.0) error->all(FLERR, "Invalid atom mass for fix shake"); - - if(nmass == atom->ntypes) error->all(FLERR, "Too many masses for fix shake"); - - mass_list[nmass++] = massone; - - } else error->all(FLERR, "Illegal fix shake command"); - - next++; - } - - // allocate bond and angle distance arrays, indexed from 1 to n - - bond_distance = new double[atom->nbondtypes + 1]; - angle_distance = new double[atom->nangletypes + 1]; - - cu_bond_distance = new cCudaData<double, X_CFLOAT, xx> (bond_distance, atom->nbondtypes + 1); - cu_angle_distance = new cCudaData<double, X_CFLOAT, xx> (angle_distance, atom->nangletypes + 1); - - // allocate statistics arrays - - if(output_every) { - int nb = atom->nbondtypes + 1; - b_count = new int[nb]; - b_count_all = new int[nb]; - b_ave = new double[nb]; - b_ave_all = new double[nb]; - b_max = new double[nb]; - b_max_all = new double[nb]; - b_min = new double[nb]; - b_min_all = new double[nb]; - - int na = atom->nangletypes + 1; - a_count = new int[na]; - a_count_all = new int[na]; - a_ave = new double[na]; - a_ave_all = new double[na]; - a_max = new double[na]; - a_max_all = new double[na]; - a_min = new double[na]; - a_min_all = new double[na]; - } - - cudable_comm = true; - // identify all SHAKE clusters - - find_clusters(); - - // initialize 
list of SHAKE clusters to constrain - - maxlist = 0; - list = NULL; - Cuda_FixShakeCuda_Init(&cuda->shared_data, dtv, dtfsq, - cu_shake_flag->dev_data(), cu_shake_atom->dev_data(), cu_shake_type->dev_data(), cu_xshake->dev_data(), - cu_bond_distance->dev_data(), cu_angle_distance->dev_data(), cu_virial->dev_data(), - max_iter, tolerance); - - -} - -/* ---------------------------------------------------------------------- */ - -FixShakeCuda::~FixShakeCuda() -{ - // unregister callbacks to this fix from Atom class - - atom->delete_callback(id, 0); - - // set bond_type and angle_type back to positive for SHAKE clusters - // must set for all SHAKE bonds and angles stored by each atom - - int** bond_type = atom->bond_type; - int** angle_type = atom->angle_type; - int nlocal = atom->nlocal; - - int n; - - for(int i = 0; i < nlocal; i++) { - if(shake_flag[i] == 0) continue; - else if(shake_flag[i] == 1) { - n = bondfind(i, shake_atom[i][0], shake_atom[i][1]); - - if(n >= 0) bond_type[i][n] = -bond_type[i][n]; - - n = bondfind(i, shake_atom[i][0], shake_atom[i][2]); - - if(n >= 0) bond_type[i][n] = -bond_type[i][n]; - - n = anglefind(i, shake_atom[i][1], shake_atom[i][2]); - - if(n >= 0) angle_type[i][n] = -angle_type[i][n]; - } else if(shake_flag[i] == 2) { - n = bondfind(i, shake_atom[i][0], shake_atom[i][1]); - - if(n >= 0) bond_type[i][n] = -bond_type[i][n]; - } else if(shake_flag[i] == 3) { - n = bondfind(i, shake_atom[i][0], shake_atom[i][1]); - - if(n >= 0) bond_type[i][n] = -bond_type[i][n]; - - n = bondfind(i, shake_atom[i][0], shake_atom[i][2]); - - if(n >= 0) bond_type[i][n] = -bond_type[i][n]; - } else if(shake_flag[i] == 4) { - n = bondfind(i, shake_atom[i][0], shake_atom[i][1]); - - if(n >= 0) bond_type[i][n] = -bond_type[i][n]; - - n = bondfind(i, shake_atom[i][0], shake_atom[i][2]); - - if(n >= 0) bond_type[i][n] = -bond_type[i][n]; - - n = bondfind(i, shake_atom[i][0], shake_atom[i][3]); - - if(n >= 0) bond_type[i][n] = -bond_type[i][n]; - } - } - - // 
delete locally stored arrays - - memory->destroy(shake_flag); - memory->destroy(shake_atom); - memory->destroy(shake_type); - memory->destroy(xshake); - - delete [] bond_flag; - delete [] angle_flag; - delete [] type_flag; - delete [] mass_list; - - delete [] bond_distance; - delete [] angle_distance; - - if(output_every) { - delete [] b_count; - delete [] b_count_all; - delete [] b_ave; - delete [] b_ave_all; - delete [] b_max; - delete [] b_max_all; - delete [] b_min; - delete [] b_min_all; - - delete [] a_count; - delete [] a_count_all; - delete [] a_ave; - delete [] a_ave_all; - delete [] a_max; - delete [] a_max_all; - delete [] a_min; - delete [] a_min_all; - } - - memory->destroy(list); - - delete cu_shake_flag; - delete cu_shake_atom; - delete cu_shake_type; - delete cu_xshake; - delete cu_list; - delete cu_bond_distance; - delete cu_angle_distance; -} - -/* ---------------------------------------------------------------------- */ - -int FixShakeCuda::setmask() -{ - int mask = 0; - mask |= PRE_NEIGHBOR_CUDA; - mask |= POST_FORCE_CUDA; - mask |= POST_FORCE_RESPA; - return mask; -} - -/* ---------------------------------------------------------------------- - set bond and angle distances - this init must happen after force->bond and force->angle inits -------------------------------------------------------------------------- */ - -void FixShakeCuda::init() -{ - int i, m, flag, flag_all, type1, type2, bond1_type, bond2_type; - double rsq, angle; - - // error if more than one shake fix - - int count = 0; - - for(i = 0; i < modify->nfix; i++) - if(strcmp(modify->fix[i]->style, "shake") == 0) count++; - - if(count > 1) error->all(FLERR, "More than one fix shake"); - - // cannot use with minimization since SHAKE turns off bonds - // that should contribute to potential energy - - if(update->whichflag == 2) - error->all(FLERR, "Fix shake cannot be used with minimization"); - - // error if npt,nph fix comes before shake fix - - for(i = 0; i < modify->nfix; i++) { - 
if(strcmp(modify->fix[i]->style, "npt") == 0) break; - - if(strcmp(modify->fix[i]->style, "nph") == 0) break; - } - - if(i < modify->nfix) { - for(int j = i; j < modify->nfix; j++) - if(strcmp(modify->fix[j]->style, "shake") == 0) - error->all(FLERR, "Shake fix must come before NPT/NPH fix"); - } - - // if rRESPA, find associated fix that must exist - // could have changed locations in fix list since created - // set ptrs to rRESPA variables - - if(strstr(update->integrate_style, "respa")) { - for(i = 0; i < modify->nfix; i++) - if(strcmp(modify->fix[i]->style, "RESPA") == 0) ifix_respa = i; - - nlevels_respa = ((Respa*) update->integrate)->nlevels; - loop_respa = ((Respa*) update->integrate)->loop; - step_respa = ((Respa*) update->integrate)->step; - } - - // set equilibrium bond distances - - if(force->bond == NULL) - error->all(FLERR, "Bond potential must be defined for SHAKE"); - - for(i = 1; i <= atom->nbondtypes; i++) - bond_distance[i] = force->bond->equilibrium_distance(i); - - // set equilibrium angle distances - - int nlocal = atom->nlocal; - - for(i = 1; i <= atom->nangletypes; i++) { - if(angle_flag[i] == 0) continue; - - if(force->angle == NULL) - error->all(FLERR, "Angle potential must be defined for SHAKE"); - - // scan all atoms for a SHAKE angle cluster - // extract bond types for the 2 bonds in the cluster - // bond types must be same in all clusters of this angle type, - // else set error flag - - flag = 0; - bond1_type = bond2_type = 0; - - for(m = 0; m < nlocal; m++) { - if(shake_flag[m] != 1) continue; - - if(shake_type[m][2] != i) continue; - - type1 = MIN(shake_type[m][0], shake_type[m][1]); - type2 = MAX(shake_type[m][0], shake_type[m][1]); - - if(bond1_type > 0) { - if(type1 != bond1_type || type2 != bond2_type) { - flag = 1; - break; - } - } - - bond1_type = type1; - bond2_type = type2; - } - - // error check for any bond types that are not the same - - MPI_Allreduce(&flag, &flag_all, 1, MPI_INT, MPI_MAX, world); - - if(flag_all) 
error->all(FLERR, "Shake angles have different bond types"); - - // insure all procs have bond types - - MPI_Allreduce(&bond1_type, &flag_all, 1, MPI_INT, MPI_MAX, world); - bond1_type = flag_all; - MPI_Allreduce(&bond2_type, &flag_all, 1, MPI_INT, MPI_MAX, world); - bond2_type = flag_all; - - // if bond types are 0, no SHAKE angles of this type exist - // just skip this angle - - if(bond1_type == 0) { - angle_distance[i] = 0.0; - continue; - } - - // compute the angle distance as a function of 2 bond distances - - angle = force->angle->equilibrium_angle(i); - rsq = 2.0 * bond_distance[bond1_type] * bond_distance[bond2_type] * - (1.0 - cos(angle)); - angle_distance[i] = sqrt(rsq); - } -} - -/* ---------------------------------------------------------------------- - SHAKE as pre-integrator constraint -------------------------------------------------------------------------- */ - -void FixShakeCuda::setup(int vflag) -{ - pre_neighbor(); - - if(output_every) stats(); - - // setup SHAKE output - - int ntimestep = update->ntimestep; - next_output = ntimestep + output_every; - - if(output_every == 0) next_output = update->laststep + 1; - - if(output_every && ntimestep % output_every != 0) - next_output = (ntimestep / output_every) * output_every + output_every; - - // half timestep constraint on pre-step, full timestep thereafter - - if(strstr(update->integrate_style, "verlet")) { - dtv = update->dt; - dtfsq = 0.5 * update->dt * update->dt * force->ftm2v; - post_force(vflag); - dtfsq = update->dt * update->dt * force->ftm2v; - } else { - dtv = step_respa[0]; - dtf_innerhalf = 0.5 * step_respa[0] * force->ftm2v; - dtf_inner = dtf_innerhalf; - ((Respa*) update->integrate)->copy_flevel_f(nlevels_respa - 1); - post_force_respa(vflag, nlevels_respa - 1, 0); - ((Respa*) update->integrate)->copy_f_flevel(nlevels_respa - 1); - dtf_inner = step_respa[0] * force->ftm2v; - } - - Cuda_FixShakeCuda_Init(&cuda->shared_data, dtv, dtfsq, - cu_shake_flag->dev_data(), 
cu_shake_atom->dev_data(), cu_shake_type->dev_data(), cu_xshake->dev_data(), - cu_bond_distance->dev_data(), cu_angle_distance->dev_data(), cu_virial->dev_data(), - max_iter, tolerance); -} - -/* ---------------------------------------------------------------------- - build list of SHAKE clusters to constrain - if one or more atoms in cluster are on this proc, - this proc lists the cluster exactly once -------------------------------------------------------------------------- */ - -void FixShakeCuda::pre_neighbor() -{ - int atom1, atom2, atom3, atom4; - - // local copies of atom quantities - // used by SHAKE until next re-neighboring - - x = atom->x; - v = atom->v; - f = atom->f; - mass = atom->mass; - rmass = atom->rmass; - type = atom->type; - nlocal = atom->nlocal; - - // extend size of SHAKE list if necessary - - if(nlocal > maxlist) { - maxlist = nlocal; - memory->destroy(list); - memory->create(list, maxlist, "shake:list"); - delete cu_list; - cu_list = new cCudaData<int , int , xx >(list, maxlist); - } - - // build list of SHAKE clusters I compute - - nlist = 0; - int count2 = 0, count3 = 0, count4 = 0, count3a = 0; - - for(int i = 0; i < nlocal; i++) - if(shake_flag[i]) { - if(shake_flag[i] == 2) count2++; - - if(shake_flag[i] == 3) count3++; - - if(shake_flag[i] == 4) count4++; - - if(shake_flag[i] == 1) count3a++; - - if(shake_flag[i] == 2) { - atom1 = atom->map(shake_atom[i][0]); - atom2 = atom->map(shake_atom[i][1]); - - if(atom1 == -1 || atom2 == -1) { - char str[128]; - sprintf(str, - "Shake atoms %d %d missing on proc %d at step " BIGINT_FORMAT, - shake_atom[i][0], shake_atom[i][1], me, update->ntimestep); - error->one(FLERR, str); - } - - if(i <= atom1 && i <= atom2) list[nlist++] = i; - } else if(shake_flag[i] % 2 == 1) { - atom1 = atom->map(shake_atom[i][0]); - atom2 = atom->map(shake_atom[i][1]); - atom3 = atom->map(shake_atom[i][2]); - - if(atom1 == -1 || atom2 == -1 || atom3 == -1) { - char str[128]; - sprintf(str, - "Shake atoms %d %d %d 
missing on proc %d at step " - BIGINT_FORMAT, - shake_atom[i][0], shake_atom[i][1], shake_atom[i][2], - me, update->ntimestep); - error->one(FLERR, str); - } - - if(i <= atom1 && i <= atom2 && i <= atom3) list[nlist++] = i; - } else { - atom1 = atom->map(shake_atom[i][0]); - atom2 = atom->map(shake_atom[i][1]); - atom3 = atom->map(shake_atom[i][2]); - atom4 = atom->map(shake_atom[i][3]); - - if(atom1 == -1 || atom2 == -1 || atom3 == -1 || atom4 == -1) { - char str[128]; - sprintf(str, - "Shake atoms %d %d %d %d missing on proc %d at step " - BIGINT_FORMAT, - shake_atom[i][0], shake_atom[i][1], - shake_atom[i][2], shake_atom[i][3], - me, update->ntimestep); - error->one(FLERR, str); - } - - if(i <= atom1 && i <= atom2 && i <= atom3 && i <= atom4) - list[nlist++] = i; - } - } - - count2 /= 2; - count3 /= 3; - count4 /= 4; - count3a /= 3; - count3 += count2; - count4 += count3; - count3a += count4; - - for(int k = 0, l = count2; k < count2; k++) { - if(shake_flag[list[k]] != 2) { - while(shake_flag[list[l]] != 2 && l < nlist - 1) l++; - - if(shake_flag[list[l]] != 2) { - printf("FixShakeCuda: Error in List SortA %i %i\n", k, l); - return; - } - - int tmp = list[k]; - list[k] = list[l]; - list[l] = tmp; - } - } - - for(int k = count2, l = count3; k < count3; k++) { - if(shake_flag[list[k]] != 3) { - while(shake_flag[list[l]] != 3 && l < nlist - 1) l++; - - if(shake_flag[list[l]] != 3) { - printf("FixShakeCuda: Error in List SortB %i %i\n", k, l); - return; - } - - int tmp = list[k]; - list[k] = list[l]; - list[l] = tmp; - } - } - - for(int k = count3, l = count4; k < count4; k++) { - if(shake_flag[list[k]] != 4) { - while(shake_flag[list[l]] != 4 && l < nlist - 1) l++; - - if(shake_flag[list[l]] != 4) { - printf("FixShakeCuda: Error in List SortC %i %i\n", k, l); - return; - } - - int tmp = list[k]; - list[k] = list[l]; - list[l] = tmp; - } - } - - cu_list->upload(); - cu_bond_distance->upload(); - cu_angle_distance->upload(); - cu_shake_flag->upload(); - 
cu_shake_atom->upload(); - cu_shake_type->upload(); - - neighbor_step = true; -} - -/* ---------------------------------------------------------------------- - compute the force adjustment for SHAKE constraint -------------------------------------------------------------------------- */ - -void FixShakeCuda::post_force(int vflag) -{ - my_times starttime; - my_times endtime; - - - if(cuda->finished_setup && neighbor_step) { - Cuda_FixShakeCuda_Init(&cuda->shared_data, dtv, dtfsq, - cu_shake_flag->dev_data(), cu_shake_atom->dev_data(), cu_shake_type->dev_data(), cu_xshake->dev_data(), - cu_bond_distance->dev_data(), cu_angle_distance->dev_data(), cu_virial->dev_data(), - max_iter, tolerance); - - } - - if(not cuda->finished_setup) - cuda->downloadAll(); - - if(update->ntimestep == next_output) { - if(cuda->finished_setup) - cuda->cu_x->download(); - - stats(); - } - - // xshake = unconstrained move with current v,f - - unconstrained_update(); - - // communicate results if necessary - - //if(cuda->finished_setup) cu_xshake->download(); - - if(nprocs > 1) { - //if(cuda->finished_setup) - //cu_xshake->download(); - comm->forward_comm_fix(this); - //if(cuda->finished_setup) - //cu_xshake->upload(); - } - - // virial setup - - if(vflag) v_setup(vflag); - else evflag = 0; - - // loop over clusters - - my_gettime(CLOCK_REALTIME, &starttime); - - if(cuda->finished_setup) { - cu_virial->upload(); - - if(vflag_atom) cuda->cu_vatom->upload(); - - Cuda_FixShakeCuda_Shake(&cuda->shared_data, vflag, vflag_atom, (int*)cu_list->dev_data(), nlist); - cu_virial->download(); - - if(vflag_atom) cuda->cu_vatom->download(); - - } else - for(int i = 0; i < nlist; i++) { - int m = list[i]; - - if(shake_flag[m] == 2) shake2(m); - else if(shake_flag[m] == 3) shake3(m); - else if(shake_flag[m] == 4) shake4(m); - else shake3angle(m); - } - - if((not cuda->finished_setup)) cuda->cu_f->upload(); - - my_gettime(CLOCK_REALTIME, &endtime); - - if(cuda->finished_setup) - time_postforce += 
(endtime.tv_sec - starttime.tv_sec + 1.0 * (endtime.tv_nsec - starttime.tv_nsec) / 1000000000); - else - time_postforce = 0.0; - - //printf("Postforce time: %lf\n",time_postforce); -} - -/* ---------------------------------------------------------------------- - count # of degrees-of-freedom removed by SHAKE for atoms in igroup -------------------------------------------------------------------------- */ - -int FixShakeCuda::dof(int igroup) -{ - int groupbit = group->bitmask[igroup]; - - int* mask = atom->mask; - int* tag = atom->tag; - int nlocal = atom->nlocal; - - // count dof in a cluster if and only if - // the central atom is in group and atom i is the central atom - - int n = 0; - - for(int i = 0; i < nlocal; i++) { - if(!(mask[i] & groupbit)) continue; - - if(shake_flag[i] == 0) continue; - - if(shake_atom[i][0] != tag[i]) continue; - - if(shake_flag[i] == 1) n += 3; - else if(shake_flag[i] == 2) n += 1; - else if(shake_flag[i] == 3) n += 2; - else if(shake_flag[i] == 4) n += 3; - } - - int nall; - MPI_Allreduce(&n, &nall, 1, MPI_INT, MPI_SUM, world); - return nall; -} - -/* ---------------------------------------------------------------------- - identify whether each atom is in a SHAKE cluster - only include atoms in fix group and those bonds/angles specified in input - test whether all clusters are valid - set shake_flag, shake_atom, shake_type values - set bond,angle types negative so will be ignored in neighbor lists -------------------------------------------------------------------------- */ - -void FixShakeCuda::find_clusters() -{ - int i, j, m, n; - int flag, flag_all, messtag, loop, nbuf, nbufmax, size; - double massone; - int* buf, *bufcopy; - MPI_Request request; - MPI_Status status; - - if(me == 0 && screen) fprintf(screen, "Finding SHAKE clusters ...\n"); - - // local copies of atom ptrs - - int* tag = atom->tag; - int* type = atom->type; - int* mask = atom->mask; - double* mass = atom->mass; - double* rmass = atom->rmass; - int** bond_type = 
atom->bond_type; - int** angle_type = atom->angle_type; - int** nspecial = atom->nspecial; - int** special = atom->special; - int nlocal = atom->nlocal; - - // setup ring of procs - - int next = me + 1; - int prev = me - 1; - - if(next == nprocs) next = 0; - - if(prev < 0) prev = nprocs - 1; - - // ----------------------------------------------------- - // allocate arrays for self (1d) and bond partners (2d) - // max = max # of bond partners for owned atoms = 2nd dim of partner arrays - // npartner[i] = # of bonds attached to atom i - // nshake[i] = # of SHAKE bonds attached to atom i - // partner_tag[i][] = global IDs of each partner - // partner_mask[i][] = mask of each partner - // partner_type[i][] = type of each partner - // partner_massflag[i][] = 1 if partner meets mass criterion, 0 if not - // partner_bondtype[i][] = type of bond attached to each partner - // partner_shake[i][] = 1 if SHAKE bonded to partner, 0 if not - // partner_nshake[i][] = nshake value for each partner - // ----------------------------------------------------- - - int max = 0; - - for(i = 0; i < nlocal; i++) max = MAX(max, nspecial[i][0]); - - int* npartner, *nshake; - memory->create(npartner, nlocal, "shake:npartner"); - memory->create(nshake, nlocal, "shake:nshake"); - - int** partner_tag, **partner_mask, **partner_type, **partner_massflag; - int** partner_bondtype, **partner_shake, **partner_nshake; - memory->create(partner_tag, nlocal, max, "shake:partner_tag"); - memory->create(partner_mask, nlocal, max, "shake:partner_mask"); - memory->create(partner_type, nlocal, max, "shake:partner_type"); - memory->create(partner_massflag, nlocal, max, "shake:partner_massflag"); - memory->create(partner_bondtype, nlocal, max, "shake:partner_bondtype"); - memory->create(partner_shake, nlocal, max, "shake:partner_shake"); - memory->create(partner_nshake, nlocal, max, "shake:partner_nshake"); - - // ----------------------------------------------------- - // set npartner and partner_tag from 
special arrays - // ----------------------------------------------------- - - for(i = 0; i < nlocal; i++) { - npartner[i] = nspecial[i][0]; - - for(j = 0; j < npartner[i]; j++) partner_tag[i][j] = special[i][j]; - } - - // ----------------------------------------------------- - // set partner_mask, partner_type, partner_massflag, partner_bondtype - // for bonded partners - // requires communication for off-proc partners - // ----------------------------------------------------- - - // fill in mask, type, massflag, bondtype if own bond partner - // info to store in buf for each off-proc bond = nper = 6 - // 2 atoms IDs in bond, space for mask, type, massflag, bondtype - // nbufmax = largest buffer needed to hold info from any proc - - int nper = 6; - - nbuf = 0; - - for(i = 0; i < nlocal; i++) { - for(j = 0; j < npartner[i]; j++) { - partner_mask[i][j] = 0; - partner_type[i][j] = 0; - partner_massflag[i][j] = 0; - partner_bondtype[i][j] = 0; - - m = atom->map(partner_tag[i][j]); - - if(m >= 0 && m < nlocal) { - partner_mask[i][j] = mask[m]; - partner_type[i][j] = type[m]; - - if(nmass) { - if(rmass) massone = rmass[m]; - else massone = mass[type[m]]; - - partner_massflag[i][j] = masscheck(massone); - } - - n = bondfind(i, tag[i], partner_tag[i][j]); - - if(n >= 0) partner_bondtype[i][j] = bond_type[i][n]; - else { - n = bondfind(m, tag[i], partner_tag[i][j]); - - if(n >= 0) partner_bondtype[i][j] = bond_type[m][n]; - } - } else nbuf += nper; - } - } - - MPI_Allreduce(&nbuf, &nbufmax, 1, MPI_INT, MPI_MAX, world); - - buf = new int[nbufmax]; - bufcopy = new int[nbufmax]; - - // fill buffer with info - - size = 0; - - for(i = 0; i < nlocal; i++) { - for(j = 0; j < npartner[i]; j++) { - m = atom->map(partner_tag[i][j]); - - if(m < 0 || m >= nlocal) { - buf[size] = tag[i]; - buf[size + 1] = partner_tag[i][j]; - buf[size + 2] = 0; - buf[size + 3] = 0; - buf[size + 4] = 0; - n = bondfind(i, tag[i], partner_tag[i][j]); - - if(n >= 0) buf[size + 5] = bond_type[i][n]; - else 
buf[size + 5] = 0; - - size += nper; - } - } - } - - // cycle buffer around ring of procs back to self - // when receive buffer, scan bond partner IDs for atoms I own - // if I own partner: - // fill in mask and type and massflag - // search for bond with 1st atom and fill in bondtype - - messtag = 1; - - for(loop = 0; loop < nprocs; loop++) { - i = 0; - - while(i < size) { - m = atom->map(buf[i + 1]); - - if(m >= 0 && m < nlocal) { - buf[i + 2] = mask[m]; - buf[i + 3] = type[m]; - - if(nmass) { - if(rmass) massone = rmass[m]; - else massone = mass[type[m]]; - - buf[i + 4] = masscheck(massone); - } - - if(buf[i + 5] == 0) { - n = bondfind(m, buf[i], buf[i + 1]); - - if(n >= 0) buf[i + 5] = bond_type[m][n]; - } - } - - i += nper; - } - - if(me != next) { - MPI_Irecv(bufcopy, nbufmax, MPI_INT, prev, messtag, world, &request); - MPI_Send(buf, size, MPI_INT, next, messtag, world); - MPI_Wait(&request, &status); - MPI_Get_count(&status, MPI_INT, &size); - - for(j = 0; j < size; j++) buf[j] = bufcopy[j]; - } - } - - // store partner info returned to me - - m = 0; - - while(m < size) { - i = atom->map(buf[m]); - - for(j = 0; j < npartner[i]; j++) - if(buf[m + 1] == partner_tag[i][j]) break; - - partner_mask[i][j] = buf[m + 2]; - partner_type[i][j] = buf[m + 3]; - partner_massflag[i][j] = buf[m + 4]; - partner_bondtype[i][j] = buf[m + 5]; - m += nper; - } - - delete [] buf; - delete [] bufcopy; - - // error check for unfilled partner info - // if partner_type not set, is an error - // partner_bondtype may not be set if special list is not consistent - // with bondatom (e.g. 
due to delete_bonds command) - // this is OK if one or both atoms are not in fix group, since - // bond won't be SHAKEn anyway - // else it's an error - - flag = 0; - - for(i = 0; i < nlocal; i++) - for(j = 0; j < npartner[i]; j++) { - if(partner_type[i][j] == 0) flag = 1; - - if(!(mask[i] & groupbit)) continue; - - if(!(partner_mask[i][j] & groupbit)) continue; - - if(partner_bondtype[i][j] == 0) flag = 1; - } - - MPI_Allreduce(&flag, &flag_all, 1, MPI_INT, MPI_SUM, world); - - if(flag_all) error->all(FLERR, "Did not find fix shake partner info"); - - // ----------------------------------------------------- - // identify SHAKEable bonds - // set nshake[i] = # of SHAKE bonds attached to atom i - // set partner_shake[i][] = 1 if SHAKE bonded to partner, 0 if not - // both atoms must be in group, bondtype must be > 0 - // check if bondtype is in input bond_flag - // check if type of either atom is in input type_flag - // check if mass of either atom is in input mass_list - // ----------------------------------------------------- - - int np; - - for(i = 0; i < nlocal; i++) { - nshake[i] = 0; - np = npartner[i]; - - for(j = 0; j < np; j++) { - partner_shake[i][j] = 0; - - if(!(mask[i] & groupbit)) continue; - - if(!(partner_mask[i][j] & groupbit)) continue; - - if(partner_bondtype[i][j] <= 0) continue; - - if(bond_flag[partner_bondtype[i][j]]) { - partner_shake[i][j] = 1; - nshake[i]++; - continue; - } - - if(type_flag[type[i]] || type_flag[partner_type[i][j]]) { - partner_shake[i][j] = 1; - nshake[i]++; - continue; - } - - if(nmass) { - if(partner_massflag[i][j]) { - partner_shake[i][j] = 1; - nshake[i]++; - continue; - } else { - if(rmass) massone = rmass[i]; - else massone = mass[type[i]]; - - if(masscheck(massone)) { - partner_shake[i][j] = 1; - nshake[i]++; - continue; - } - } - } - } - } - - // ----------------------------------------------------- - // set partner_nshake for bonded partners - // requires communication for off-proc partners - // 
----------------------------------------------------- - - // fill in partner_nshake if own bond partner - // info to store in buf for each off-proc bond = - // 2 atoms IDs in bond, space for nshake value - // nbufmax = largest buffer needed to hold info from any proc - - nbuf = 0; - - for(i = 0; i < nlocal; i++) { - for(j = 0; j < npartner[i]; j++) { - m = atom->map(partner_tag[i][j]); - - if(m >= 0 && m < nlocal) partner_nshake[i][j] = nshake[m]; - else nbuf += 3; - } - } - - MPI_Allreduce(&nbuf, &nbufmax, 1, MPI_INT, MPI_MAX, world); - - buf = new int[nbufmax]; - bufcopy = new int[nbufmax]; - - // fill buffer with info - - size = 0; - - for(i = 0; i < nlocal; i++) { - for(j = 0; j < npartner[i]; j++) { - m = atom->map(partner_tag[i][j]); - - if(m < 0 || m >= nlocal) { - buf[size] = tag[i]; - buf[size + 1] = partner_tag[i][j]; - size += 3; - } - } - } - - // cycle buffer around ring of procs back to self - // when receive buffer, scan bond partner IDs for atoms I own - // if I own partner, fill in nshake value - - messtag = 2; - - for(loop = 0; loop < nprocs; loop++) { - i = 0; - - while(i < size) { - m = atom->map(buf[i + 1]); - - if(m >= 0 && m < nlocal) buf[i + 2] = nshake[m]; - - i += 3; - } - - if(me != next) { - MPI_Irecv(bufcopy, nbufmax, MPI_INT, prev, messtag, world, &request); - MPI_Send(buf, size, MPI_INT, next, messtag, world); - MPI_Wait(&request, &status); - MPI_Get_count(&status, MPI_INT, &size); - - for(j = 0; j < size; j++) buf[j] = bufcopy[j]; - } - } - - // store partner info returned to me - - m = 0; - - while(m < size) { - i = atom->map(buf[m]); - - for(j = 0; j < npartner[i]; j++) - if(buf[m + 1] == partner_tag[i][j]) break; - - partner_nshake[i][j] = buf[m + 2]; - m += 3; - } - - delete [] buf; - delete [] bufcopy; - - // ----------------------------------------------------- - // error checks - // no atom with nshake > 3 - // no connected atoms which both have nshake > 1 - // ----------------------------------------------------- - - flag = 
0; - - for(i = 0; i < nlocal; i++) if(nshake[i] > 3) flag = 1; - - MPI_Allreduce(&flag, &flag_all, 1, MPI_INT, MPI_SUM, world); - - if(flag_all) error->all(FLERR, "Shake cluster of more than 4 atoms"); - - flag = 0; - - for(i = 0; i < nlocal; i++) { - if(nshake[i] <= 1) continue; - - for(j = 0; j < npartner[i]; j++) - if(partner_shake[i][j] && partner_nshake[i][j] > 1) flag = 1; - } - - MPI_Allreduce(&flag, &flag_all, 1, MPI_INT, MPI_SUM, world); - - if(flag_all) error->all(FLERR, "Shake clusters are connected"); - - // ----------------------------------------------------- - // set SHAKE arrays that are stored with atoms & add angle constraints - // zero shake arrays for all owned atoms - // if I am central atom set shake_flag & shake_atom & shake_type - // for 2-atom clusters, I am central atom if my atom ID < partner ID - // for 3-atom clusters, test for angle constraint - // angle will be stored by this atom if it exists - // if angle type matches angle_flag, then it is angle-constrained - // shake_flag[] = 0 if atom not in SHAKE cluster - // 2,3,4 = size of bond-only cluster - // 1 = 3-atom angle cluster - // shake_atom[][] = global IDs of 2,3,4 atoms in cluster - // central atom is 1st - // for 2-atom cluster, lowest ID is 1st - // shake_type[][] = bondtype of each bond in cluster - // for 3-atom angle cluster, 3rd value is angletype - // ----------------------------------------------------- - - for(i = 0; i < nlocal; i++) { - shake_flag[i] = 0; - shake_atom[i][0] = 0; - shake_atom[i][1] = 0; - shake_atom[i][2] = 0; - shake_atom[i][3] = 0; - shake_type[i][0] = 0; - shake_type[i][1] = 0; - shake_type[i][2] = 0; - - if(nshake[i] == 1) { - for(j = 0; j < npartner[i]; j++) - if(partner_shake[i][j]) break; - - if(partner_nshake[i][j] == 1 && tag[i] < partner_tag[i][j]) { - shake_flag[i] = 2; - shake_atom[i][0] = tag[i]; - shake_atom[i][1] = partner_tag[i][j]; - shake_type[i][0] = partner_bondtype[i][j]; - } - } - - if(nshake[i] > 1) { - shake_flag[i] = 1; - 
shake_atom[i][0] = tag[i]; - - for(j = 0; j < npartner[i]; j++) - if(partner_shake[i][j]) { - m = shake_flag[i]; - shake_atom[i][m] = partner_tag[i][j]; - shake_type[i][m - 1] = partner_bondtype[i][j]; - shake_flag[i]++; - } - } - - if(nshake[i] == 2) { - n = anglefind(i, shake_atom[i][1], shake_atom[i][2]); - - if(n < 0) continue; - - if(angle_type[i][n] < 0) continue; - - if(angle_flag[angle_type[i][n]]) { - shake_flag[i] = 1; - shake_type[i][2] = angle_type[i][n]; - } - } - } - - // ----------------------------------------------------- - // set shake_flag,shake_atom,shake_type for non-central atoms - // requires communication for off-proc atoms - // ----------------------------------------------------- - - // fill in shake arrays for each bond partner I own - // info to store in buf for each off-proc bond = - // all values from shake_flag, shake_atom, shake_type - // nbufmax = largest buffer needed to hold info from any proc - - nbuf = 0; - - for(i = 0; i < nlocal; i++) { - if(shake_flag[i] == 0) continue; - - for(j = 0; j < npartner[i]; j++) { - if(partner_shake[i][j] == 0) continue; - - m = atom->map(partner_tag[i][j]); - - if(m >= 0 && m < nlocal) { - shake_flag[m] = shake_flag[i]; - shake_atom[m][0] = shake_atom[i][0]; - shake_atom[m][1] = shake_atom[i][1]; - shake_atom[m][2] = shake_atom[i][2]; - shake_atom[m][3] = shake_atom[i][3]; - shake_type[m][0] = shake_type[i][0]; - shake_type[m][1] = shake_type[i][1]; - shake_type[m][2] = shake_type[i][2]; - } else nbuf += 9; - } - } - - MPI_Allreduce(&nbuf, &nbufmax, 1, MPI_INT, MPI_MAX, world); - - buf = new int[nbufmax]; - bufcopy = new int[nbufmax]; - - // fill buffer with info - - size = 0; - - for(i = 0; i < nlocal; i++) { - if(shake_flag[i] == 0) continue; - - for(j = 0; j < npartner[i]; j++) { - if(partner_shake[i][j] == 0) continue; - - m = atom->map(partner_tag[i][j]); - - if(m < 0 || m >= nlocal) { - buf[size] = partner_tag[i][j]; - buf[size + 1] = shake_flag[i]; - buf[size + 2] = shake_atom[i][0]; - 
buf[size + 3] = shake_atom[i][1]; - buf[size + 4] = shake_atom[i][2]; - buf[size + 5] = shake_atom[i][3]; - buf[size + 6] = shake_type[i][0]; - buf[size + 7] = shake_type[i][1]; - buf[size + 8] = shake_type[i][2]; - size += 9; - } - } - } - - // cycle buffer around ring of procs back to self - // when receive buffer, scan for ID that I own - // if I own ID, fill in shake array values - - messtag = 3; - - for(loop = 0; loop < nprocs; loop++) { - i = 0; - - while(i < size) { - m = atom->map(buf[i]); - - if(m >= 0 && m < nlocal) { - shake_flag[m] = buf[i + 1]; - shake_atom[m][0] = buf[i + 2]; - shake_atom[m][1] = buf[i + 3]; - shake_atom[m][2] = buf[i + 4]; - shake_atom[m][3] = buf[i + 5]; - shake_type[m][0] = buf[i + 6]; - shake_type[m][1] = buf[i + 7]; - shake_type[m][2] = buf[i + 8]; - } - - i += 9; - } - - if(me != next) { - MPI_Irecv(bufcopy, nbufmax, MPI_INT, prev, messtag, world, &request); - MPI_Send(buf, size, MPI_INT, next, messtag, world); - MPI_Wait(&request, &status); - MPI_Get_count(&status, MPI_INT, &size); - - for(j = 0; j < size; j++) buf[j] = bufcopy[j]; - } - } - - delete [] buf; - delete [] bufcopy; - - // ----------------------------------------------------- - // free local memory - // ----------------------------------------------------- - - memory->destroy(npartner); - memory->destroy(nshake); - memory->destroy(partner_tag); - memory->destroy(partner_mask); - memory->destroy(partner_type); - memory->destroy(partner_massflag); - memory->destroy(partner_bondtype); - memory->destroy(partner_shake); - memory->destroy(partner_nshake); - - // ----------------------------------------------------- - // set bond_type and angle_type negative for SHAKE clusters - // must set for all SHAKE bonds and angles stored by each atom - // ----------------------------------------------------- - - for(i = 0; i < nlocal; i++) { - if(shake_flag[i] == 0) continue; - else if(shake_flag[i] == 1) { - n = bondfind(i, shake_atom[i][0], shake_atom[i][1]); - - if(n >= 0) 
bond_type[i][n] = -bond_type[i][n]; - - n = bondfind(i, shake_atom[i][0], shake_atom[i][2]); - - if(n >= 0) bond_type[i][n] = -bond_type[i][n]; - - n = anglefind(i, shake_atom[i][1], shake_atom[i][2]); - - if(n >= 0) angle_type[i][n] = -angle_type[i][n]; - } else if(shake_flag[i] == 2) { - n = bondfind(i, shake_atom[i][0], shake_atom[i][1]); - - if(n >= 0) bond_type[i][n] = -bond_type[i][n]; - } else if(shake_flag[i] == 3) { - n = bondfind(i, shake_atom[i][0], shake_atom[i][1]); - - if(n >= 0) bond_type[i][n] = -bond_type[i][n]; - - n = bondfind(i, shake_atom[i][0], shake_atom[i][2]); - - if(n >= 0) bond_type[i][n] = -bond_type[i][n]; - } else if(shake_flag[i] == 4) { - n = bondfind(i, shake_atom[i][0], shake_atom[i][1]); - - if(n >= 0) bond_type[i][n] = -bond_type[i][n]; - - n = bondfind(i, shake_atom[i][0], shake_atom[i][2]); - - if(n >= 0) bond_type[i][n] = -bond_type[i][n]; - - n = bondfind(i, shake_atom[i][0], shake_atom[i][3]); - - if(n >= 0) bond_type[i][n] = -bond_type[i][n]; - } - } - - // ----------------------------------------------------- - // print info on SHAKE clusters - // ----------------------------------------------------- - - int count1, count2, count3, count4; - count1 = count2 = count3 = count4 = 0; - - for(i = 0; i < nlocal; i++) { - if(shake_flag[i] == 1) count1++; - else if(shake_flag[i] == 2) count2++; - else if(shake_flag[i] == 3) count3++; - else if(shake_flag[i] == 4) count4++; - } - - for(int i = 0; i < nlocal; i++) { - } - - - int tmp; - tmp = count1; - MPI_Allreduce(&tmp, &count1, 1, MPI_INT, MPI_SUM, world); - tmp = count2; - MPI_Allreduce(&tmp, &count2, 1, MPI_INT, MPI_SUM, world); - tmp = count3; - MPI_Allreduce(&tmp, &count3, 1, MPI_INT, MPI_SUM, world); - tmp = count4; - MPI_Allreduce(&tmp, &count4, 1, MPI_INT, MPI_SUM, world); - - if(me == 0) { - if(screen) { - fprintf(screen, " %d = # of size 2 clusters\n", count2 / 2); - fprintf(screen, " %d = # of size 3 clusters\n", count3 / 3); - fprintf(screen, " %d = # of size 4 
clusters\n", count4 / 4); - fprintf(screen, " %d = # of frozen angles\n", count1 / 3); - } - - if(logfile) { - fprintf(logfile, " %d = # of size 2 clusters\n", count2 / 2); - fprintf(logfile, " %d = # of size 3 clusters\n", count3 / 3); - fprintf(logfile, " %d = # of size 4 clusters\n", count4 / 4); - fprintf(logfile, " %d = # of frozen angles\n", count1 / 3); - } - } - - cu_shake_flag->upload(); - cu_shake_atom->upload(); - cu_shake_type->upload(); - Cuda_FixShakeCuda_Init(&cuda->shared_data, dtv, dtfsq, - cu_shake_flag->dev_data(), cu_shake_atom->dev_data(), cu_shake_type->dev_data(), cu_xshake->dev_data(), - cu_bond_distance->dev_data(), cu_angle_distance->dev_data(), cu_virial->dev_data(), - max_iter, tolerance); - -} - -void FixShakeCuda::swap_clusters(int i, int j) -{ - int tmp; - tmp = shake_flag[i]; - shake_flag[i] = shake_flag[j]; - shake_flag[j] = tmp; - tmp = shake_atom[i][0]; - shake_atom[i][0] = shake_atom[j][0]; - shake_atom[j][0] = tmp; - tmp = shake_atom[i][1]; - shake_atom[i][1] = shake_atom[j][1]; - shake_atom[j][1] = tmp; - tmp = shake_atom[i][2]; - shake_atom[i][2] = shake_atom[j][2]; - shake_atom[j][2] = tmp; - tmp = shake_atom[i][3]; - shake_atom[i][3] = shake_atom[j][3]; - shake_atom[j][3] = tmp; - tmp = shake_type[i][0]; - shake_type[i][0] = shake_type[j][0]; - shake_type[j][0] = tmp; - tmp = shake_type[i][1]; - shake_type[i][1] = shake_type[j][1]; - shake_type[j][1] = tmp; - tmp = shake_type[i][2]; - shake_type[i][2] = shake_type[j][2]; - shake_type[j][2] = tmp; -} - -/* ---------------------------------------------------------------------- - check if massone is within MASSDELTA of any mass in mass_list - return 1 if yes, 0 if not -------------------------------------------------------------------------- */ - -int FixShakeCuda::masscheck(double massone) -{ - for(int i = 0; i < nmass; i++) - if(fabs(mass_list[i] - massone) <= MASSDELTA) return 1; - - return 0; -} - -/* ---------------------------------------------------------------------- - 
update the unconstrained position of each atom - only for SHAKE clusters, else set to 0.0 - assumes NVE update, seems to be accurate enough for NVT,NPT,NPH as well -------------------------------------------------------------------------- */ - -void FixShakeCuda::unconstrained_update() -{ - if(cuda->finished_setup) { - Cuda_FixShakeCuda_UnconstrainedUpdate(&cuda->shared_data); - return; - } - - double dtfmsq; - - if(rmass) { - for(int i = 0; i < nlocal; i++) { - if(shake_flag[i]) { - dtfmsq = dtfsq / rmass[i]; - xshake[i][0] = x[i][0] + dtv * v[i][0] + dtfmsq * f[i][0]; - xshake[i][1] = x[i][1] + dtv * v[i][1] + dtfmsq * f[i][1]; - xshake[i][2] = x[i][2] + dtv * v[i][2] + dtfmsq * f[i][2]; - } else xshake[i][2] = xshake[i][1] = xshake[i][0] = 0.0; - } - } else { - for(int i = 0; i < nlocal; i++) { - if(shake_flag[i]) { - dtfmsq = dtfsq / mass[type[i]]; - xshake[i][0] = x[i][0] + dtv * v[i][0] + dtfmsq * f[i][0]; - xshake[i][1] = x[i][1] + dtv * v[i][1] + dtfmsq * f[i][1]; - xshake[i][2] = x[i][2] + dtv * v[i][2] + dtfmsq * f[i][2]; - } else xshake[i][2] = xshake[i][1] = xshake[i][0] = 0.0; - } - } - - cu_xshake->upload(); -} - -/* ---------------------------------------------------------------------- */ - -void FixShakeCuda::shake2(int m) -{ - int nlist, list[2]; - double v[6]; - double invmass0, invmass1; - - // local atom IDs and constraint distances - - int i0 = atom->map(shake_atom[m][0]); - int i1 = atom->map(shake_atom[m][1]); - double bond1 = bond_distance[shake_type[m][0]]; - - // r01 = distance vec between atoms, with PBC - - double r01[3]; - r01[0] = x[i0][0] - x[i1][0]; - r01[1] = x[i0][1] - x[i1][1]; - r01[2] = x[i0][2] - x[i1][2]; - domain->minimum_image(r01); - - // s01 = distance vec after unconstrained update, with PBC - - double s01[3]; - s01[0] = xshake[i0][0] - xshake[i1][0]; - s01[1] = xshake[i0][1] - xshake[i1][1]; - s01[2] = xshake[i0][2] - xshake[i1][2]; - domain->minimum_image(s01); - - // scalar distances between atoms - - double r01sq = 
r01[0] * r01[0] + r01[1] * r01[1] + r01[2] * r01[2]; - double s01sq = s01[0] * s01[0] + s01[1] * s01[1] + s01[2] * s01[2]; - - // a,b,c = coeffs in quadratic equation for lamda - - if(rmass) { - invmass0 = 1.0 / rmass[i0]; - invmass1 = 1.0 / rmass[i1]; - } else { - invmass0 = 1.0 / mass[type[i0]]; - invmass1 = 1.0 / mass[type[i1]]; - } - - double a = (invmass0 + invmass1) * (invmass0 + invmass1) * r01sq; - double b = 2.0 * (invmass0 + invmass1) * - (s01[0] * r01[0] + s01[1] * r01[1] + s01[2] * r01[2]); - double c = s01sq - bond1 * bond1; - - // error check - - double determ = b * b - 4.0 * a * c; - - if(determ < 0.0) { - error->warning(FLERR, "Shake determinant < 0.0"); - determ = 0.0; - } - - // exact quadratic solution for lamda - - double lamda, lamda1, lamda2; - lamda1 = (-b + sqrt(determ)) / (2.0 * a); - lamda2 = (-b - sqrt(determ)) / (2.0 * a); - - if(fabs(lamda1) <= fabs(lamda2)) lamda = lamda1; - else lamda = lamda2; - - // update forces if atom is owned by this processor - lamda /= dtfsq; - - if(i0 < nlocal) { - f[i0][0] += lamda * r01[0]; - f[i0][1] += lamda * r01[1]; - f[i0][2] += lamda * r01[2]; - } - - if(i1 < nlocal) { - f[i1][0] -= lamda * r01[0]; - f[i1][1] -= lamda * r01[1]; - f[i1][2] -= lamda * r01[2]; - } - - if(evflag) { - nlist = 0; - - if(i0 < nlocal) list[nlist++] = i0; - - if(i1 < nlocal) list[nlist++] = i1; - - v[0] = lamda * r01[0] * r01[0]; - v[1] = lamda * r01[1] * r01[1]; - v[2] = lamda * r01[2] * r01[2]; - v[3] = lamda * r01[0] * r01[1]; - v[4] = lamda * r01[0] * r01[2]; - v[5] = lamda * r01[1] * r01[2]; - - v_tally(nlist, list, 2.0, v); - } -} - -/* ---------------------------------------------------------------------- */ - -void FixShakeCuda::shake3(int m) -{ - int nlist, list[3]; - double v[6]; - double invmass0, invmass1, invmass2; - - // local atom IDs and constraint distances - - int i0 = atom->map(shake_atom[m][0]); - int i1 = atom->map(shake_atom[m][1]); - int i2 = atom->map(shake_atom[m][2]); - double bond1 = 
bond_distance[shake_type[m][0]]; - double bond2 = bond_distance[shake_type[m][1]]; - - // r01,r02 = distance vec between atoms, with PBC - - double r01[3]; - r01[0] = x[i0][0] - x[i1][0]; - r01[1] = x[i0][1] - x[i1][1]; - r01[2] = x[i0][2] - x[i1][2]; - domain->minimum_image(r01); - - double r02[3]; - r02[0] = x[i0][0] - x[i2][0]; - r02[1] = x[i0][1] - x[i2][1]; - r02[2] = x[i0][2] - x[i2][2]; - domain->minimum_image(r02); - - // s01,s02 = distance vec after unconstrained update, with PBC - - double s01[3]; - s01[0] = xshake[i0][0] - xshake[i1][0]; - s01[1] = xshake[i0][1] - xshake[i1][1]; - s01[2] = xshake[i0][2] - xshake[i1][2]; - domain->minimum_image(s01); - - double s02[3]; - s02[0] = xshake[i0][0] - xshake[i2][0]; - s02[1] = xshake[i0][1] - xshake[i2][1]; - s02[2] = xshake[i0][2] - xshake[i2][2]; - domain->minimum_image(s02); - - // scalar distances between atoms - - double r01sq = r01[0] * r01[0] + r01[1] * r01[1] + r01[2] * r01[2]; - double r02sq = r02[0] * r02[0] + r02[1] * r02[1] + r02[2] * r02[2]; - double s01sq = s01[0] * s01[0] + s01[1] * s01[1] + s01[2] * s01[2]; - double s02sq = s02[0] * s02[0] + s02[1] * s02[1] + s02[2] * s02[2]; - - // matrix coeffs and rhs for lamda equations - - if(rmass) { - invmass0 = 1.0 / rmass[i0]; - invmass1 = 1.0 / rmass[i1]; - invmass2 = 1.0 / rmass[i2]; - } else { - invmass0 = 1.0 / mass[type[i0]]; - invmass1 = 1.0 / mass[type[i1]]; - invmass2 = 1.0 / mass[type[i2]]; - } - - double a11 = 2.0 * (invmass0 + invmass1) * - (s01[0] * r01[0] + s01[1] * r01[1] + s01[2] * r01[2]); - double a12 = 2.0 * invmass0 * - (s01[0] * r02[0] + s01[1] * r02[1] + s01[2] * r02[2]); - double a21 = 2.0 * invmass0 * - (s02[0] * r01[0] + s02[1] * r01[1] + s02[2] * r01[2]); - double a22 = 2.0 * (invmass0 + invmass2) * - (s02[0] * r02[0] + s02[1] * r02[1] + s02[2] * r02[2]); - - // inverse of matrix - - double determ = a11 * a22 - a12 * a21; - - if(determ == 0.0) error->one(FLERR, "Shake determinant = 0.0"); - - double determinv = 1.0 / determ; - - 
double a11inv = a22 * determinv; - double a12inv = -a12 * determinv; - double a21inv = -a21 * determinv; - double a22inv = a11 * determinv; - - // quadratic correction coeffs - - double r0102 = (r01[0] * r02[0] + r01[1] * r02[1] + r01[2] * r02[2]); - - double quad1_0101 = (invmass0 + invmass1) * (invmass0 + invmass1) * r01sq; - double quad1_0202 = invmass0 * invmass0 * r02sq; - double quad1_0102 = 2.0 * (invmass0 + invmass1) * invmass0 * r0102; - - double quad2_0202 = (invmass0 + invmass2) * (invmass0 + invmass2) * r02sq; - double quad2_0101 = invmass0 * invmass0 * r01sq; - double quad2_0102 = 2.0 * (invmass0 + invmass2) * invmass0 * r0102; - - // iterate until converged - - double lamda01 = 0.0; - double lamda02 = 0.0; - int niter = 0; - int done = 0; - - double quad1, quad2, b1, b2, lamda01_new, lamda02_new; - - while(!done && niter < max_iter) { - quad1 = quad1_0101 * lamda01 * lamda01 + quad1_0202 * lamda02 * lamda02 + - quad1_0102 * lamda01 * lamda02; - quad2 = quad2_0101 * lamda01 * lamda01 + quad2_0202 * lamda02 * lamda02 + - quad2_0102 * lamda01 * lamda02; - - b1 = bond1 * bond1 - s01sq - quad1; - b2 = bond2 * bond2 - s02sq - quad2; - - lamda01_new = a11inv * b1 + a12inv * b2; - lamda02_new = a21inv * b1 + a22inv * b2; - - done = 1; - - if(fabs(lamda01_new - lamda01) > tolerance) done = 0; - - if(fabs(lamda02_new - lamda02) > tolerance) done = 0; - - lamda01 = lamda01_new; - lamda02 = lamda02_new; - niter++; - } - - // update forces if atom is owned by this processor - - lamda01 = lamda01 / dtfsq; - lamda02 = lamda02 / dtfsq; - - if(i0 < nlocal) { - f[i0][0] += lamda01 * r01[0] + lamda02 * r02[0]; - f[i0][1] += lamda01 * r01[1] + lamda02 * r02[1]; - f[i0][2] += lamda01 * r01[2] + lamda02 * r02[2]; - } - - if(i1 < nlocal) { - f[i1][0] -= lamda01 * r01[0]; - f[i1][1] -= lamda01 * r01[1]; - f[i1][2] -= lamda01 * r01[2]; - } - - if(i2 < nlocal) { - f[i2][0] -= lamda02 * r02[0]; - f[i2][1] -= lamda02 * r02[1]; - f[i2][2] -= lamda02 * r02[2]; - } - - if(evflag) { 
- nlist = 0; - - if(i0 < nlocal) list[nlist++] = i0; - - if(i1 < nlocal) list[nlist++] = i1; - - if(i2 < nlocal) list[nlist++] = i2; - - v[0] = lamda01 * r01[0] * r01[0] + lamda02 * r02[0] * r02[0]; - v[1] = lamda01 * r01[1] * r01[1] + lamda02 * r02[1] * r02[1]; - v[2] = lamda01 * r01[2] * r01[2] + lamda02 * r02[2] * r02[2]; - v[3] = lamda01 * r01[0] * r01[1] + lamda02 * r02[0] * r02[1]; - v[4] = lamda01 * r01[0] * r01[2] + lamda02 * r02[0] * r02[2]; - v[5] = lamda01 * r01[1] * r01[2] + lamda02 * r02[1] * r02[2]; - - v_tally(nlist, list, 3.0, v); - } -} - -/* ---------------------------------------------------------------------- */ - -void FixShakeCuda::shake4(int m) -{ - int nlist, list[4]; - double v[6]; - double invmass0, invmass1, invmass2, invmass3; - - // local atom IDs and constraint distances - - int i0 = atom->map(shake_atom[m][0]); - int i1 = atom->map(shake_atom[m][1]); - int i2 = atom->map(shake_atom[m][2]); - int i3 = atom->map(shake_atom[m][3]); - double bond1 = bond_distance[shake_type[m][0]]; - double bond2 = bond_distance[shake_type[m][1]]; - double bond3 = bond_distance[shake_type[m][2]]; - - // r01,r02,r03 = distance vec between atoms, with PBC - - double r01[3]; - r01[0] = x[i0][0] - x[i1][0]; - r01[1] = x[i0][1] - x[i1][1]; - r01[2] = x[i0][2] - x[i1][2]; - domain->minimum_image(r01); - - double r02[3]; - r02[0] = x[i0][0] - x[i2][0]; - r02[1] = x[i0][1] - x[i2][1]; - r02[2] = x[i0][2] - x[i2][2]; - domain->minimum_image(r02); - - double r03[3]; - r03[0] = x[i0][0] - x[i3][0]; - r03[1] = x[i0][1] - x[i3][1]; - r03[2] = x[i0][2] - x[i3][2]; - domain->minimum_image(r03); - - // s01,s02,s03 = distance vec after unconstrained update, with PBC - - double s01[3]; - s01[0] = xshake[i0][0] - xshake[i1][0]; - s01[1] = xshake[i0][1] - xshake[i1][1]; - s01[2] = xshake[i0][2] - xshake[i1][2]; - domain->minimum_image(s01); - - double s02[3]; - s02[0] = xshake[i0][0] - xshake[i2][0]; - s02[1] = xshake[i0][1] - xshake[i2][1]; - s02[2] = xshake[i0][2] - 
xshake[i2][2]; - domain->minimum_image(s02); - - double s03[3]; - s03[0] = xshake[i0][0] - xshake[i3][0]; - s03[1] = xshake[i0][1] - xshake[i3][1]; - s03[2] = xshake[i0][2] - xshake[i3][2]; - domain->minimum_image(s03); - - // scalar distances between atoms - - double r01sq = r01[0] * r01[0] + r01[1] * r01[1] + r01[2] * r01[2]; - double r02sq = r02[0] * r02[0] + r02[1] * r02[1] + r02[2] * r02[2]; - double r03sq = r03[0] * r03[0] + r03[1] * r03[1] + r03[2] * r03[2]; - double s01sq = s01[0] * s01[0] + s01[1] * s01[1] + s01[2] * s01[2]; - double s02sq = s02[0] * s02[0] + s02[1] * s02[1] + s02[2] * s02[2]; - double s03sq = s03[0] * s03[0] + s03[1] * s03[1] + s03[2] * s03[2]; - - // matrix coeffs and rhs for lamda equations - - if(rmass) { - invmass0 = 1.0 / rmass[i0]; - invmass1 = 1.0 / rmass[i1]; - invmass2 = 1.0 / rmass[i2]; - invmass3 = 1.0 / rmass[i3]; - } else { - invmass0 = 1.0 / mass[type[i0]]; - invmass1 = 1.0 / mass[type[i1]]; - invmass2 = 1.0 / mass[type[i2]]; - invmass3 = 1.0 / mass[type[i3]]; - } - - double a11 = 2.0 * (invmass0 + invmass1) * - (s01[0] * r01[0] + s01[1] * r01[1] + s01[2] * r01[2]); - double a12 = 2.0 * invmass0 * - (s01[0] * r02[0] + s01[1] * r02[1] + s01[2] * r02[2]); - double a13 = 2.0 * invmass0 * - (s01[0] * r03[0] + s01[1] * r03[1] + s01[2] * r03[2]); - double a21 = 2.0 * invmass0 * - (s02[0] * r01[0] + s02[1] * r01[1] + s02[2] * r01[2]); - double a22 = 2.0 * (invmass0 + invmass2) * - (s02[0] * r02[0] + s02[1] * r02[1] + s02[2] * r02[2]); - double a23 = 2.0 * invmass0 * - (s02[0] * r03[0] + s02[1] * r03[1] + s02[2] * r03[2]); - double a31 = 2.0 * invmass0 * - (s03[0] * r01[0] + s03[1] * r01[1] + s03[2] * r01[2]); - double a32 = 2.0 * invmass0 * - (s03[0] * r02[0] + s03[1] * r02[1] + s03[2] * r02[2]); - double a33 = 2.0 * (invmass0 + invmass3) * - (s03[0] * r03[0] + s03[1] * r03[1] + s03[2] * r03[2]); - - // inverse of matrix; - - double determ = a11 * a22 * a33 + a12 * a23 * a31 + a13 * a21 * a32 - - a11 * a23 * a32 - a12 * a21 * a33 - 
a13 * a22 * a31; - - if(determ == 0.0) error->one(FLERR, "Shake determinant = 0.0"); - - double determinv = 1.0 / determ; - - double a11inv = determinv * (a22 * a33 - a23 * a32); - double a12inv = -determinv * (a12 * a33 - a13 * a32); - double a13inv = determinv * (a12 * a23 - a13 * a22); - double a21inv = -determinv * (a21 * a33 - a23 * a31); - double a22inv = determinv * (a11 * a33 - a13 * a31); - double a23inv = -determinv * (a11 * a23 - a13 * a21); - double a31inv = determinv * (a21 * a32 - a22 * a31); - double a32inv = -determinv * (a11 * a32 - a12 * a31); - double a33inv = determinv * (a11 * a22 - a12 * a21); - - // quadratic correction coeffs - - double r0102 = (r01[0] * r02[0] + r01[1] * r02[1] + r01[2] * r02[2]); - double r0103 = (r01[0] * r03[0] + r01[1] * r03[1] + r01[2] * r03[2]); - double r0203 = (r02[0] * r03[0] + r02[1] * r03[1] + r02[2] * r03[2]); - - double quad1_0101 = (invmass0 + invmass1) * (invmass0 + invmass1) * r01sq; - double quad1_0202 = invmass0 * invmass0 * r02sq; - double quad1_0303 = invmass0 * invmass0 * r03sq; - double quad1_0102 = 2.0 * (invmass0 + invmass1) * invmass0 * r0102; - double quad1_0103 = 2.0 * (invmass0 + invmass1) * invmass0 * r0103; - double quad1_0203 = 2.0 * invmass0 * invmass0 * r0203; - - double quad2_0101 = invmass0 * invmass0 * r01sq; - double quad2_0202 = (invmass0 + invmass2) * (invmass0 + invmass2) * r02sq; - double quad2_0303 = invmass0 * invmass0 * r03sq; - double quad2_0102 = 2.0 * (invmass0 + invmass2) * invmass0 * r0102; - double quad2_0103 = 2.0 * invmass0 * invmass0 * r0103; - double quad2_0203 = 2.0 * (invmass0 + invmass2) * invmass0 * r0203; - - double quad3_0101 = invmass0 * invmass0 * r01sq; - double quad3_0202 = invmass0 * invmass0 * r02sq; - double quad3_0303 = (invmass0 + invmass3) * (invmass0 + invmass3) * r03sq; - double quad3_0102 = 2.0 * invmass0 * invmass0 * r0102; - double quad3_0103 = 2.0 * (invmass0 + invmass3) * invmass0 * r0103; - double quad3_0203 = 2.0 * (invmass0 + invmass3) * 
invmass0 * r0203; - - // iterate until converged - - double lamda01 = 0.0; - double lamda02 = 0.0; - double lamda03 = 0.0; - int niter = 0; - int done = 0; - - double quad1, quad2, quad3, b1, b2, b3, lamda01_new, lamda02_new, lamda03_new; - - while(!done && niter < max_iter) { - quad1 = quad1_0101 * lamda01 * lamda01 + - quad1_0202 * lamda02 * lamda02 + - quad1_0303 * lamda03 * lamda03 + - quad1_0102 * lamda01 * lamda02 + - quad1_0103 * lamda01 * lamda03 + - quad1_0203 * lamda02 * lamda03; - - quad2 = quad2_0101 * lamda01 * lamda01 + - quad2_0202 * lamda02 * lamda02 + - quad2_0303 * lamda03 * lamda03 + - quad2_0102 * lamda01 * lamda02 + - quad2_0103 * lamda01 * lamda03 + - quad2_0203 * lamda02 * lamda03; - - quad3 = quad3_0101 * lamda01 * lamda01 + - quad3_0202 * lamda02 * lamda02 + - quad3_0303 * lamda03 * lamda03 + - quad3_0102 * lamda01 * lamda02 + - quad3_0103 * lamda01 * lamda03 + - quad3_0203 * lamda02 * lamda03; - - b1 = bond1 * bond1 - s01sq - quad1; - b2 = bond2 * bond2 - s02sq - quad2; - b3 = bond3 * bond3 - s03sq - quad3; - - lamda01_new = a11inv * b1 + a12inv * b2 + a13inv * b3; - lamda02_new = a21inv * b1 + a22inv * b2 + a23inv * b3; - lamda03_new = a31inv * b1 + a32inv * b2 + a33inv * b3; - - done = 1; - - if(fabs(lamda01_new - lamda01) > tolerance) done = 0; - - if(fabs(lamda02_new - lamda02) > tolerance) done = 0; - - if(fabs(lamda03_new - lamda03) > tolerance) done = 0; - - lamda01 = lamda01_new; - lamda02 = lamda02_new; - lamda03 = lamda03_new; - niter++; - } - - // update forces if atom is owned by this processor - - lamda01 = lamda01 / dtfsq; - lamda02 = lamda02 / dtfsq; - lamda03 = lamda03 / dtfsq; - - if(i0 < nlocal) { - f[i0][0] += lamda01 * r01[0] + lamda02 * r02[0] + lamda03 * r03[0]; - f[i0][1] += lamda01 * r01[1] + lamda02 * r02[1] + lamda03 * r03[1]; - f[i0][2] += lamda01 * r01[2] + lamda02 * r02[2] + lamda03 * r03[2]; - } - - if(i1 < nlocal) { - f[i1][0] -= lamda01 * r01[0]; - f[i1][1] -= lamda01 * r01[1]; - f[i1][2] -= lamda01 * 
r01[2]; - } - - if(i2 < nlocal) { - f[i2][0] -= lamda02 * r02[0]; - f[i2][1] -= lamda02 * r02[1]; - f[i2][2] -= lamda02 * r02[2]; - } - - if(i3 < nlocal) { - f[i3][0] -= lamda03 * r03[0]; - f[i3][1] -= lamda03 * r03[1]; - f[i3][2] -= lamda03 * r03[2]; - } - - if(evflag) { - nlist = 0; - - if(i0 < nlocal) list[nlist++] = i0; - - if(i1 < nlocal) list[nlist++] = i1; - - if(i2 < nlocal) list[nlist++] = i2; - - if(i3 < nlocal) list[nlist++] = i3; - - v[0] = lamda01 * r01[0] * r01[0] + lamda02 * r02[0] * r02[0] + lamda03 * r03[0] * r03[0]; - v[1] = lamda01 * r01[1] * r01[1] + lamda02 * r02[1] * r02[1] + lamda03 * r03[1] * r03[1]; - v[2] = lamda01 * r01[2] * r01[2] + lamda02 * r02[2] * r02[2] + lamda03 * r03[2] * r03[2]; - v[3] = lamda01 * r01[0] * r01[1] + lamda02 * r02[0] * r02[1] + lamda03 * r03[0] * r03[1]; - v[4] = lamda01 * r01[0] * r01[2] + lamda02 * r02[0] * r02[2] + lamda03 * r03[0] * r03[2]; - v[5] = lamda01 * r01[1] * r01[2] + lamda02 * r02[1] * r02[2] + lamda03 * r03[1] * r03[2]; - //if(i0==7271) printf("%lf %lf %lf %lf %lf %lf\n",v[0],v[1],v[2],v[3],v[4],v[5]); - - v_tally(nlist, list, 4.0, v); - } -} - -/* ---------------------------------------------------------------------- */ - -void FixShakeCuda::shake3angle(int m) -{ - int nlist, list[3]; - double v[6]; - double invmass0, invmass1, invmass2; - - // local atom IDs and constraint distances - - int i0 = atom->map(shake_atom[m][0]); - int i1 = atom->map(shake_atom[m][1]); - int i2 = atom->map(shake_atom[m][2]); - double bond1 = bond_distance[shake_type[m][0]]; - double bond2 = bond_distance[shake_type[m][1]]; - double bond12 = angle_distance[shake_type[m][2]]; - - // r01,r02,r12 = distance vec between atoms, with PBC - - double r01[3]; - r01[0] = x[i0][0] - x[i1][0]; - r01[1] = x[i0][1] - x[i1][1]; - r01[2] = x[i0][2] - x[i1][2]; - domain->minimum_image(r01); - - double r02[3]; - r02[0] = x[i0][0] - x[i2][0]; - r02[1] = x[i0][1] - x[i2][1]; - r02[2] = x[i0][2] - x[i2][2]; - domain->minimum_image(r02); - - 
double r12[3]; - r12[0] = x[i1][0] - x[i2][0]; - r12[1] = x[i1][1] - x[i2][1]; - r12[2] = x[i1][2] - x[i2][2]; - domain->minimum_image(r12); - - // s01,s02,s12 = distance vec after unconstrained update, with PBC - - double s01[3]; - s01[0] = xshake[i0][0] - xshake[i1][0]; - s01[1] = xshake[i0][1] - xshake[i1][1]; - s01[2] = xshake[i0][2] - xshake[i1][2]; - domain->minimum_image(s01); - - double s02[3]; - s02[0] = xshake[i0][0] - xshake[i2][0]; - s02[1] = xshake[i0][1] - xshake[i2][1]; - s02[2] = xshake[i0][2] - xshake[i2][2]; - domain->minimum_image(s02); - - double s12[3]; - s12[0] = xshake[i1][0] - xshake[i2][0]; - s12[1] = xshake[i1][1] - xshake[i2][1]; - s12[2] = xshake[i1][2] - xshake[i2][2]; - domain->minimum_image(s12); - - // scalar distances between atoms - - double r01sq = r01[0] * r01[0] + r01[1] * r01[1] + r01[2] * r01[2]; - double r02sq = r02[0] * r02[0] + r02[1] * r02[1] + r02[2] * r02[2]; - double r12sq = r12[0] * r12[0] + r12[1] * r12[1] + r12[2] * r12[2]; - double s01sq = s01[0] * s01[0] + s01[1] * s01[1] + s01[2] * s01[2]; - double s02sq = s02[0] * s02[0] + s02[1] * s02[1] + s02[2] * s02[2]; - double s12sq = s12[0] * s12[0] + s12[1] * s12[1] + s12[2] * s12[2]; - - // matrix coeffs and rhs for lamda equations - - if(rmass) { - invmass0 = 1.0 / rmass[i0]; - invmass1 = 1.0 / rmass[i1]; - invmass2 = 1.0 / rmass[i2]; - } else { - invmass0 = 1.0 / mass[type[i0]]; - invmass1 = 1.0 / mass[type[i1]]; - invmass2 = 1.0 / mass[type[i2]]; - } - - double a11 = 2.0 * (invmass0 + invmass1) * - (s01[0] * r01[0] + s01[1] * r01[1] + s01[2] * r01[2]); - double a12 = 2.0 * invmass0 * - (s01[0] * r02[0] + s01[1] * r02[1] + s01[2] * r02[2]); - double a13 = - 2.0 * invmass1 * - (s01[0] * r12[0] + s01[1] * r12[1] + s01[2] * r12[2]); - double a21 = 2.0 * invmass0 * - (s02[0] * r01[0] + s02[1] * r01[1] + s02[2] * r01[2]); - double a22 = 2.0 * (invmass0 + invmass2) * - (s02[0] * r02[0] + s02[1] * r02[1] + s02[2] * r02[2]); - double a23 = 2.0 * invmass2 * - (s02[0] * r12[0] + 
s02[1] * r12[1] + s02[2] * r12[2]); - double a31 = - 2.0 * invmass1 * - (s12[0] * r01[0] + s12[1] * r01[1] + s12[2] * r01[2]); - double a32 = 2.0 * invmass2 * - (s12[0] * r02[0] + s12[1] * r02[1] + s12[2] * r02[2]); - double a33 = 2.0 * (invmass1 + invmass2) * - (s12[0] * r12[0] + s12[1] * r12[1] + s12[2] * r12[2]); - - // inverse of matrix - - double determ = a11 * a22 * a33 + a12 * a23 * a31 + a13 * a21 * a32 - - a11 * a23 * a32 - a12 * a21 * a33 - a13 * a22 * a31; - - if(determ == 0.0) error->one(FLERR, "Shake determinant = 0.0"); - - double determinv = 1.0 / determ; - - double a11inv = determinv * (a22 * a33 - a23 * a32); - double a12inv = -determinv * (a12 * a33 - a13 * a32); - double a13inv = determinv * (a12 * a23 - a13 * a22); - double a21inv = -determinv * (a21 * a33 - a23 * a31); - double a22inv = determinv * (a11 * a33 - a13 * a31); - double a23inv = -determinv * (a11 * a23 - a13 * a21); - double a31inv = determinv * (a21 * a32 - a22 * a31); - double a32inv = -determinv * (a11 * a32 - a12 * a31); - double a33inv = determinv * (a11 * a22 - a12 * a21); - - // quadratic correction coeffs - - double r0102 = (r01[0] * r02[0] + r01[1] * r02[1] + r01[2] * r02[2]); - double r0112 = (r01[0] * r12[0] + r01[1] * r12[1] + r01[2] * r12[2]); - double r0212 = (r02[0] * r12[0] + r02[1] * r12[1] + r02[2] * r12[2]); - - double quad1_0101 = (invmass0 + invmass1) * (invmass0 + invmass1) * r01sq; - double quad1_0202 = invmass0 * invmass0 * r02sq; - double quad1_1212 = invmass1 * invmass1 * r12sq; - double quad1_0102 = 2.0 * (invmass0 + invmass1) * invmass0 * r0102; - double quad1_0112 = - 2.0 * (invmass0 + invmass1) * invmass1 * r0112; - double quad1_0212 = - 2.0 * invmass0 * invmass1 * r0212; - - double quad2_0101 = invmass0 * invmass0 * r01sq; - double quad2_0202 = (invmass0 + invmass2) * (invmass0 + invmass2) * r02sq; - double quad2_1212 = invmass2 * invmass2 * r12sq; - double quad2_0102 = 2.0 * (invmass0 + invmass2) * invmass0 * r0102; - double quad2_0112 = 2.0 * 
invmass0 * invmass2 * r0112; - double quad2_0212 = 2.0 * (invmass0 + invmass2) * invmass2 * r0212; - - double quad3_0101 = invmass1 * invmass1 * r01sq; - double quad3_0202 = invmass2 * invmass2 * r02sq; - double quad3_1212 = (invmass1 + invmass2) * (invmass1 + invmass2) * r12sq; - double quad3_0102 = - 2.0 * invmass1 * invmass2 * r0102; - double quad3_0112 = - 2.0 * (invmass1 + invmass2) * invmass1 * r0112; - double quad3_0212 = 2.0 * (invmass1 + invmass2) * invmass2 * r0212; - - // iterate until converged - - double lamda01 = 0.0; - double lamda02 = 0.0; - double lamda12 = 0.0; - int niter = 0; - int done = 0; - - double quad1, quad2, quad3, b1, b2, b3, lamda01_new, lamda02_new, lamda12_new; - - while(!done && niter < max_iter) { - quad1 = quad1_0101 * lamda01 * lamda01 + - quad1_0202 * lamda02 * lamda02 + - quad1_1212 * lamda12 * lamda12 + - quad1_0102 * lamda01 * lamda02 + - quad1_0112 * lamda01 * lamda12 + - quad1_0212 * lamda02 * lamda12; - - quad2 = quad2_0101 * lamda01 * lamda01 + - quad2_0202 * lamda02 * lamda02 + - quad2_1212 * lamda12 * lamda12 + - quad2_0102 * lamda01 * lamda02 + - quad2_0112 * lamda01 * lamda12 + - quad2_0212 * lamda02 * lamda12; - - quad3 = quad3_0101 * lamda01 * lamda01 + - quad3_0202 * lamda02 * lamda02 + - quad3_1212 * lamda12 * lamda12 + - quad3_0102 * lamda01 * lamda02 + - quad3_0112 * lamda01 * lamda12 + - quad3_0212 * lamda02 * lamda12; - - b1 = bond1 * bond1 - s01sq - quad1; - b2 = bond2 * bond2 - s02sq - quad2; - b3 = bond12 * bond12 - s12sq - quad3; - - lamda01_new = a11inv * b1 + a12inv * b2 + a13inv * b3; - lamda02_new = a21inv * b1 + a22inv * b2 + a23inv * b3; - lamda12_new = a31inv * b1 + a32inv * b2 + a33inv * b3; - - done = 1; - - if(fabs(lamda01_new - lamda01) > tolerance) done = 0; - - if(fabs(lamda02_new - lamda02) > tolerance) done = 0; - - if(fabs(lamda12_new - lamda12) > tolerance) done = 0; - - lamda01 = lamda01_new; - lamda02 = lamda02_new; - lamda12 = lamda12_new; - niter++; - } - - // update forces if atom is 
owned by this processor - - lamda01 = lamda01 / dtfsq; - lamda02 = lamda02 / dtfsq; - lamda12 = lamda12 / dtfsq; - - if(i0 < nlocal) { - f[i0][0] += lamda01 * r01[0] + lamda02 * r02[0]; - f[i0][1] += lamda01 * r01[1] + lamda02 * r02[1]; - f[i0][2] += lamda01 * r01[2] + lamda02 * r02[2]; - } - - if(i1 < nlocal) { - f[i1][0] -= lamda01 * r01[0] - lamda12 * r12[0]; - f[i1][1] -= lamda01 * r01[1] - lamda12 * r12[1]; - f[i1][2] -= lamda01 * r01[2] - lamda12 * r12[2]; - } - - if(i2 < nlocal) { - f[i2][0] -= lamda02 * r02[0] + lamda12 * r12[0]; - f[i2][1] -= lamda02 * r02[1] + lamda12 * r12[1]; - f[i2][2] -= lamda02 * r02[2] + lamda12 * r12[2]; - } - - if(evflag) { - nlist = 0; - - if(i0 < nlocal) list[nlist++] = i0; - - if(i1 < nlocal) list[nlist++] = i1; - - if(i2 < nlocal) list[nlist++] = i2; - - v[0] = lamda01 * r01[0] * r01[0] + lamda02 * r02[0] * r02[0] + lamda12 * r12[0] * r12[0]; - v[1] = lamda01 * r01[1] * r01[1] + lamda02 * r02[1] * r02[1] + lamda12 * r12[1] * r12[1]; - v[2] = lamda01 * r01[2] * r01[2] + lamda02 * r02[2] * r02[2] + lamda12 * r12[2] * r12[2]; - v[3] = lamda01 * r01[0] * r01[1] + lamda02 * r02[0] * r02[1] + lamda12 * r12[0] * r12[1]; - v[4] = lamda01 * r01[0] * r01[2] + lamda02 * r02[0] * r02[2] + lamda12 * r12[0] * r12[2]; - v[5] = lamda01 * r01[1] * r01[2] + lamda02 * r02[1] * r02[2] + lamda12 * r12[1] * r12[2]; - - v_tally(nlist, list, 3.0, v); - } -} - -/* ---------------------------------------------------------------------- - print-out bond & angle statistics -------------------------------------------------------------------------- */ - -void FixShakeCuda::stats() -{ - int i, j, m, n, iatom, jatom, katom; - double delx, dely, delz; - double r, r1, r2, r3, angle; - - // zero out accumulators - - int nb = atom->nbondtypes + 1; - int na = atom->nangletypes + 1; - - for(i = 0; i < nb; i++) { - b_count[i] = 0; - b_ave[i] = b_max[i] = 0.0; - b_min[i] = BIG; - } - - for(i = 0; i < na; i++) { - a_count[i] = 0; - a_ave[i] = a_max[i] = 0.0; - 
a_min[i] = BIG; - } - - // log stats for each bond & angle - // OK to double count since are just averaging - - double** x = atom->x; - int nlocal = atom->nlocal; - - for(i = 0; i < nlocal; i++) { - if(shake_flag[i] == 0) continue; - - // bond stats - - n = shake_flag[i]; - - if(n == 1) n = 3; - - iatom = atom->map(shake_atom[i][0]); - - for(j = 1; j < n; j++) { - jatom = atom->map(shake_atom[i][j]); - delx = x[iatom][0] - x[jatom][0]; - dely = x[iatom][1] - x[jatom][1]; - delz = x[iatom][2] - x[jatom][2]; - domain->minimum_image(delx, dely, delz); - r = sqrt(delx * delx + dely * dely + delz * delz); - - m = shake_type[i][j - 1]; - b_count[m]++; - b_ave[m] += r; - b_max[m] = MAX(b_max[m], r); - b_min[m] = MIN(b_min[m], r); - } - - // angle stats - - if(shake_flag[i] == 1) { - iatom = atom->map(shake_atom[i][0]); - jatom = atom->map(shake_atom[i][1]); - katom = atom->map(shake_atom[i][2]); - - delx = x[iatom][0] - x[jatom][0]; - dely = x[iatom][1] - x[jatom][1]; - delz = x[iatom][2] - x[jatom][2]; - domain->minimum_image(delx, dely, delz); - r1 = sqrt(delx * delx + dely * dely + delz * delz); - - delx = x[iatom][0] - x[katom][0]; - dely = x[iatom][1] - x[katom][1]; - delz = x[iatom][2] - x[katom][2]; - domain->minimum_image(delx, dely, delz); - r2 = sqrt(delx * delx + dely * dely + delz * delz); - - delx = x[jatom][0] - x[katom][0]; - dely = x[jatom][1] - x[katom][1]; - delz = x[jatom][2] - x[katom][2]; - domain->minimum_image(delx, dely, delz); - r3 = sqrt(delx * delx + dely * dely + delz * delz); - - angle = acos((r1 * r1 + r2 * r2 - r3 * r3) / (2.0 * r1 * r2)); - angle *= 180.0 / MY_PI; - m = shake_type[i][2]; - a_count[m]++; - a_ave[m] += angle; - a_max[m] = MAX(a_max[m], angle); - a_min[m] = MIN(a_min[m], angle); - } - } - - // sum across all procs - - MPI_Allreduce(b_count, b_count_all, nb, MPI_INT, MPI_SUM, world); - MPI_Allreduce(b_ave, b_ave_all, nb, MPI_DOUBLE, MPI_SUM, world); - MPI_Allreduce(b_max, b_max_all, nb, MPI_DOUBLE, MPI_MAX, world); - 
MPI_Allreduce(b_min, b_min_all, nb, MPI_DOUBLE, MPI_MIN, world); - - MPI_Allreduce(a_count, a_count_all, na, MPI_INT, MPI_SUM, world); - MPI_Allreduce(a_ave, a_ave_all, na, MPI_DOUBLE, MPI_SUM, world); - MPI_Allreduce(a_max, a_max_all, na, MPI_DOUBLE, MPI_MAX, world); - MPI_Allreduce(a_min, a_min_all, na, MPI_DOUBLE, MPI_MIN, world); - - // print stats only for non-zero counts - - if(me == 0) { - if(screen) { - fprintf(screen, - "SHAKE stats (type/ave/delta) on step " BIGINT_FORMAT "\n", - update->ntimestep); - - for(i = 1; i < nb; i++) - if(b_count_all[i]) - fprintf(screen, " %d %g %g\n", i, - b_ave_all[i] / b_count_all[i], b_max_all[i] - b_min_all[i]); - - for(i = 1; i < na; i++) - if(a_count_all[i]) - fprintf(screen, " %d %g %g\n", i, - a_ave_all[i] / a_count_all[i], a_max_all[i] - a_min_all[i]); - } - - if(logfile) { - fprintf(logfile, - "SHAKE stats (type/ave/delta) on step " BIGINT_FORMAT "\n", - update->ntimestep); - - for(i = 0; i < nb; i++) - if(b_count_all[i]) - fprintf(logfile, " %d %g %g\n", i, - b_ave_all[i] / b_count_all[i], b_max_all[i] - b_min_all[i]); - - for(i = 0; i < na; i++) - if(a_count_all[i]) - fprintf(logfile, " %d %g %g\n", i, - a_ave_all[i] / a_count_all[i], a_max_all[i] - a_min_all[i]); - } - } - - // next timestep for stats - - next_output += output_every; -} - -/* ---------------------------------------------------------------------- - find a bond between global tags n1 and n2 stored with local atom i - return -1 if don't find it - return bond index if do find it -------------------------------------------------------------------------- */ - -int FixShakeCuda::bondfind(int i, int n1, int n2) -{ - int* tag = atom->tag; - int** bond_atom = atom->bond_atom; - int nbonds = atom->num_bond[i]; - - int m; - - for(m = 0; m < nbonds; m++) { - if(n1 == tag[i] && n2 == bond_atom[i][m]) break; - - if(n1 == bond_atom[i][m] && n2 == tag[i]) break; - } - - if(m < nbonds) return m; - - return -1; -} - -/* 
---------------------------------------------------------------------- - find an angle with global end atoms n1 and n2 stored with local atom i - return -1 if don't find it - return angle index if do find it -------------------------------------------------------------------------- */ - -int FixShakeCuda::anglefind(int i, int n1, int n2) -{ - int** angle_atom1 = atom->angle_atom1; - int** angle_atom3 = atom->angle_atom3; - int nangles = atom->num_angle[i]; - - int m; - - for(m = 0; m < nangles; m++) { - if(n1 == angle_atom1[i][m] && n2 == angle_atom3[i][m]) break; - - if(n1 == angle_atom3[i][m] && n2 == angle_atom1[i][m]) break; - } - - if(m < nangles) return m; - - return -1; -} - -/* ---------------------------------------------------------------------- - memory usage of local atom-based arrays -------------------------------------------------------------------------- */ - -double FixShakeCuda::memory_usage() -{ - int nmax = atom->nmax; - double bytes = nmax * sizeof(int); - bytes += nmax * 4 * sizeof(int); - bytes += nmax * 3 * sizeof(int); - bytes += nmax * 3 * sizeof(double); - bytes += maxvatom * 6 * sizeof(double); - return bytes; -} - -/* ---------------------------------------------------------------------- - allocate local atom-based arrays -------------------------------------------------------------------------- */ - -void FixShakeCuda::grow_arrays(int nmax) -{ - memory->grow(shake_flag, nmax, "shake:shake_flag"); - memory->grow(shake_atom, nmax, 4, "shake:shake_atom"); - memory->grow(shake_type, nmax, 3, "shake:shake_type"); - memory->destroy(xshake); - memory->create(xshake, nmax, 3, "shake:xshake"); - - delete cu_shake_flag; - cu_shake_flag = new cCudaData<int, int, xx > (shake_flag, nmax); - delete cu_shake_atom; - cu_shake_atom = new cCudaData<int, int, yx> ((int*)shake_atom, nmax, 4); - delete cu_shake_type; - cu_shake_type = new cCudaData<int, int, yx> ((int*)shake_type, nmax, 3); - delete cu_xshake; - cu_xshake = new cCudaData<double, X_CFLOAT, 
xy> ((double*)xshake, nmax, 3); - cu_shake_flag->upload(); - cu_shake_atom->upload(); - cu_shake_type->upload(); - - if(cu_bond_distance) - Cuda_FixShakeCuda_Init(&cuda->shared_data, dtv, dtfsq, - cu_shake_flag->dev_data(), cu_shake_atom->dev_data(), cu_shake_type->dev_data(), cu_xshake->dev_data(), - cu_bond_distance->dev_data(), cu_angle_distance->dev_data(), cu_virial->dev_data(), - max_iter, tolerance); -} - -/* ---------------------------------------------------------------------- - copy values within local atom-based arrays -------------------------------------------------------------------------- */ - -void FixShakeCuda::copy_arrays(int i, int j, int delflag) -{ - int flag = shake_flag[j] = shake_flag[i]; - - if(flag == 1) { - shake_atom[j][0] = shake_atom[i][0]; - shake_atom[j][1] = shake_atom[i][1]; - shake_atom[j][2] = shake_atom[i][2]; - shake_type[j][0] = shake_type[i][0]; - shake_type[j][1] = shake_type[i][1]; - shake_type[j][2] = shake_type[i][2]; - } else if(flag == 2) { - shake_atom[j][0] = shake_atom[i][0]; - shake_atom[j][1] = shake_atom[i][1]; - shake_type[j][0] = shake_type[i][0]; - } else if(flag == 3) { - shake_atom[j][0] = shake_atom[i][0]; - shake_atom[j][1] = shake_atom[i][1]; - shake_atom[j][2] = shake_atom[i][2]; - shake_type[j][0] = shake_type[i][0]; - shake_type[j][1] = shake_type[i][1]; - } else if(flag == 4) { - shake_atom[j][0] = shake_atom[i][0]; - shake_atom[j][1] = shake_atom[i][1]; - shake_atom[j][2] = shake_atom[i][2]; - shake_atom[j][3] = shake_atom[i][3]; - shake_type[j][0] = shake_type[i][0]; - shake_type[j][1] = shake_type[i][1]; - shake_type[j][2] = shake_type[i][2]; - } -} - -/* ---------------------------------------------------------------------- - initialize one atom's array values, called when atom is created -------------------------------------------------------------------------- */ - -void FixShakeCuda::set_arrays(int i) -{ - shake_flag[i] = 0; -} - -/* 
---------------------------------------------------------------------- - pack values in local atom-based arrays for exchange with another proc -------------------------------------------------------------------------- */ - -int FixShakeCuda::pack_exchange(int i, double* buf) -{ - int m = 0; - buf[m++] = shake_flag[i]; - int flag = shake_flag[i]; - - if(flag == 1) { - buf[m++] = shake_atom[i][0]; - buf[m++] = shake_atom[i][1]; - buf[m++] = shake_atom[i][2]; - buf[m++] = shake_type[i][0]; - buf[m++] = shake_type[i][1]; - buf[m++] = shake_type[i][2]; - } else if(flag == 2) { - buf[m++] = shake_atom[i][0]; - buf[m++] = shake_atom[i][1]; - buf[m++] = shake_type[i][0]; - } else if(flag == 3) { - buf[m++] = shake_atom[i][0]; - buf[m++] = shake_atom[i][1]; - buf[m++] = shake_atom[i][2]; - buf[m++] = shake_type[i][0]; - buf[m++] = shake_type[i][1]; - } else if(flag == 4) { - buf[m++] = shake_atom[i][0]; - buf[m++] = shake_atom[i][1]; - buf[m++] = shake_atom[i][2]; - buf[m++] = shake_atom[i][3]; - buf[m++] = shake_type[i][0]; - buf[m++] = shake_type[i][1]; - buf[m++] = shake_type[i][2]; - } - - return m; -} - -/* ---------------------------------------------------------------------- - unpack values in local atom-based arrays from exchange with another proc -------------------------------------------------------------------------- */ - -int FixShakeCuda::unpack_exchange(int nlocal, double* buf) -{ - int m = 0; - int flag = shake_flag[nlocal] = static_cast<int>(buf[m++]); - - if(flag == 1) { - shake_atom[nlocal][0] = static_cast<int>(buf[m++]); - shake_atom[nlocal][1] = static_cast<int>(buf[m++]); - shake_atom[nlocal][2] = static_cast<int>(buf[m++]); - shake_type[nlocal][0] = static_cast<int>(buf[m++]); - shake_type[nlocal][1] = static_cast<int>(buf[m++]); - shake_type[nlocal][2] = static_cast<int>(buf[m++]); - } else if(flag == 2) { - shake_atom[nlocal][0] = static_cast<int>(buf[m++]); - shake_atom[nlocal][1] = static_cast<int>(buf[m++]); - shake_type[nlocal][0] = 
static_cast<int>(buf[m++]); - } else if(flag == 3) { - shake_atom[nlocal][0] = static_cast<int>(buf[m++]); - shake_atom[nlocal][1] = static_cast<int>(buf[m++]); - shake_atom[nlocal][2] = static_cast<int>(buf[m++]); - shake_type[nlocal][0] = static_cast<int>(buf[m++]); - shake_type[nlocal][1] = static_cast<int>(buf[m++]); - } else if(flag == 4) { - shake_atom[nlocal][0] = static_cast<int>(buf[m++]); - shake_atom[nlocal][1] = static_cast<int>(buf[m++]); - shake_atom[nlocal][2] = static_cast<int>(buf[m++]); - shake_atom[nlocal][3] = static_cast<int>(buf[m++]); - shake_type[nlocal][0] = static_cast<int>(buf[m++]); - shake_type[nlocal][1] = static_cast<int>(buf[m++]); - shake_type[nlocal][2] = static_cast<int>(buf[m++]); - } - - return m; -} - -/* ---------------------------------------------------------------------- - enforce SHAKE constraints from rRESPA - prediction portion is different than Verlet - rRESPA updating of atom coords is done with full v, but only portions of f -------------------------------------------------------------------------- */ -#if 0 -void FixShakeCuda::post_force_respa(int vflag, int ilevel, int iloop) -{ - // call stats only on outermost level - - if(ilevel == nlevels_respa - 1 && update->ntimestep == next_output) stats(); - - // perform SHAKE on every loop iteration of every rRESPA level - // except last loop iteration of inner levels - - if(ilevel < nlevels_respa - 1 && iloop == loop_respa[ilevel] - 1) return; - - // xshake = atom coords after next x update in innermost loop - // depends on rRESPA level - // for levels > 0 this includes more than one velocity update - // xshake = predicted position from call to this routine at level N = - // x + dt0 (v + dtN/m fN + 1/2 dt(N-1)/m f(N-1) + ... 
+ 1/2 dt0/m f0) - - double** *f_level = ((FixRespa*) modify->fix[ifix_respa])->f_level; - dtfsq = dtf_inner * step_respa[ilevel]; - - double invmass, dtfmsq; - int jlevel; - - if(rmass) { - for(int i = 0; i < nlocal; i++) { - if(shake_flag[i]) { - invmass = 1.0 / rmass[i]; - dtfmsq = dtfsq * invmass; - xshake[i][0] = x[i][0] + dtv * v[i][0] + dtfmsq * f[i][0]; - xshake[i][1] = x[i][1] + dtv * v[i][1] + dtfmsq * f[i][1]; - xshake[i][2] = x[i][2] + dtv * v[i][2] + dtfmsq * f[i][2]; - - for(jlevel = 0; jlevel < ilevel; jlevel++) { - dtfmsq = dtf_innerhalf * step_respa[jlevel] * invmass; - xshake[i][0] += dtfmsq * f_level[i][jlevel][0]; - xshake[i][1] += dtfmsq * f_level[i][jlevel][1]; - xshake[i][2] += dtfmsq * f_level[i][jlevel][2]; - } - } else xshake[i][2] = xshake[i][1] = xshake[i][0] = 0.0; - } - - } else { - for(int i = 0; i < nlocal; i++) { - if(shake_flag[i]) { - invmass = 1.0 / mass[type[i]]; - dtfmsq = dtfsq * invmass; - xshake[i][0] = x[i][0] + dtv * v[i][0] + dtfmsq * f[i][0]; - xshake[i][1] = x[i][1] + dtv * v[i][1] + dtfmsq * f[i][1]; - xshake[i][2] = x[i][2] + dtv * v[i][2] + dtfmsq * f[i][2]; - - for(jlevel = 0; jlevel < ilevel; jlevel++) { - dtfmsq = dtf_innerhalf * step_respa[jlevel] * invmass; - xshake[i][0] += dtfmsq * f_level[i][jlevel][0]; - xshake[i][1] += dtfmsq * f_level[i][jlevel][1]; - xshake[i][2] += dtfmsq * f_level[i][jlevel][2]; - } - } else xshake[i][2] = xshake[i][1] = xshake[i][0] = 0.0; - } - } - - // communicate results if necessary - - if(nprocs > 1) comm->forward_comm_fix(this); - - // virial setup - - if(vflag) v_setup(vflag); - else evflag = 0; - - // loop over clusters - - int m; - - for(int i = 0; i < nlist; i++) { - m = list[i]; - - if(shake_flag[m] == 2) shake2(m); - else if(shake_flag[m] == 3) shake3(m); - else if(shake_flag[m] == 4) shake4(m); - else shake3angle(m); - } -} -#endif - -/* ---------------------------------------------------------------------- */ - -int FixShakeCuda::pack_forward_comm(int n, int* list, double* 
buf, - int pbc_flag, int* pbc) -{ - if(cuda->finished_setup) { - int iswap = *list; - - if(iswap < 0) { - iswap = -iswap - 1; - int first = ((int*) buf)[0]; - Cuda_FixShakeCuda_PackComm_Self(&cuda->shared_data, n, iswap, first, pbc, pbc_flag); - } else - Cuda_FixShakeCuda_PackComm(&cuda->shared_data, n, iswap, (void*) buf, pbc, pbc_flag); - - return 3*n; - } - - int i, j, m; - double dx, dy, dz; - - m = 0; - - if(pbc_flag == 0) { - for(i = 0; i < n; i++) { - j = list[i]; - buf[m++] = xshake[j][0]; - buf[m++] = xshake[j][1]; - buf[m++] = xshake[j][2]; - } - } else { - if(domain->triclinic == 0) { - dx = pbc[0] * domain->xprd; - dy = pbc[1] * domain->yprd; - dz = pbc[2] * domain->zprd; - } else { - dx = pbc[0] * domain->xprd + pbc[5] * domain->xy + pbc[4] * domain->xz; - dy = pbc[1] * domain->yprd + pbc[3] * domain->yz; - dz = pbc[2] * domain->zprd; - } - - for(i = 0; i < n; i++) { - j = list[i]; - buf[m++] = xshake[j][0] + dx; - buf[m++] = xshake[j][1] + dy; - buf[m++] = xshake[j][2] + dz; - } - } - - return m; -} - -/* ---------------------------------------------------------------------- */ - -void FixShakeCuda::unpack_forward_comm(int n, int first, double* buf) -{ - if(cuda->finished_setup) { - Cuda_FixShakeCuda_UnpackComm(&cuda->shared_data, n, first, (void*)buf); - return; - } - - int i, m, last; - - m = 0; - last = first + n; - - for(i = first; i < last; i++) { - xshake[i][0] = buf[m++]; - xshake[i][1] = buf[m++]; - xshake[i][2] = buf[m++]; - } -} - -/* ---------------------------------------------------------------------- */ - -void FixShakeCuda::reset_dt() -{ - if(strstr(update->integrate_style, "verlet")) { - dtv = update->dt; - dtfsq = update->dt * update->dt * force->ftm2v; - } else { - dtv = step_respa[0]; - dtf_innerhalf = 0.5 * step_respa[0] * force->ftm2v; - dtf_inner = step_respa[0] * force->ftm2v; - } - - if(cu_shake_atom) - Cuda_FixShakeCuda_Init(&cuda->shared_data, dtv, dtfsq, - cu_shake_flag->dev_data(), cu_shake_atom->dev_data(), 
cu_shake_type->dev_data(), cu_xshake->dev_data(), - cu_bond_distance->dev_data(), cu_angle_distance->dev_data(), cu_virial->dev_data(), - max_iter, tolerance); -} diff --git a/src/USER-CUDA/fix_shake_cuda.h b/src/USER-CUDA/fix_shake_cuda.h deleted file mode 100644 index 577ea1daa4..0000000000 --- a/src/USER-CUDA/fix_shake_cuda.h +++ /dev/null @@ -1,130 +0,0 @@ -/* -*- c++ -*- ---------------------------------------------------------- - LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator - http://lammps.sandia.gov, Sandia National Laboratories - Steve Plimpton, sjplimp@sandia.gov - - Copyright (2003) Sandia Corporation. Under the terms of Contract - DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains - certain rights in this software. This software is distributed under - the GNU General Public License. - - See the README file in the top-level LAMMPS directory. -------------------------------------------------------------------------- */ - -#ifdef FIX_CLASS - -FixStyle(shake/cuda,FixShakeCuda) - -#else - -#ifndef LMP_FIX_SHAKE_CUDA_H -#define LMP_FIX_SHAKE_CUDA_H - -#include "fix.h" -#include "cuda_data.h" -#include "cuda_precision.h" - -namespace LAMMPS_NS { - -class FixShakeCuda : public Fix { - public: - FixShakeCuda(class LAMMPS *, int, char **); - ~FixShakeCuda(); - int setmask(); - void init(); - void setup(int); - void pre_neighbor(); - void post_force(int); - //void post_force_respa(int, int, int); - - double memory_usage(); - void grow_arrays(int); - void copy_arrays(int, int, int); - void set_arrays(int); - int pack_exchange(int, double *); - int unpack_exchange(int, double *); - int pack_forward_comm(int, int *, double *, int, int *); - void unpack_forward_comm(int, int, double *); - - int dof(int); - void reset_dt(); - - double time_postforce; - private: - class Cuda *cuda; - int me,nprocs; - double tolerance; // SHAKE tolerance - int max_iter; // max # of SHAKE iterations - int output_every; // SHAKE stat output every so 
often - int next_output; // timestep for next output - - // settings from input command - int *bond_flag,*angle_flag; // bond/angle types to constrain - int *type_flag; // constrain bonds to these types - double *mass_list; // constrain bonds to these masses - int nmass; // # of masses in mass_list - bool neighbor_step; // was neighboring done in this step -> need to run the Cuda_FixShake_Init - - double *bond_distance,*angle_distance; // constraint distances - cCudaData<double , X_CFLOAT , xx >* cu_bond_distance; - cCudaData<double , X_CFLOAT , xx >* cu_angle_distance; - - int ifix_respa; // rRESPA fix needed by SHAKE - int nlevels_respa; // copies of needed rRESPA variables - int *loop_respa; - double *step_respa; - - double **x,**v,**f; // local ptrs to atom class quantities - double *mass,*rmass; - int *type; - int nlocal; - // atom-based arrays - int *shake_flag; // 0 if atom not in SHAKE cluster - // 1 = size 3 angle cluster - // 2,3,4 = size of bond-only cluster - int **shake_atom; // global IDs of atoms in cluster - // central atom is 1st - // lowest global ID is 1st for size 2 - - int **shake_type; // bondtype of each bond in cluster - // for angle cluster, 3rd value - // is angletype - double **xshake; // unconstrained atom coords - cCudaData<int , int , xx >* cu_shake_flag; - cCudaData<int , int , yx >* cu_shake_atom; - cCudaData<int , int , yx >* cu_shake_type; - cCudaData<double , X_CFLOAT , xy >* cu_xshake; - cCudaData<int , int , xx >* cu_list; - cCudaData<double , ENERGY_CFLOAT , xx >* cu_virial; - - double dtv,dtfsq; // timesteps for trial move - double dtf_inner,dtf_innerhalf; // timesteps for rRESPA trial move - - int *list; // list of clusters to SHAKE - int nlist,maxlist; // size and max-size of list - - // stat quantities - int *b_count,*b_count_all; // counts for each bond type - double *b_ave,*b_max,*b_min; // ave/max/min dist for each bond type - double *b_ave_all,*b_max_all,*b_min_all; // MPI summing arrays - int *a_count,*a_count_all; // 
ditto for angle types - double *a_ave,*a_max,*a_min; - double *a_ave_all,*a_max_all,*a_min_all; - - void find_clusters(); - void swap_clusters(int i,int j); - int masscheck(double); - void unconstrained_update(); - void shake2(int); - void shake3(int); - void shake4(int); - void shake3angle(int); - void stats(); - int bondfind(int, int, int); - int anglefind(int, int, int); -}; - -} - -#endif -#endif diff --git a/src/USER-CUDA/fix_temp_berendsen_cuda.cpp b/src/USER-CUDA/fix_temp_berendsen_cuda.cpp deleted file mode 100644 index ee08aa3462..0000000000 --- a/src/USER-CUDA/fix_temp_berendsen_cuda.cpp +++ /dev/null @@ -1,219 +0,0 @@ -/* ---------------------------------------------------------------------- - LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator - - Original Version: - http://lammps.sandia.gov, Sandia National Laboratories - Steve Plimpton, sjplimp@sandia.gov - - See the README file in the top-level LAMMPS directory. - - ----------------------------------------------------------------------- - - USER-CUDA Package and associated modifications: - https://sourceforge.net/projects/lammpscuda/ - - Christian Trott, christian.trott@tu-ilmenau.de - Lars Winterfeld, lars.winterfeld@tu-ilmenau.de - Theoretical Physics II, University of Technology Ilmenau, Germany - - See the README file in the USER-CUDA directory. - - This software is distributed under the GNU General Public License. -------------------------------------------------------------------------- */ - -/* ---------------------------------------------------------------------- - LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator - http://lammps.sandia.gov, Sandia National Laboratories - Steve Plimpton, sjplimp@sandia.gov - - Copyright (2003) Sandia Corporation. Under the terms of Contract - DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains - certain rights in this software. This software is distributed under - the GNU General Public License. 
- - See the README file in the top-level LAMMPS directory. -------------------------------------------------------------------------- */ - -#include <cstring> -#include <cstdlib> -#include <cmath> -#include "fix_temp_berendsen_cuda.h" -#include "fix_temp_berendsen_cuda_cu.h" -#include "atom.h" -#include "force.h" -#include "group.h" -#include "update.h" -#include "comm.h" -#include "modify.h" -#include "compute.h" -#include "error.h" -#include "user_cuda.h" -#include "cuda_modify_flags.h" - -using namespace LAMMPS_NS; -using namespace FixConst; -using namespace FixConstCuda; - -enum{NOBIAS,BIAS}; - -/* ---------------------------------------------------------------------- */ - -FixTempBerendsenCuda::FixTempBerendsenCuda(LAMMPS *lmp, int narg, char **arg) : - Fix(lmp, narg, arg) -{ - cuda = lmp->cuda; - if(cuda == NULL) - error->all(FLERR,"You cannot use a /cuda class, without activating 'cuda' acceleration. Provide '-c on' as command-line argument to LAMMPS.."); - - if (narg != 6) error->all(FLERR,"Illegal fix temp/berendsen/cuda command"); - - // Berendsen thermostat should be applied every step - - nevery = 1; - - t_start = force->numeric(FLERR,arg[3]); - t_stop = force->numeric(FLERR,arg[4]); - t_period = force->numeric(FLERR,arg[5]); - - // error checks - - if (t_period <= 0.0) error->all(FLERR,"Fix temp/berendsen/cuda period must be > 0.0"); - - // create a new compute temp style - // id = fix-ID + temp, compute group = fix group - - int n = strlen(id) + 6; - id_temp = new char[n]; - strcpy(id_temp,id); - strcat(id_temp,"_temp"); - - char **newarg = new char*[3]; - newarg[0] = id_temp; - newarg[1] = group->names[igroup]; - newarg[2] = (char *) "temp/cuda"; - modify->add_compute(3,newarg); - delete [] newarg; - tflag = 1; -} - -/* ---------------------------------------------------------------------- */ - -FixTempBerendsenCuda::~FixTempBerendsenCuda() -{ - // delete temperature if fix created it - - if (tflag) modify->delete_compute(id_temp); - delete [] 
id_temp; -} - -/* ---------------------------------------------------------------------- */ - -int FixTempBerendsenCuda::setmask() -{ - int mask = 0; - mask |= END_OF_STEP_CUDA; - return mask; -} - -/* ---------------------------------------------------------------------- */ - -void FixTempBerendsenCuda::init() -{ - int icompute = modify->find_compute(id_temp); - if (icompute < 0) - error->all(FLERR,"Temperature ID for fix temp/berendsen/cuda does not exist"); - temperature = modify->compute[icompute]; - if(not temperature->cudable) - error->warning(FLERR,"Fix temp/berendsen/cuda uses non cudable temperature compute"); - if (temperature->tempbias) which = BIAS; - else which = NOBIAS; - - //temperature->init(); //not in original berendsen possible error? -} - -/* ---------------------------------------------------------------------- */ - -void FixTempBerendsenCuda::end_of_step() -{ - double t_current; - if(not temperature->cudable) {cuda->cu_x->download();cuda->cu_v->download();} - t_current = temperature->compute_scalar(); - if (t_current == 0.0) - error->all(FLERR,"Computed temperature for fix temp/berendsen/cuda cannot be 0.0"); - - double delta = update->ntimestep - update->beginstep; - delta /= update->endstep - update->beginstep; - t_target = t_start + delta * (t_stop-t_start); - - // rescale velocities by lamda - - double lamda = sqrt(1.0 + update->dt/t_period*(t_target/t_current - 1.0)); - - double **v = atom->v; - int *mask = atom->mask; - int nlocal = atom->nlocal; - - if (which == NOBIAS) { - Cuda_FixTempBerendsenCuda_EndOfStep(&cuda->shared_data, groupbit,lamda); - - } else { - if(not temperature->cudable) - { - cuda->cu_x->download();cuda->cu_v->download(); - for (int i = 0; i < nlocal; i++) { - if (mask[i] & groupbit) { - temperature->remove_bias(i,v[i]); - v[i][0] *= lamda; - v[i][1] *= lamda; - v[i][2] *= lamda; - temperature->restore_bias(i,v[i]); - } - } - cuda->cu_v->upload(); - } - else - { - temperature->remove_bias_all(); - 
Cuda_FixTempBerendsenCuda_EndOfStep(&cuda->shared_data, groupbit,lamda); - temperature->restore_bias_all(); - } - } - - -} - -/* ---------------------------------------------------------------------- */ - -int FixTempBerendsenCuda::modify_param(int narg, char **arg) -{ - if (strcmp(arg[0],"temp") == 0) { - if (narg < 2) error->all(FLERR,"Illegal fix_modify command"); - if (tflag) { - modify->delete_compute(id_temp); - tflag = 0; - } - delete [] id_temp; - int n = strlen(arg[1]) + 1; - id_temp = new char[n]; - strcpy(id_temp,arg[1]); - - int icompute = modify->find_compute(id_temp); - if (icompute < 0) error->all(FLERR,"Could not find fix_modify temperature ID"); - temperature = modify->compute[icompute]; - - if (temperature->tempflag == 0) - error->all(FLERR,"Fix_modify temperature ID does not compute temperature"); - if (temperature->igroup != igroup && comm->me == 0) - error->warning(FLERR,"Group for fix_modify temp != fix group"); - return 2; - } - return 0; -} - - -/* ---------------------------------------------------------------------- */ - -void FixTempBerendsenCuda::reset_target(double t_new) -{ - t_start = t_stop = t_new; -} diff --git a/src/USER-CUDA/fix_temp_berendsen_cuda.h b/src/USER-CUDA/fix_temp_berendsen_cuda.h deleted file mode 100644 index 610e5421e5..0000000000 --- a/src/USER-CUDA/fix_temp_berendsen_cuda.h +++ /dev/null @@ -1,58 +0,0 @@ -/* -*- c++ -*- ---------------------------------------------------------- - LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator - - Original Version: - http://lammps.sandia.gov, Sandia National Laboratories - Steve Plimpton, sjplimp@sandia.gov - - See the README file in the top-level LAMMPS directory. 
- - ----------------------------------------------------------------------- - - USER-CUDA Package and associated modifications: - https://sourceforge.net/projects/lammpscuda/ - - Christian Trott, christian.trott@tu-ilmenau.de - Lars Winterfeld, lars.winterfeld@tu-ilmenau.de - Theoretical Physics II, University of Technology Ilmenau, Germany - - See the README file in the USER-CUDA directory. - - This software is distributed under the GNU General Public License. -------------------------------------------------------------------------- */ -#ifdef FIX_CLASS - -FixStyle(temp/berendsen/cuda,FixTempBerendsenCuda) - -#else - -#ifndef LMP_FIX_TEMP_BERENDSEN_CUDA_H -#define LMP_FIX_TEMP_BERENDSEN_CUDA_H - -#include "fix.h" - -namespace LAMMPS_NS { -class FixTempBerendsenCuda : public Fix { - public: - FixTempBerendsenCuda(class LAMMPS *, int, char **); - ~FixTempBerendsenCuda(); - int setmask(); - void init(); - void end_of_step(); - int modify_param(int, char **); - void reset_target(double); - - private: - class Cuda *cuda; - int which; - double t_start,t_stop,t_target,t_period; - - char *id_temp; - class Compute *temperature; - int tflag; -}; - -} - -#endif -#endif diff --git a/src/USER-CUDA/fix_temp_rescale_cuda.cpp b/src/USER-CUDA/fix_temp_rescale_cuda.cpp deleted file mode 100644 index a0ebb47d12..0000000000 --- a/src/USER-CUDA/fix_temp_rescale_cuda.cpp +++ /dev/null @@ -1,224 +0,0 @@ -/* ---------------------------------------------------------------------- - LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator - - Original Version: - http://lammps.sandia.gov, Sandia National Laboratories - Steve Plimpton, sjplimp@sandia.gov - - See the README file in the top-level LAMMPS directory. 
- - ----------------------------------------------------------------------- - - USER-CUDA Package and associated modifications: - https://sourceforge.net/projects/lammpscuda/ - - Christian Trott, christian.trott@tu-ilmenau.de - Lars Winterfeld, lars.winterfeld@tu-ilmenau.de - Theoretical Physics II, University of Technology Ilmenau, Germany - - See the README file in the USER-CUDA directory. - - This software is distributed under the GNU General Public License. -------------------------------------------------------------------------- */ - -#include <cstring> -#include <cstdlib> -#include <cmath> -#include "fix_temp_rescale_cuda.h" -#include "fix_temp_rescale_cuda_cu.h" -#include "atom.h" -#include "force.h" -#include "group.h" -#include "update.h" -#include "domain.h" -#include "region.h" -#include "comm.h" -#include "modify.h" -#include "compute.h" -#include "error.h" -#include "user_cuda.h" -#include "cuda_modify_flags.h" - -using namespace LAMMPS_NS; -using namespace FixConst; -using namespace FixConstCuda; - -enum{NOBIAS,BIAS}; - -/* ---------------------------------------------------------------------- */ - -FixTempRescaleCuda::FixTempRescaleCuda(LAMMPS *lmp, int narg, char **arg) : - Fix(lmp, narg, arg) -{ - cuda = lmp->cuda; - if(cuda == NULL) - error->all(FLERR,"You cannot use a /cuda class, without activating 'cuda' acceleration. 
Provide '-c on' as command-line argument to LAMMPS.."); - - if (narg < 8) error->all(FLERR,"Illegal fix temp/rescale/cuda command"); - - nevery = force->inumeric(FLERR,arg[3]); - if (nevery <= 0) error->all(FLERR,"Illegal fix temp/rescale/cuda command"); - - scalar_flag = 1; - global_freq = nevery; - extscalar = 1; - - t_start = force->numeric(FLERR,arg[4]); - t_stop = force->numeric(FLERR,arg[5]); - t_window = force->numeric(FLERR,arg[6]); - fraction = force->numeric(FLERR,arg[7]); - - // create a new compute temp - // id = fix-ID + temp, compute group = fix group - - int n = strlen(id) + 6; - id_temp = new char[n]; - strcpy(id_temp,id); - strcat(id_temp,"_temp"); - - char **newarg = new char*[6]; - newarg[0] = id_temp; - newarg[1] = group->names[igroup]; - newarg[2] = (char *) "temp/cuda"; - modify->add_compute(3,newarg); - delete [] newarg; - tflag = 1; - - energy = 0.0; -} - -/* ---------------------------------------------------------------------- */ - -FixTempRescaleCuda::~FixTempRescaleCuda() -{ - // delete temperature if fix created it - - if (tflag) modify->delete_compute(id_temp); - delete [] id_temp; -} - -/* ---------------------------------------------------------------------- */ - -int FixTempRescaleCuda::setmask() -{ - int mask = 0; - mask |= END_OF_STEP_CUDA; - mask |= THERMO_ENERGY_CUDA; - return mask; -} - -/* ---------------------------------------------------------------------- */ - -void FixTempRescaleCuda::init() -{ - int icompute = modify->find_compute(id_temp); - if (icompute < 0) - error->all(FLERR,"Temperature ID for fix temp/rescale/cuda does not exist"); - temperature = modify->compute[icompute]; - if(not temperature->cudable) - error->warning(FLERR,"Fix temp/rescale/cuda uses non cudable temperature compute"); - if (temperature->tempbias) which = BIAS; - else which = NOBIAS; -} - -/* ---------------------------------------------------------------------- */ - -void FixTempRescaleCuda::end_of_step() -{ - double t_current; - if(not 
temperature->cudable) {cuda->cu_x->download();cuda->cu_v->download();} - t_current = temperature->compute_scalar(); - if (t_current == 0.0) - error->all(FLERR,"Computed temperature for fix temp/rescale/cuda cannot be 0.0"); - - double delta = update->ntimestep - update->beginstep; - delta /= update->endstep - update->beginstep; - double t_target = t_start + delta * (t_stop-t_start); - - // rescale velocity of appropriate atoms if outside window - - if (fabs(t_current-t_target) > t_window) { - t_target = t_current - fraction*(t_current-t_target); - double factor = sqrt(t_target/t_current); - double efactor = 0.5 * force->boltz * temperature->dof; - - double **v = atom->v; - int *mask = atom->mask; - int nlocal = atom->nlocal; - - if (which == NOBIAS) { - energy += (t_current-t_target) * efactor; - - Cuda_FixTempRescaleCuda_EndOfStep(&cuda->shared_data, groupbit,factor); - - } else if (which == BIAS) { - energy += (t_current-t_target) * efactor; - if(not temperature->cudable) - { - cuda->cu_x->download();cuda->cu_v->download(); - for (int i = 0; i < nlocal; i++) { - if (mask[i] & groupbit) { - temperature->remove_bias(i,v[i]); - v[i][0] *= factor; - v[i][1] *= factor; - v[i][2] *= factor; - temperature->restore_bias(i,v[i]); - } - } - cuda->cu_v->upload(); - } - else - { - temperature->remove_bias_all(); - Cuda_FixTempRescaleCuda_EndOfStep(&cuda->shared_data, groupbit,factor); - temperature->restore_bias_all(); - } - } - - } -} - -/* ---------------------------------------------------------------------- */ - -int FixTempRescaleCuda::modify_param(int narg, char **arg) -{ - if (strcmp(arg[0],"temp") == 0) { - if (narg < 2) error->all(FLERR,"Illegal fix_modify command"); - if (tflag) { - modify->delete_compute(id_temp); - tflag = 0; - } - delete [] id_temp; - int n = strlen(arg[1]) + 1; - id_temp = new char[n]; - strcpy(id_temp,arg[1]); - - int icompute = modify->find_compute(id_temp); - if (icompute < 0) error->all(FLERR,"Could not find fix_modify temperature ID"); - 
temperature = modify->compute[icompute]; - - if (temperature->tempflag == 0) - error->all(FLERR,"Fix_modify temperature ID does not compute temperature"); - if (temperature->igroup != igroup && comm->me == 0) - error->warning(FLERR,"Group for fix_modify temp != fix group"); - if(not temperature->cudable) - error->warning(FLERR,"Fix temp/rescale/cuda uses non cudable temperature compute"); - return 2; - } - return 0; -} - - -/* ---------------------------------------------------------------------- */ - -void FixTempRescaleCuda::reset_target(double t_new) -{ - t_start = t_stop = t_new; -} - -/* ---------------------------------------------------------------------- */ - -double FixTempRescaleCuda::compute_scalar() -{ - return energy; -} diff --git a/src/USER-CUDA/fix_temp_rescale_cuda.h b/src/USER-CUDA/fix_temp_rescale_cuda.h deleted file mode 100644 index 3bdc71a1a0..0000000000 --- a/src/USER-CUDA/fix_temp_rescale_cuda.h +++ /dev/null @@ -1,61 +0,0 @@ -/* -*- c++ -*- ---------------------------------------------------------- - LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator - - Original Version: - http://lammps.sandia.gov, Sandia National Laboratories - Steve Plimpton, sjplimp@sandia.gov - - See the README file in the top-level LAMMPS directory. - - ----------------------------------------------------------------------- - - USER-CUDA Package and associated modifications: - https://sourceforge.net/projects/lammpscuda/ - - Christian Trott, christian.trott@tu-ilmenau.de - Lars Winterfeld, lars.winterfeld@tu-ilmenau.de - Theoretical Physics II, University of Technology Ilmenau, Germany - - See the README file in the USER-CUDA directory. - - This software is distributed under the GNU General Public License. 
-------------------------------------------------------------------------- */ - -#ifdef FIX_CLASS - -FixStyle(temp/rescale/cuda,FixTempRescaleCuda) - -#else - -#ifndef FIX_TEMP_RESCALE_CUDA_H -#define FIX_TEMP_RESCALE_CUDA_H - -#include "fix.h" - -namespace LAMMPS_NS { -class FixTempRescaleCuda : public Fix { - public: - FixTempRescaleCuda(class LAMMPS *, int, char **); - ~FixTempRescaleCuda(); - int setmask(); - void init(); - void end_of_step(); - int modify_param(int, char **); - void reset_target(double); - double compute_scalar(); - - private: - class Cuda *cuda; - int which; - double t_start,t_stop,t_window; - double fraction,energy,efactor; - - char *id_temp; - class Compute *temperature; - int tflag; -}; - -} - -#endif -#endif diff --git a/src/USER-CUDA/fix_temp_rescale_limit_cuda.cpp b/src/USER-CUDA/fix_temp_rescale_limit_cuda.cpp deleted file mode 100644 index eb8cf8d948..0000000000 --- a/src/USER-CUDA/fix_temp_rescale_limit_cuda.cpp +++ /dev/null @@ -1,237 +0,0 @@ -/* ---------------------------------------------------------------------- - LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator - - Original Version: - http://lammps.sandia.gov, Sandia National Laboratories - Steve Plimpton, sjplimp@sandia.gov - - See the README file in the top-level LAMMPS directory. - - ----------------------------------------------------------------------- - - USER-CUDA Package and associated modifications: - https://sourceforge.net/projects/lammpscuda/ - - Christian Trott, christian.trott@tu-ilmenau.de - Lars Winterfeld, lars.winterfeld@tu-ilmenau.de - Theoretical Physics II, University of Technology Ilmenau, Germany - - See the README file in the USER-CUDA directory. - - This software is distributed under the GNU General Public License. 
-------------------------------------------------------------------------- */ - -#include <cstring> -#include <cstdlib> -#include <cmath> -#include "fix_temp_rescale_limit_cuda.h" -#include "fix_temp_rescale_limit_cuda_cu.h" -#include "atom.h" -#include "force.h" -#include "group.h" -#include "update.h" -#include "domain.h" -#include "region.h" -#include "comm.h" -#include "modify.h" -#include "compute.h" -#include "error.h" -#include "user_cuda.h" -#include "cuda_modify_flags.h" - -using namespace LAMMPS_NS; -using namespace FixConst; -using namespace FixConstCuda; - -enum{NOBIAS,BIAS}; - -/* ---------------------------------------------------------------------- */ - -FixTempRescaleLimitCuda::FixTempRescaleLimitCuda(LAMMPS *lmp, int narg, char **arg) : - Fix(lmp, narg, arg) -{ - cuda = lmp->cuda; - if(cuda == NULL) - error->all(FLERR,"You cannot use a /cuda class, without activating 'cuda' acceleration. Provide '-c on' as command-line argument to LAMMPS.."); - - if (narg < 9) error->all(FLERR,"Illegal fix temp/rescale/limit/cuda command"); - - nevery = force->inumeric(FLERR,arg[3]); - if (nevery <= 0) error->all(FLERR,"Illegal fix temp/rescale/limit/cuda command"); - - scalar_flag = 1; - global_freq = nevery; - extscalar = 1; - - t_start = force->numeric(FLERR,arg[4]); - t_stop = force->numeric(FLERR,arg[5]); - t_window = force->numeric(FLERR,arg[6]); - fraction = force->numeric(FLERR,arg[7]); - limit = force->numeric(FLERR,arg[8]); - if (limit <= 1.0) error->all(FLERR,"Illegal fix temp/rescale/limit/cuda command (limit must be > 1.0)"); - - - // create a new compute temp - // id = fix-ID + temp, compute group = fix group - - int n = strlen(id) + 6; - id_temp = new char[n]; - strcpy(id_temp,id); - strcat(id_temp,"_temp"); - - char **newarg = new char*[6]; - newarg[0] = id_temp; - newarg[1] = group->names[igroup]; - newarg[2] = (char *) "temp/cuda"; - modify->add_compute(3,newarg); - delete [] newarg; - tflag = 1; - - energy = 0.0; -} - -/* 
---------------------------------------------------------------------- */ - -FixTempRescaleLimitCuda::~FixTempRescaleLimitCuda() -{ - // delete temperature if fix created it - - if (tflag) modify->delete_compute(id_temp); - delete [] id_temp; -} - -/* ---------------------------------------------------------------------- */ - -int FixTempRescaleLimitCuda::setmask() -{ - int mask = 0; - mask |= END_OF_STEP_CUDA; - mask |= THERMO_ENERGY_CUDA; - return mask; -} - -/* ---------------------------------------------------------------------- */ - -void FixTempRescaleLimitCuda::init() -{ - int icompute = modify->find_compute(id_temp); - if (icompute < 0) - error->all(FLERR,"Temperature ID for fix temp/rescale/limit/cuda does not exist"); - temperature = modify->compute[icompute]; - if(not temperature->cudable) - error->warning(FLERR,"Fix temp/rescale/limit/cuda uses non cudable temperature compute"); - if (temperature->tempbias) which = BIAS; - else which = NOBIAS; -} - -/* ---------------------------------------------------------------------- */ - -void FixTempRescaleLimitCuda::end_of_step() -{ - double t_current; - if(not temperature->cudable) {cuda->cu_x->download();cuda->cu_v->download();} - t_current = temperature->compute_scalar(); - if (t_current == 0.0) - error->all(FLERR,"Computed temperature for fix temp/rescale/limit/cuda cannot be 0.0"); - - double delta = update->ntimestep - update->beginstep; - delta /= update->endstep - update->beginstep; - double t_target = t_start + delta * (t_stop-t_start); - - // rescale velocity of appropriate atoms if outside window - - if (fabs(t_current-t_target) > t_window) { - t_target = t_current - fraction*(t_current-t_target); - double factor = sqrt(t_target/t_current); - double efactor = 0.5 * force->boltz * temperature->dof; - - double **v = atom->v; - int *mask = atom->mask; - int nlocal = atom->nlocal; - - double massone; - if(atom->rmass) massone = atom->rmass[0]; - else massone = atom->mass[0]; - - double 
current_limit=sqrt(limit*force->boltz*t_target*temperature->dof/massone/force->mvv2e); - if (which == NOBIAS) { - energy += (t_current-t_target) * efactor; - - - Cuda_FixTempRescaleLimitCuda_EndOfStep(&cuda->shared_data, groupbit,factor,current_limit); - - } else if (which == BIAS) { - energy += (t_current-t_target) * efactor; - if(not temperature->cudable) - { - cuda->cu_x->download();cuda->cu_v->download(); - for (int i = 0; i < nlocal; i++) { - if (mask[i] & groupbit) { - temperature->remove_bias(i,v[i]); - double vx = v[i][0] * factor; - double vy = v[i][1] * factor; - double vz = v[i][2] * factor; - v[i][0]=vx>0?MIN(vx,current_limit):MAX(vx,-current_limit); - v[i][1]=vy>0?MIN(vy,current_limit):MAX(vy,-current_limit); - v[i][2]=vz>0?MIN(vz,current_limit):MAX(vz,-current_limit); - - temperature->restore_bias(i,v[i]); - } - } - cuda->cu_v->upload(); - } - else - { - temperature->remove_bias_all(); - Cuda_FixTempRescaleLimitCuda_EndOfStep(&cuda->shared_data, groupbit,factor,current_limit); - temperature->restore_bias_all(); - } - } - - } -} - -/* ---------------------------------------------------------------------- */ - -int FixTempRescaleLimitCuda::modify_param(int narg, char **arg) -{ - if (strcmp(arg[0],"temp") == 0) { - if (narg < 2) error->all(FLERR,"Illegal fix_modify command"); - if (tflag) { - modify->delete_compute(id_temp); - tflag = 0; - } - delete [] id_temp; - int n = strlen(arg[1]) + 1; - id_temp = new char[n]; - strcpy(id_temp,arg[1]); - - int icompute = modify->find_compute(id_temp); - if (icompute < 0) error->all(FLERR,"Could not find fix_modify temperature ID"); - temperature = modify->compute[icompute]; - - if (temperature->tempflag == 0) - error->all(FLERR,"Fix_modify temperature ID does not compute temperature"); - if (temperature->igroup != igroup && comm->me == 0) - error->warning(FLERR,"Group for fix_modify temp != fix group"); - if(not temperature->cudable) - error->warning(FLERR,"Fix temp/rescale/limit/cuda uses non cudable temperature 
compute"); - return 2; - } - return 0; -} - - -/* ---------------------------------------------------------------------- */ - -void FixTempRescaleLimitCuda::reset_target(double t_new) -{ - t_start = t_stop = t_new; -} - -/* ---------------------------------------------------------------------- */ - -double FixTempRescaleLimitCuda::compute_scalar() -{ - return energy; -} diff --git a/src/USER-CUDA/fix_temp_rescale_limit_cuda.h b/src/USER-CUDA/fix_temp_rescale_limit_cuda.h deleted file mode 100644 index b2bba2049a..0000000000 --- a/src/USER-CUDA/fix_temp_rescale_limit_cuda.h +++ /dev/null @@ -1,61 +0,0 @@ -/* -*- c++ -*- ---------------------------------------------------------- - LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator - - Original Version: - http://lammps.sandia.gov, Sandia National Laboratories - Steve Plimpton, sjplimp@sandia.gov - - See the README file in the top-level LAMMPS directory. - - ----------------------------------------------------------------------- - - USER-CUDA Package and associated modifications: - https://sourceforge.net/projects/lammpscuda/ - - Christian Trott, christian.trott@tu-ilmenau.de - Lars Winterfeld, lars.winterfeld@tu-ilmenau.de - Theoretical Physics II, University of Technology Ilmenau, Germany - - See the README file in the USER-CUDA directory. - - This software is distributed under the GNU General Public License. 
-------------------------------------------------------------------------- */ - -#ifdef FIX_CLASS - -FixStyle(temp/rescale/limit/cuda,FixTempRescaleLimitCuda) - -#else - -#ifndef FIX_TEMP_RESCALE_LIMIT_CUDA_H -#define FIX_TEMP_RESCALE_LIMIT_CUDA_H - -#include "fix.h" - -namespace LAMMPS_NS { -class FixTempRescaleLimitCuda : public Fix { - public: - FixTempRescaleLimitCuda(class LAMMPS *, int, char **); - ~FixTempRescaleLimitCuda(); - int setmask(); - void init(); - void end_of_step(); - int modify_param(int, char **); - void reset_target(double); - double compute_scalar(); - - private: - class Cuda *cuda; - int which; - double t_start,t_stop,t_window; - double fraction,energy,efactor; - double limit; - char *id_temp; - class Compute *temperature; - int tflag; -}; - -} - -#endif -#endif diff --git a/src/USER-CUDA/fix_viscous_cuda.cpp b/src/USER-CUDA/fix_viscous_cuda.cpp deleted file mode 100644 index 09871c86e3..0000000000 --- a/src/USER-CUDA/fix_viscous_cuda.cpp +++ /dev/null @@ -1,105 +0,0 @@ -/* ---------------------------------------------------------------------- - LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator - - Original Version: - http://lammps.sandia.gov, Sandia National Laboratories - Steve Plimpton, sjplimp@sandia.gov - - See the README file in the top-level LAMMPS directory. - - ----------------------------------------------------------------------- - - USER-CUDA Package and associated modifications: - https://sourceforge.net/projects/lammpscuda/ - - Christian Trott, christian.trott@tu-ilmenau.de - Lars Winterfeld, lars.winterfeld@tu-ilmenau.de - Theoretical Physics II, University of Technology Ilmenau, Germany - - See the README file in the USER-CUDA directory. - - This software is distributed under the GNU General Public License. 
-------------------------------------------------------------------------- */ - -#include <cmath> -#include <cstdlib> -#include <cstring> -#include "fix_viscous_cuda.h" -#include "fix_viscous_cuda_cu.h" -#include "atom.h" -#include "update.h" -#include "respa.h" -#include "error.h" -#include "cuda_modify_flags.h" -#include "user_cuda.h" - -using namespace LAMMPS_NS; -using namespace FixConst; -using namespace FixConstCuda; - -/* ---------------------------------------------------------------------- */ - -FixViscousCuda::FixViscousCuda(LAMMPS *lmp, int narg, char **arg) : - FixViscous(lmp, narg, arg) -{ - cuda = lmp->cuda; - if(cuda == NULL) - error->all(FLERR,"You cannot use a /cuda class, without activating 'cuda' acceleration. Provide '-c on' as command-line argument to LAMMPS.."); - - cu_gamma=NULL; -} - -/* ---------------------------------------------------------------------- */ - -FixViscousCuda::~FixViscousCuda() -{ - delete cu_gamma; -} - -/* ---------------------------------------------------------------------- */ - -int FixViscousCuda::setmask() -{ - int mask = 0; - mask |= POST_FORCE_CUDA; - // mask |= POST_FORCE_RESPA; - // mask |= MIN_POST_FORCE; - return mask; -} - - -/* ---------------------------------------------------------------------- */ - -void FixViscousCuda::setup(int vflag) -{ - if(not cu_gamma) - cu_gamma = new cCudaData<double, F_CFLOAT, x> (gamma,atom->ntypes+1); - Cuda_FixViscousCuda_Init(&cuda->shared_data); - cu_gamma->upload(); - // if (strcmp(update->integrate_style,"verlet/cuda") == 0) - post_force(vflag); - /* else { - ((Respa *) update->integrate)->copy_flevel_f(nlevels_respa-1); - post_force_respa(vflag,nlevels_respa-1,0); - ((Respa *) update->integrate)->copy_f_flevel(nlevels_respa-1); - }*/ -} - -/* ---------------------------------------------------------------------- */ - -void FixViscousCuda::min_setup(int vflag) -{ - Cuda_FixViscousCuda_Init(&cuda->shared_data); - post_force(vflag); -} - -/* 
---------------------------------------------------------------------- */ - -void FixViscousCuda::post_force(int vflag) -{ - // apply drag force to atoms in group - // direction is opposed to velocity vector - // magnitude depends on atom type - - Cuda_FixViscousCuda_PostForce(&cuda->shared_data, groupbit,cu_gamma->dev_data()); -} diff --git a/src/USER-CUDA/fix_viscous_cuda.h b/src/USER-CUDA/fix_viscous_cuda.h deleted file mode 100644 index e0cb6ba4b0..0000000000 --- a/src/USER-CUDA/fix_viscous_cuda.h +++ /dev/null @@ -1,55 +0,0 @@ -/* -*- c++ -*- ---------------------------------------------------------- - LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator - - Original Version: - http://lammps.sandia.gov, Sandia National Laboratories - Steve Plimpton, sjplimp@sandia.gov - - See the README file in the top-level LAMMPS directory. - - ----------------------------------------------------------------------- - - USER-CUDA Package and associated modifications: - https://sourceforge.net/projects/lammpscuda/ - - Christian Trott, christian.trott@tu-ilmenau.de - Lars Winterfeld, lars.winterfeld@tu-ilmenau.de - Theoretical Physics II, University of Technology Ilmenau, Germany - - See the README file in the USER-CUDA directory. - - This software is distributed under the GNU General Public License. 
-------------------------------------------------------------------------- */ - -#ifdef FIX_CLASS - -FixStyle(viscous/cuda,FixViscousCuda) - -#else - -#ifndef LMP_FIX_VISCOUS_CUDA_H -#define LMP_FIX_VISCOUS_CUDA_H - -#include "fix_viscous.h" -#include "cuda_data.h" - -namespace LAMMPS_NS { - -class FixViscousCuda : public FixViscous { - public: - FixViscousCuda(class LAMMPS *, int, char **); - ~FixViscousCuda(); - int setmask(); - void setup(int); - void min_setup(int); - void post_force(int); - cCudaData<double, F_CFLOAT, x>* cu_gamma; - - private: - class Cuda *cuda; -}; - -} - -#endif -#endif diff --git a/src/USER-CUDA/modify_cuda.cpp b/src/USER-CUDA/modify_cuda.cpp deleted file mode 100644 index 82d6d92036..0000000000 --- a/src/USER-CUDA/modify_cuda.cpp +++ /dev/null @@ -1,437 +0,0 @@ -/* ---------------------------------------------------------------------- - LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator - http://lammps.sandia.gov, Sandia National Laboratories - Steve Plimpton, sjplimp@sandia.gov - - Copyright (2003) Sandia Corporation. Under the terms of Contract - DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains - certain rights in this software. This software is distributed under - the GNU General Public License. - - See the README file in the top-level LAMMPS directory. 
-------------------------------------------------------------------------- */ - -#include <cstdio> -#include <cstring> -#include "modify_cuda.h" -#include "style_compute.h" -#include "style_fix.h" -#include "atom.h" -#include "comm.h" -#include "fix.h" -#include "compute.h" -#include "group.h" -#include "update.h" -#include "domain.h" -#include "user_cuda.h" -#include "memory.h" -#include "error.h" - -#include "cuda_modify_flags.h" - -using namespace LAMMPS_NS; -using namespace FixConst; -using namespace FixConstCuda; - -#define DELTA 4 - -#define BIG 1.0e20 - - -/* ---------------------------------------------------------------------- */ - -ModifyCuda::ModifyCuda(LAMMPS *lmp) : Modify(lmp) -{ - cuda = lmp->cuda; - if(cuda == NULL) - error->all(FLERR,"You cannot use a /cuda class, without activating 'cuda' acceleration. Provide '-c on' as command-line argument to LAMMPS.."); - - n_initial_integrate_cuda = 0; - n_post_integrate_cuda = 0; - n_pre_exchange = 0; - n_pre_neighbor_cuda = 0; - n_pre_force_cuda = 0; - n_post_force_cuda = 0; - n_final_integrate_cuda = 0; - n_end_of_step_cuda = 0; - n_thermo_energy_cuda = 0; - - n_initial_integrate_host = 0; - n_post_integrate_host = 0; - n_pre_exchange = 0; - n_pre_neighbor_host = 0; - n_pre_force_host = 0; - n_post_force_host = 0; - n_final_integrate_host = 0; - n_end_of_step_host = 0; - n_thermo_energy_host = 0; - - list_initial_integrate_cuda = NULL; - list_post_integrate_cuda = NULL; - list_pre_exchange_cuda = NULL; - list_pre_neighbor_cuda = NULL; - list_pre_force_cuda = NULL; - list_post_force_cuda = NULL; - list_final_integrate_cuda = NULL; - list_end_of_step_cuda = NULL; - list_thermo_energy_cuda = NULL; - end_of_step_every_cuda = NULL; -} - -/* ---------------------------------------------------------------------- */ - -ModifyCuda::~ModifyCuda() -{ - delete [] list_initial_integrate_cuda; - delete [] list_post_integrate_cuda; - delete [] list_pre_exchange_cuda; - delete [] list_pre_neighbor_cuda; - delete [] 
list_pre_force_cuda; - delete [] list_post_force_cuda; - delete [] list_final_integrate_cuda; - delete [] list_end_of_step_cuda; - delete [] list_thermo_energy_cuda; - delete [] end_of_step_every_cuda; -} - -/* ---------------------------------------------------------------------- - initialize all fixes and computes -------------------------------------------------------------------------- */ - -void ModifyCuda::init() -{ - int i,j; - - // delete storage of restart info since it is not valid after 1st run - - restart_deallocate(); - - // create lists of fixes to call at each stage of run - - list_init(INITIAL_INTEGRATE,n_initial_integrate,list_initial_integrate); - list_init(POST_INTEGRATE,n_post_integrate,list_post_integrate); - list_init(PRE_EXCHANGE,n_pre_exchange,list_pre_exchange); - list_init(PRE_NEIGHBOR,n_pre_neighbor,list_pre_neighbor); - list_init(PRE_FORCE,n_pre_force,list_pre_force); - list_init(POST_FORCE,n_post_force,list_post_force); - list_init(FINAL_INTEGRATE,n_final_integrate,list_final_integrate); - list_init_end_of_step(END_OF_STEP,n_end_of_step,list_end_of_step); - list_init_thermo_energy(THERMO_ENERGY,n_thermo_energy,list_thermo_energy); - - list_init(INITIAL_INTEGRATE_CUDA, n_initial_integrate_cuda, list_initial_integrate_cuda); - list_init(POST_INTEGRATE_CUDA, n_post_integrate_cuda, list_post_integrate_cuda); - list_init(PRE_EXCHANGE_CUDA, n_pre_exchange_cuda, list_pre_exchange_cuda); - list_init(PRE_NEIGHBOR_CUDA, n_pre_neighbor_cuda, list_pre_neighbor_cuda); - list_init(PRE_FORCE_CUDA, n_pre_force_cuda, list_pre_force_cuda); - list_init(POST_FORCE_CUDA, n_post_force_cuda, list_post_force_cuda); - list_init(FINAL_INTEGRATE_CUDA, n_final_integrate_cuda, list_final_integrate_cuda); - list_init_end_of_step_cuda(END_OF_STEP_CUDA, n_end_of_step_cuda, list_end_of_step_cuda); - list_init_thermo_energy(THERMO_ENERGY_CUDA, n_thermo_energy_cuda, list_thermo_energy_cuda); - - n_initial_integrate_host = n_initial_integrate; - n_post_integrate_host = 
n_post_integrate; - n_pre_exchange_host = n_pre_exchange; - n_pre_neighbor_host = n_pre_neighbor; - n_pre_force_host = n_pre_force; - n_post_force_host = n_post_force; - n_final_integrate_host = n_final_integrate; - n_end_of_step_host = n_end_of_step; - n_thermo_energy_host = n_thermo_energy; - - n_initial_integrate = n_initial_integrate_cuda+n_initial_integrate_host; - n_post_integrate = n_post_integrate_cuda+n_post_integrate_host; - n_pre_exchange = n_pre_exchange_cuda+n_pre_exchange_host; - n_pre_neighbor = n_pre_neighbor_cuda+n_pre_neighbor_host; - n_pre_force = n_pre_force_cuda+n_pre_force_host; - n_post_force = n_post_force_cuda+n_post_force_host; - n_final_integrate = n_final_integrate_cuda+n_final_integrate_host; - n_end_of_step = n_end_of_step_cuda+n_end_of_step_host; - n_thermo_energy = n_thermo_energy_cuda+n_thermo_energy_host; - - list_init(INITIAL_INTEGRATE_RESPA, - n_initial_integrate_respa,list_initial_integrate_respa); - list_init(POST_INTEGRATE_RESPA, - n_post_integrate_respa,list_post_integrate_respa); - list_init(POST_FORCE_RESPA, - n_post_force_respa,list_post_force_respa); - list_init(PRE_FORCE_RESPA, - n_pre_force_respa,list_pre_force_respa); - list_init(FINAL_INTEGRATE_RESPA, - n_final_integrate_respa,list_final_integrate_respa); - - list_init(MIN_PRE_EXCHANGE,n_min_pre_exchange,list_min_pre_exchange); - list_init(MIN_POST_FORCE,n_min_post_force,list_min_post_force); - list_init(MIN_ENERGY,n_min_energy,list_min_energy); - - // init each fix - // needs to come before compute init - // this is b/c some computes call fix->dof() - // FixRigid::dof() depends on its own init having been called - - for (i = 0; i < nfix; i++) fix[i]->init(); - - // set global flag if any fix has its restart_pbc flag set - - restart_pbc_any = 0; - for (i = 0; i < nfix; i++) - if (fix[i]->restart_pbc) restart_pbc_any = 1; - - // create list of computes that store invocation times - - list_init_compute(); - - // init each compute - // set invoked_scalar,vector,etc to -1 
to force new run to re-compute them - // add initial timestep to all computes that store invocation times - // since any of them may be invoked by initial thermo - // do not clear out invocation times stored within a compute, - // b/c some may be holdovers from previous run, like for ave fixes - - for (i = 0; i < ncompute; i++) { - compute[i]->init(); - compute[i]->invoked_scalar = -1; - compute[i]->invoked_vector = -1; - compute[i]->invoked_array = -1; - compute[i]->invoked_peratom = -1; - compute[i]->invoked_local = -1; - } - addstep_compute_all(update->ntimestep); - - // warn if any particle is time integrated more than once - - int nlocal = atom->nlocal; - int *mask = atom->mask; - - int *flag = new int[nlocal]; - for (i = 0; i < nlocal; i++) flag[i] = 0; - - int groupbit; - for (i = 0; i < nfix; i++) { - if (fix[i]->time_integrate == 0) continue; - groupbit = fix[i]->groupbit; - for (j = 0; j < nlocal; j++) - if (mask[j] & groupbit) flag[j]++; - } - - int check = 0; - for (i = 0; i < nlocal; i++) - if (flag[i] > 1) check = 1; - - delete [] flag; - - int checkall; - MPI_Allreduce(&check,&checkall,1,MPI_INT,MPI_SUM,world); - if (comm->me == 0 && checkall) - error->warning(FLERR,"One or more atoms are time integrated more than once"); -} - -/* ---------------------------------------------------------------------- - 1st half of integrate call, only for relevant fixes -------------------------------------------------------------------------- */ - -void ModifyCuda::initial_integrate(int vflag) -{ - for(int i = 0; i < n_initial_integrate_cuda; i++) - fix[list_initial_integrate_cuda[i]]->initial_integrate(vflag); - - if(n_initial_integrate_host != 0) - { - cuda->downloadAll(); cuda->oncpu = true; - for (int i = 0; i < n_initial_integrate_host; i++) - fix[list_initial_integrate[i]]->initial_integrate(vflag); - cuda->uploadAll(); cuda->oncpu = false; - } -} - -/* ---------------------------------------------------------------------- - post_integrate call, only for 
relevant fixes -------------------------------------------------------------------------- */ - -void ModifyCuda::post_integrate() -{ - for(int i = 0; i < n_post_integrate_cuda; i++) - fix[list_post_integrate_cuda[i]]->post_integrate(); - - if(n_post_integrate_host != 0) - { - cuda->downloadAll(); cuda->oncpu = true; - for (int i = 0; i < n_post_integrate_host; i++) - fix[list_post_integrate[i]]->post_integrate(); - cuda->uploadAll(); cuda->oncpu = false; - } -} - -/* ---------------------------------------------------------------------- - pre_exchange call, only for relevant fixes -------------------------------------------------------------------------- */ - -void ModifyCuda::pre_exchange() -{ - for(int i = 0; i < n_pre_exchange_cuda; i++) - fix[list_pre_exchange_cuda[i]]->pre_exchange(); - - if(n_pre_exchange_host != 0) - { - cuda->downloadAll(); cuda->oncpu = true; - for (int i = 0; i < n_pre_exchange_host; i++) - fix[list_pre_exchange[i]]->pre_exchange(); - cuda->uploadAll(); cuda->oncpu = false; - } -} - -/* ---------------------------------------------------------------------- - pre_neighbor call, only for relevant fixes -------------------------------------------------------------------------- */ - -void ModifyCuda::pre_neighbor() -{ - for(int i = 0; i < n_pre_neighbor_cuda; i++) - fix[list_pre_neighbor_cuda[i]]->pre_neighbor(); - - if(n_pre_neighbor_host != 0) - { - cuda->downloadAll(); cuda->oncpu = true; - for (int i = 0; i < n_pre_neighbor_host; i++) - fix[list_pre_neighbor[i]]->pre_neighbor(); - cuda->uploadAll(); cuda->oncpu = false; - } -} - -/* ---------------------------------------------------------------------- - pre_force call, only for relevant fixes -------------------------------------------------------------------------- */ - -void ModifyCuda::setup_pre_force(int vflag) -{ - for(int i = 0; i < n_pre_force_cuda; i++) - fix[list_pre_force_cuda[i]]->pre_force(vflag); - - if(n_pre_force_host != 0) - { - cuda->downloadAll(); cuda->oncpu = true; - 
for (int i = 0; i < n_pre_force_host; i++) - fix[list_pre_force[i]]->pre_force(vflag); - cuda->uploadAll(); cuda->oncpu = false; - } -} - -void ModifyCuda::pre_force(int vflag) -{ - for(int i = 0; i < n_pre_force_cuda; i++) - fix[list_pre_force_cuda[i]]->pre_force(vflag); - - if(n_pre_force_host != 0) - { - cuda->downloadAll(); cuda->oncpu = true; - for (int i = 0; i < n_pre_force_host; i++) - fix[list_pre_force[i]]->pre_force(vflag); - cuda->uploadAll(); cuda->oncpu = false; - } -} - -/* ---------------------------------------------------------------------- - post_force call, only for relevant fixes -------------------------------------------------------------------------- */ - -void ModifyCuda::post_force(int vflag) -{ - for(int i = 0; i < n_post_force_cuda; i++) - fix[list_post_force_cuda[i]]->post_force(vflag); - - if(n_post_force_host != 0) - { - cuda->downloadAll(); cuda->oncpu = true; - for (int i = 0; i < n_post_force_host; i++) - fix[list_post_force[i]]->post_force(vflag); - cuda->uploadAll(); cuda->oncpu = false; - } -} - -/* ---------------------------------------------------------------------- - 2nd half of integrate call, only for relevant fixes -------------------------------------------------------------------------- */ - -void ModifyCuda::final_integrate() -{ - for (int i = 0; i < n_final_integrate_cuda; i++) - fix[list_final_integrate_cuda[i]]->final_integrate(); - - if(n_final_integrate_host != 0) - { - cuda->downloadAll(); cuda->oncpu = true; - for (int i = 0; i < n_final_integrate_host; i++) - fix[list_final_integrate[i]]->final_integrate(); - cuda->uploadAll(); cuda->oncpu = false; - } -} - -/* ---------------------------------------------------------------------- - end-of-timestep call, only for relevant fixes - only call fix->end_of_step() on timesteps that are multiples of nevery -------------------------------------------------------------------------- */ - -void ModifyCuda::end_of_step() -{ - for (int i = 0; i < n_end_of_step_cuda; i++) - 
if (update->ntimestep % end_of_step_every_cuda[i] == 0) - fix[list_end_of_step_cuda[i]]->end_of_step(); - - if(n_end_of_step_host != 0) - { - int do_thisstep=0; - for (int i = 0; i < n_end_of_step_host; i++) - if (update->ntimestep % end_of_step_every[i] == 0) do_thisstep=1; - if(do_thisstep) - { - cuda->downloadAll(); cuda->oncpu = true; - for (int i = 0; i < n_end_of_step_host; i++) - if (update->ntimestep % end_of_step_every[i] == 0) - fix[list_end_of_step[i]]->end_of_step(); - cuda->uploadAll(); cuda->oncpu = false; - } - } -} - -/* ---------------------------------------------------------------------- - thermo energy call, only for relevant fixes - called by Thermo class - compute_scalar() is fix call to return energy -------------------------------------------------------------------------- */ - -double ModifyCuda::thermo_energy() -{ - double energy = 0.0; - - for (int i = 0; i < n_thermo_energy_cuda; i++) - energy += fix[list_thermo_energy_cuda[i]]->compute_scalar(); - - if(n_thermo_energy_host != 0) - { - cuda->downloadAll(); cuda->oncpu = true; - for (int i = 0; i < n_thermo_energy_host; i++) - energy += fix[list_thermo_energy[i]]->compute_scalar(); - cuda->uploadAll(); cuda->oncpu = false; - } - - return energy; -} - - - -void ModifyCuda::list_init_end_of_step_cuda(int mask, int &n, int *&list) -{ - delete [] list; - delete [] end_of_step_every_cuda; - - n = 0; - for (int i = 0; i < nfix; i++) if (fmask[i] & mask) n++; - list = new int[n]; - end_of_step_every_cuda = new int[n]; - - n = 0; - for (int i = 0; i < nfix; i++) - if (fmask[i] & mask) { - list[n] = i; - end_of_step_every_cuda[n++] = fix[i]->nevery; - } -} diff --git a/src/USER-CUDA/modify_cuda.h b/src/USER-CUDA/modify_cuda.h deleted file mode 100644 index bfea217046..0000000000 --- a/src/USER-CUDA/modify_cuda.h +++ /dev/null @@ -1,83 +0,0 @@ -/* -*- c++ -*- ---------------------------------------------------------- - LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator - 
http://lammps.sandia.gov, Sandia National Laboratories - Steve Plimpton, sjplimp@sandia.gov - - Copyright (2003) Sandia Corporation. Under the terms of Contract - DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains - certain rights in this software. This software is distributed under - the GNU General Public License. - - See the README file in the top-level LAMMPS directory. -------------------------------------------------------------------------- */ - -#ifndef LMP_MODIFY_CUDA_H -#define LMP_MODIFY_CUDA_H - -#include <cstdio> -#include "modify.h" - -namespace LAMMPS_NS { - -class ModifyCuda : public Modify { - public: - - int n_initial_integrate_cuda; - int n_post_integrate_cuda; - int n_pre_exchange_cuda; - int n_pre_neighbor_cuda; - int n_pre_force_cuda; - int n_post_force_cuda; - int n_final_integrate_cuda; - int n_end_of_step_cuda; - int n_thermo_energy_cuda; - - int n_initial_integrate_host; - int n_post_integrate_host; - int n_pre_exchange_host; - int n_pre_neighbor_host; - int n_pre_force_host; - int n_post_force_host; - int n_final_integrate_host; - int n_end_of_step_host; - int n_thermo_energy_host; - - ModifyCuda(class LAMMPS *); - ~ModifyCuda(); - void init(); - void initial_integrate(int); - void post_integrate(); - //void pre_decide(); - void pre_exchange(); - void pre_neighbor(); - void setup_pre_force(int); - void pre_force(int); - void post_force(int); - void final_integrate(); - void end_of_step(); - double thermo_energy(); - - - protected: - class Cuda *cuda; - - // lists of fixes to apply at different stages of timestep - - // list of cuda fixes - int *list_initial_integrate_cuda; - int *list_post_integrate_cuda; - int *list_pre_exchange_cuda; - int *list_pre_neighbor_cuda; - int *list_pre_force_cuda; - int *list_post_force_cuda; - int *list_final_integrate_cuda; - int *list_end_of_step_cuda; - int *list_thermo_energy_cuda; - int *end_of_step_every_cuda; - - void list_init_end_of_step_cuda(int, int &, int *&); -}; - -} - 
-#endif diff --git a/src/USER-CUDA/neigh_full_cuda.cpp b/src/USER-CUDA/neigh_full_cuda.cpp deleted file mode 100644 index 5fd69f1105..0000000000 --- a/src/USER-CUDA/neigh_full_cuda.cpp +++ /dev/null @@ -1,307 +0,0 @@ -/* ---------------------------------------------------------------------- - LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator - - Original Version: - http://lammps.sandia.gov, Sandia National Laboratories - Steve Plimpton, sjplimp@sandia.gov - - See the README file in the top-level LAMMPS directory. - - ----------------------------------------------------------------------- - - USER-CUDA Package and associated modifications: - https://sourceforge.net/projects/lammpscuda/ - - Christian Trott, christian.trott@tu-ilmenau.de - Lars Winterfeld, lars.winterfeld@tu-ilmenau.de - Theoretical Physics II, University of Technology Ilmenau, Germany - - See the README file in the USER-CUDA directory. - - This software is distributed under the GNU General Public License. -------------------------------------------------------------------------- */ - -#include "neighbor_cuda.h" -#include "neigh_list.h" -#include "atom.h" -#include "domain.h" -#include "group.h" -#include "error.h" -#include "cuda_neigh_list.h" -#include "user_cuda.h" -#include "neighbor_cu.h" -#include <cmath> -using namespace LAMMPS_NS; - -/* ---------------------------------------------------------------------- - N^2 search for all neighbors - every neighbor pair appears in list of both atoms i and j -------------------------------------------------------------------------- */ -void NeighborCuda::full_bin_cuda(NeighList *list) -{ - MYDBG(printf(" # CUDA::NeighFullBinCuda ... start\n");) - if(includegroup) error->warning(FLERR,"Warning using inlcudegroup neighborbuild. 
This is not yet supported by CUDA neighborbuild styles.\n"); - int nlocal = atom->nlocal; - int nall = nlocal + atom->nghost; - - if(nlocal==0) return; - CudaNeighList* clist=list->cuda_list; - cuda_shared_neighlist* slist=&clist->sneighlist; - - if(not clist) cuda->registerNeighborList(list); - - clist->build_cuda=true; - - if(slist->bin_extraspace<0.09) - { - for(int i=1;i<=atom->ntypes;i++) - for(int j=1;j<=atom->ntypes;j++) - { - if(slist->maxcut<cutneighsq[i][j]) slist->maxcut=cutneighsq[i][j]; - } - slist->maxcut=sqrt(slist->maxcut); - } - int bin_dim_tmp[3]; - int bin_nmax_tmp; - do - { - do - { - bin_dim_tmp[0]=static_cast <int> ((domain->subhi[0]-domain->sublo[0])/slist->maxcut); - bin_dim_tmp[1]=static_cast <int> ((domain->subhi[1]-domain->sublo[1])/slist->maxcut); - bin_dim_tmp[2]=static_cast <int> ((domain->subhi[2]-domain->sublo[2])/slist->maxcut); - if(bin_dim_tmp[0]==0) bin_dim_tmp[0]+=1; - if(bin_dim_tmp[1]==0) bin_dim_tmp[1]+=1; - if(bin_dim_tmp[2]==0) bin_dim_tmp[2]+=1; - bin_nmax_tmp=static_cast <int> ((1.0+slist->bin_extraspace)*nlocal/(bin_dim_tmp[0]*bin_dim_tmp[1]*bin_dim_tmp[2])); - bin_dim_tmp[0]+=4; - bin_dim_tmp[1]+=4; - bin_dim_tmp[2]+=4; - if(bin_nmax_tmp<32) slist->maxcut*=1.2; - // printf("slist->maxcut: %lf\n", slist->maxcut); - } while(bin_nmax_tmp<32); - if((slist->bin_dim[0]!=bin_dim_tmp[0])||(slist->bin_dim[1]!=bin_dim_tmp[1])||(slist->bin_dim[2]!=bin_dim_tmp[2])||(slist->bin_nmax!=bin_nmax_tmp)) - { - if(slist->binned_id!=NULL) - CudaWrapper_FreeCudaData(slist->binned_id,slist->bin_dim[0]*slist->bin_dim[1]*slist->bin_dim[2]*slist->bin_nmax*sizeof(int)); - slist->bin_dim[0] = bin_dim_tmp[0]; - slist->bin_dim[1] = bin_dim_tmp[1]; - slist->bin_dim[2] = bin_dim_tmp[2]; - slist->bin_nmax = bin_nmax_tmp; - slist->binned_id=(int*) CudaWrapper_AllocCudaData(slist->bin_dim[0]*slist->bin_dim[1]*slist->bin_dim[2]*slist->bin_nmax*sizeof(int)); - //printf("slist->bin: %i %i %i %i \n", 
bin_dim_tmp[0],bin_dim_tmp[1],bin_dim_tmp[2],bin_nmax_tmp); - } - //if(list->cuda_list->sneighlist.bin_nmax>512) error->all(FLERR,"To many atoms per bin. Likely cause is very long pair cutoff. This needs major rewrite of code and is not yet scheduled to be done.\n"); - }while(Cuda_BinAtoms(&cuda->shared_data, &list->cuda_list->sneighlist)); - - // cuda->cu_debugdata->memset_device(0); - int maxneighbors=slist->maxneighbors; - - if((nex_type!=slist->nex_type)|| - (nex_group!=slist->nex_group)|| - (nex_mol!=slist->nex_mol)) - { - slist->nex_type=nex_type; - slist->nex_group=nex_group; - slist->nex_mol=nex_mol; - //printf("%i %i %i\n",nex_type,nex_group,nex_mol); - if(nex_type) - { - delete clist->cu_ex_type; - clist->cu_ex_type=new cCudaData<int , int , x> (&ex_type[0][0] , & slist->ex_type , (atom->ntypes+1)*(atom->ntypes+1) ); - clist->cu_ex_type->upload(); - } - //printf("AA %i %i %i\n",nex_type,nex_group,nex_mol); - if(nex_group) - { - delete clist->cu_ex1_bit; - clist->cu_ex1_bit=new cCudaData<int , int , x> (ex1_bit , & slist->ex1_bit , nex_group ); - clist->cu_ex1_bit->upload(); - //printf("A %i %i %i\n",nex_type,nex_group,nex_mol); - delete clist->cu_ex2_bit; - clist->cu_ex2_bit=new cCudaData<int , int , x> (ex2_bit , & slist->ex2_bit , nex_group ); - clist->cu_ex2_bit->upload(); - } - //printf("B %i %i %i\n",nex_type,nex_group,nex_mol); - if(nex_mol) - { - delete clist->cu_ex_mol_bit; - clist->cu_ex_mol_bit=new cCudaData<int , int , x> (ex_mol_bit , & slist->ex_mol_bit , nex_mol ); - clist->cu_ex_mol_bit->upload(); - } - //printf("C %i %i %i\n",nex_type,nex_group,nex_mol); - } - int overflow = 0; - do - { - overflow=0; - clist->grow_device(); - slist->cutneighsq=cutneighsq; - slist->maxneighbors=maxneighbors; - slist->inum = list->inum = nlocal; - //list->cuda_list->grow_device(); - if(cuda->shared_data.overlap_comm) - { - list->cuda_list->inum_border=0; - list->cuda_list->cu_inum_border->upload(); - } - - cuda->shared_data.atom.nall=nall; - 
//Cuda_NeighborReBuildFirstneigh(&cuda->shared_data, &list->cuda_list->sneighlist); - overflow= Cuda_NeighborBuildFullBin(&cuda->shared_data, &list->cuda_list->sneighlist); - - /*cuda->cu_debugdata->download(); - printf("Debugdata: %i ",cuda->debugdata[0]); - for(int i=0;i<cuda->debugdata[0];i+=3) printf("// %i %i %i",cuda->debugdata[i+1],cuda->debugdata[i+2],cuda->debugdata[i+3]); - printf("\n");*/ - //printf("maxneighborsA: %i %i %i %i\n",maxneighbors,pgsize,oneatom,atom->nmax); - - if(overflow<0) - { - maxneighbors+=32; - if(-overflow>maxneighbors) maxneighbors=((-overflow+37)/32)*32; - delete list->cuda_list->cu_neighbors; - delete [] list->cuda_list->neighbors; - list->cuda_list->neighbors= new int[slist->maxlocal*maxneighbors]; - list->cuda_list->sneighlist.maxneighbors=maxneighbors; - //printf("maxneighborsA1: %i %i %i %i %i\n",maxneighbors,pgsize,oneatom,atom->nmax,slist->maxlocal); - list->cuda_list->cu_neighbors= new cCudaData<int, int, x> (list->cuda_list->neighbors , & list->cuda_list->sneighlist.neighbors, slist->maxlocal*maxneighbors ); - //printf("maxneighborsA2: %i %i %i %i\n",maxneighbors,pgsize,oneatom,atom->nmax); - - if(cuda->shared_data.overlap_comm) - { - list->cuda_list->sneighlist.maxneighbors=maxneighbors; - list->cuda_list->dev_free(); - list->cuda_list->dev_alloc(); - } - //printf("maxneighborsA3: %i %i %i %i\n",maxneighbors,pgsize,oneatom,atom->nmax); - } - //printf("maxneighborsB: %i %i %i %i\n",maxneighbors,pgsize,oneatom,atom->nmax); - if(cuda->shared_data.overlap_comm) - { - list->cuda_list->cu_inum_border->download(); - list->cuda_list->sneighlist.inum_border2=list->cuda_list->inum_border; - } - } - while(overflow<0); - - //cuda->cu_debugdata->download(); - // printf("Differences in: %i\n",cuda->debugdata[0]); - // for(int i=0;i<20;i++) printf("%i %i %i %i// ",cuda->debugdata[4*i+1],cuda->debugdata[4*i+2],cuda->debugdata[4*i+3],cuda->debugdata[4*i+4]); -// printf("\n"); -/*for(int i=0;i<10;i++) -{ - printf("%i %i // 
",i,numneigh[i]); - for(int j=0;j<numneigh[i];j++) - printf("%i ",list->cuda_list->neighbors[i+j*nlocal]); - printf("\n"); -}*/ -/* int count=0; - if(cuda->shared_data.overlap_comm) - { - list->cuda_list->cu_inum_border->download(); - list->cuda_list->cu_ilist_border->download(); - list->cuda_list->cu_numneigh_border->download(); - list->cuda_list->cu_numneigh_inner->download(); - list->cuda_list->cu_neighbors->download(); - list->cuda_list->cu_neighbors_inner->download(); - list->cuda_list->cu_neighbors_border->download(); - - //list->cuda_list->cu_firstneigh->download(); - // list->cuda_list->nl_download(); - list->cuda_list->cu_numneigh->download(); - int diff=0; - //for(int i=0;i<nlocal;i++)*/ - /* int i=123; - { - int k=-1; - //printf("inum_border: %i\n",list->cuda_list->inum_border); - //for(int j=0;j<list->numneigh[i];j++) printf("%i ",list->firstneigh[i][j]);printf("\n"); - for(int j=0;j<list->cuda_list->inum_border;j++) - if(list->cuda_list->ilist_border[j]==i) k=j; - int d=numneigh[i]-list->cuda_list->numneigh_inner[i]; - if(k>-1) d-=list->cuda_list->numneigh_border[k]; - if(d!=0) {printf("Error at %i %i %i %i %i\n",i,k,d,numneigh[i],list->cuda_list->numneigh_inner[i]); diff++;} - if(k>-1 && count<10) - { - printf("Numneighs: %i %i %i Border_i: %i %i\n",numneigh[i],list->cuda_list->numneigh_inner[i],list->cuda_list->numneigh_border[k],k,(int)list->cuda_list->cu_ilist_border->dev_data()); - cuda->shared_data.me=k; - for(int j=0;j<numneigh[i];j++) - printf("%i ",list->cuda_list->neighbors[i+j*nlocal]); - printf("\n"); - for(int j=0;j<list->cuda_list->numneigh_inner[i];j++) - printf("%i ",list->cuda_list->neighbors_inner[i+j*nlocal]); - printf(" // "); - for(int j=0;j<list->cuda_list->numneigh_border[k];j++) - printf("%i ",list->cuda_list->neighbors_border[k+j*nlocal]); - printf("\n"); - count++; - } - } - printf("%i\n",diff); - }*/ - list->cuda_list->cu_numneigh->download(); - list->cuda_list->cu_ilist->download(); - cuda->shared_data.atom.update_neigh=2; - 
//printf("Done\n"); - - MYDBG(printf(" # CUDA::NeighFullBinCuda ... end\n");) - -} - - -void NeighborCuda::full_nsq_cuda(NeighList *list) -{ - printf("Full_Nsq cuda neighbor list build is not implemented anymore.\n"); -return; -/* - MYDBG(printf(" # CUDA::NeighFullNSQCuda ... start\n");) - int nlocal = atom->nlocal; - int nall = nlocal + atom->nghost; - - if(cuda->cu_xhold) cuda->cu_xhold->upload(); - - - if(not list->cuda_list) cuda->registerNeighborList(list); - list->cuda_list->build_cuda=true; - int maxneighbors=list->cuda_list->sneighlist.maxneighbors; - int neigh_lists_per_page=pgsize/maxneighbors; - int *ilist = list->ilist; - int *numneigh = list->numneigh; - int **firstneigh = list->firstneigh; - int **pages = list->pages; - - int overflow = 0; - int inum = 0; - int npage = 0; - int npnt = 0; - do - { - npage=0; - npnt=0; - inum=0; - overflow=0; - neigh_lists_per_page=pgsize/maxneighbors; - npage=(2*nlocal*maxneighbors-1)/pgsize; - while(npage>list->maxpage) list->add_pages(); - pages = list->pages; - npage=0; - list->cuda_list->sneighlist.neigh_lists_per_page=pgsize/maxneighbors; - list->cuda_list->grow_device(); - list->cuda_list->sneighlist.cutneighsq=cutneighsq; - list->cuda_list->sneighlist.maxneighbors=maxneighbors; - list->cuda_list->sneighlist.inum = list->inum = nlocal; - - cuda->shared_data.atom.nall=nall; - Cuda_NeighborReBuildFirstneigh(&cuda->shared_data, &list->cuda_list->sneighlist); - overflow= not Cuda_NeighborBuildFullNsq(&cuda->shared_data, &list->cuda_list->sneighlist); - - - - if(overflow) maxneighbors+=32; - } - while(overflow); - if(not cudable) list->cuda_list->nl_download(); - MYDBG(printf(" # CUDA::NeighFullNSQCuda ... 
end\n");) - */ -} diff --git a/src/USER-CUDA/neighbor_cuda.cpp b/src/USER-CUDA/neighbor_cuda.cpp deleted file mode 100644 index 015e85ff07..0000000000 --- a/src/USER-CUDA/neighbor_cuda.cpp +++ /dev/null @@ -1,240 +0,0 @@ -/* ---------------------------------------------------------------------- - LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator - http://lammps.sandia.gov, Sandia National Laboratories - Steve Plimpton, sjplimp@sandia.gov - - Copyright (2003) Sandia Corporation. Under the terms of Contract - DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains - certain rights in this software. This software is distributed under - the GNU General Public License. - - See the README file in the top-level LAMMPS directory. -------------------------------------------------------------------------- */ - -#include <math.h> -#include "neighbor_cuda.h" -#include "user_cuda.h" -#include "atom.h" -#include "atom_vec.h" -#include "domain.h" -#include "neigh_list.h" -#include "neigh_request.h" -#include "force.h" -#include "group.h" -#include "memory.h" -#include "error.h" -#include "update.h" - -using namespace LAMMPS_NS; - - - - -enum {NSQ, BIN, MULTI}; // also in neigh_list.cpp - -/* ---------------------------------------------------------------------- */ - -NeighborCuda::NeighborCuda(LAMMPS* lmp) : Neighbor(lmp) -{ - cuda = lmp->cuda; - - if(cuda == NULL) - error->all(FLERR, "You cannot use a /cuda class, without activating 'cuda' acceleration. 
Provide '-c on' as command-line argument to LAMMPS.."); -} - -/* ---------------------------------------------------------------------- */ - -void NeighborCuda::init() -{ - cuda->set_neighinit(dist_check, 0.25 * skin * skin); - cudable = 1; - - Neighbor::init(); -} - -/* ---------------------------------------------------------------------- - overwrite either full_nsq or full_bin with CUDA-equivalent methods - any other neighbor build method is unchanged -------------------------------------------------------------------------- */ - -void NeighborCuda::choose_build(int index, NeighRequest* rq) -{ - Neighbor::choose_build(index, rq); - - if(rq->full && style == NSQ && rq->cudable) - pair_build[index] = (Neighbor::PairPtr) &NeighborCuda::full_nsq_cuda; - else if(rq->full && style == BIN && rq->cudable) - pair_build[index] = (Neighbor::PairPtr) &NeighborCuda::full_bin_cuda; -} - -/* ---------------------------------------------------------------------- */ - -int NeighborCuda::check_distance() -{ - double delx, dely, delz, rsq; - double delta, deltasq, delta1, delta2; - - if(boxcheck) { - if(triclinic == 0) { - delx = bboxlo[0] - boxlo_hold[0]; - dely = bboxlo[1] - boxlo_hold[1]; - delz = bboxlo[2] - boxlo_hold[2]; - delta1 = sqrt(delx * delx + dely * dely + delz * delz); - delx = bboxhi[0] - boxhi_hold[0]; - dely = bboxhi[1] - boxhi_hold[1]; - delz = bboxhi[2] - boxhi_hold[2]; - delta2 = sqrt(delx * delx + dely * dely + delz * delz); - delta = 0.5 * (skin - (delta1 + delta2)); - deltasq = delta * delta; - } else { - domain->box_corners(); - delta1 = delta2 = 0.0; - - for(int i = 0; i < 8; i++) { - delx = corners[i][0] - corners_hold[i][0]; - dely = corners[i][1] - corners_hold[i][1]; - delz = corners[i][2] - corners_hold[i][2]; - delta = sqrt(delx * delx + dely * dely + delz * delz); - - if(delta > delta1) delta1 = delta; - else if(delta > delta2) delta2 = delta; - } - - delta = 0.5 * (skin - (delta1 + delta2)); - deltasq = delta * delta; - } - } else deltasq = 
triggersq; - - double** x = atom->x; - int nlocal = atom->nlocal; - - if(includegroup) nlocal = atom->nfirst; - - int flag = 0; - - if(not cuda->neighbor_decide_by_integrator) { - cuda->cu_x_download(); - - for(int i = 0; i < nlocal; i++) { - delx = x[i][0] - xhold[i][0]; - dely = x[i][1] - xhold[i][1]; - delz = x[i][2] - xhold[i][2]; - rsq = delx * delx + dely * dely + delz * delz; - - if(rsq > deltasq) flag = 1; - } - } else flag = cuda->shared_data.atom.reneigh_flag; - - int flagall; - MPI_Allreduce(&flag, &flagall, 1, MPI_INT, MPI_MAX, world); - - if(flagall && ago == MAX(every, delay)) ndanger++; - - return flagall; -} - -/* ---------------------------------------------------------------------- */ - -void NeighborCuda::build(int topoflag) -{ - int i; - - ago = 0; - ncalls++; - lastcall = update->ntimestep; - // store current atom positions and box size if needed - - if(dist_check) { - if(cuda->decide_by_integrator()) - cuda->update_xhold(maxhold, &xhold[0][0]); - else { - if(cuda->finished_setup) cuda->cu_x_download(); - - double** x = atom->x; - int nlocal = atom->nlocal; - - if(includegroup) nlocal = atom->nfirst; - - if(nlocal > maxhold) { - maxhold = atom->nmax; - memory->destroy(xhold); - memory->create(xhold, maxhold, 3, "neigh:xhold"); - } - - for(i = 0; i < nlocal; i++) { - xhold[i][0] = x[i][0]; - xhold[i][1] = x[i][1]; - xhold[i][2] = x[i][2]; - } - - if(boxcheck) { - if(triclinic == 0) { - boxlo_hold[0] = bboxlo[0]; - boxlo_hold[1] = bboxlo[1]; - boxlo_hold[2] = bboxlo[2]; - boxhi_hold[0] = bboxhi[0]; - boxhi_hold[1] = bboxhi[1]; - boxhi_hold[2] = bboxhi[2]; - } else { - domain->box_corners(); - corners = domain->corners; - - for(i = 0; i < 8; i++) { - corners_hold[i][0] = corners[i][0]; - corners_hold[i][1] = corners[i][1]; - corners_hold[i][2] = corners[i][2]; - } - } - } - } - } - - if(not cudable && cuda->finished_setup && atom->avec->cudable) - cuda->downloadAll(); - - if(cudable && (not cuda->finished_setup)) { - cuda->checkResize(); - 
cuda->uploadAll(); - } - - // if any lists store neighbors of ghosts: - // invoke grow() if nlocal+nghost exceeds previous list size - // else only invoke grow() if nlocal exceeds previous list size - // only done for lists with growflag set and which are perpetual - - if(anyghostlist && atom->nlocal + atom->nghost > maxatom) { - maxatom = atom->nmax; - - for(i = 0; i < nglist; i++) lists[glist[i]]->grow(maxatom); - } else if(atom->nlocal > maxatom) { - maxatom = atom->nmax; - - for(i = 0; i < nglist; i++) lists[glist[i]]->grow(maxatom); - } - - // extend atom bin list if necessary - - if(style != NSQ && atom->nmax > maxbin) { - maxbin = atom->nmax; - memory->destroy(bins); - memory->create(bins, maxbin, "bins"); - } - - // check that neighbor list with special bond flags will not overflow - - if(atom->nlocal + atom->nghost > NEIGHMASK) - error->one(FLERR, "Too many local+ghost atoms for neighbor list"); - - // invoke building of pair and molecular neighbor lists - // only for pairwise lists with buildflag set - - for(i = 0; i < nblist; i++) - (this->*pair_build[blist[i]])(lists[blist[i]]); - - if(atom->molecular && topoflag) { - if(force->bond)(this->*bond_build)(); - if(force->angle)(this->*angle_build)(); - if(force->dihedral)(this->*dihedral_build)(); - if(force->improper)(this->*improper_build)(); - } -} diff --git a/src/USER-CUDA/neighbor_cuda.h b/src/USER-CUDA/neighbor_cuda.h deleted file mode 100644 index 708f45fde2..0000000000 --- a/src/USER-CUDA/neighbor_cuda.h +++ /dev/null @@ -1,39 +0,0 @@ -/* -*- c++ -*- ---------------------------------------------------------- - LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator - http://lammps.sandia.gov, Sandia National Laboratories - Steve Plimpton, sjplimp@sandia.gov - - Copyright (2003) Sandia Corporation. Under the terms of Contract - DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains - certain rights in this software. 
This software is distributed under - the GNU General Public License. - - See the README file in the top-level LAMMPS directory. -------------------------------------------------------------------------- */ - -#ifndef LMP_NEIGHBOR_CUDA_H -#define LMP_NEIGHBOR_CUDA_H - -#include "neighbor.h" - -namespace LAMMPS_NS { - -class NeighborCuda : public Neighbor { - public: - NeighborCuda(class LAMMPS *); - void init(); - int check_distance(); - void build(int do_build_bonded=1); - - private: - class Cuda *cuda; - - void choose_build(int, class NeighRequest *); - typedef void (NeighborCuda::*PairPtr)(class NeighList *); - void full_nsq_cuda(class NeighList *); - void full_bin_cuda(class NeighList *); -}; - -} - -#endif diff --git a/src/USER-CUDA/pair_born_coul_long_cuda.cpp b/src/USER-CUDA/pair_born_coul_long_cuda.cpp deleted file mode 100644 index d01a5bf47f..0000000000 --- a/src/USER-CUDA/pair_born_coul_long_cuda.cpp +++ /dev/null @@ -1,183 +0,0 @@ -/* ---------------------------------------------------------------------- - LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator - - Original Version: - http://lammps.sandia.gov, Sandia National Laboratories - Steve Plimpton, sjplimp@sandia.gov - - See the README file in the top-level LAMMPS directory. - - Contributing author: Paul Crozier (SNL) - ----------------------------------------------------------------------- - - USER-CUDA Package and associated modifications: - https://sourceforge.net/projects/lammpscuda/ - - Christian Trott, christian.trott@tu-ilmenau.de - Lars Winterfeld, lars.winterfeld@tu-ilmenau.de - Theoretical Physics II, University of Technology Ilmenau, Germany - - See the README file in the USER-CUDA directory. - - This software is distributed under the GNU General Public License. 
-------------------------------------------------------------------------- */ - -#include <cmath> -#include <cstdio> -#include <cstdlib> -#include <cstring> -#include "pair_born_coul_long_cuda.h" -#include "pair_born_coul_long_cuda_cu.h" -#include "cuda_data.h" -#include "atom.h" -#include "comm.h" -#include "force.h" -#include "kspace.h" -#include "neighbor.h" -#include "neigh_list.h" -#include "neigh_request.h" -#include "cuda_neigh_list.h" -#include "update.h" -#include "integrate.h" -#include "respa.h" -#include "memory.h" -#include "error.h" -#include "user_cuda.h" - -using namespace LAMMPS_NS; - -#define EWALD_F 1.12837917 -#define EWALD_P 0.3275911 -#define A1 0.254829592 -#define A2 -0.284496736 -#define A3 1.421413741 -#define A4 -1.453152027 -#define A5 1.061405429 -/* ---------------------------------------------------------------------- */ - -PairBornCoulLongCuda::PairBornCoulLongCuda(LAMMPS *lmp) : PairBornCoulLong(lmp) -{ - cuda = lmp->cuda; - if(cuda == NULL) - error->all(FLERR,"You cannot use a /cuda class, without activating 'cuda' acceleration. Provide '-c on' as command-line argument to LAMMPS.."); - - allocated2 = false; - cuda->shared_data.pair.cudable_force = 1; - cuda->shared_data.pair.use_block_per_atom = 0; - cuda->setSystemParams(); -} - -/* ---------------------------------------------------------------------- - remember pointer to arrays in cuda shared data -------------------------------------------------------------------------- */ - -void PairBornCoulLongCuda::allocate() -{ - if(! allocated) PairBornCoulLong::allocate(); - if(! 
allocated2) - { - allocated2 = true; - cuda->shared_data.pair.cut = cut_lj; - cuda->shared_data.pair.coeff1 = rhoinv; - cuda->shared_data.pair.coeff2 = sigma; - cuda->shared_data.pair.coeff3 = a; - cuda->shared_data.pair.coeff4 = c; - cuda->shared_data.pair.coeff5 = d; - cuda->shared_data.pair.offset = offset; - cuda->shared_data.pair.special_lj = force->special_lj; - cuda->shared_data.pair.special_coul = force->special_coul; - } -} - -/* ---------------------------------------------------------------------- */ - -void PairBornCoulLongCuda::compute(int eflag, int vflag) -{ - MYDBG( printf("PairBornCoulLongCuda compute start\n"); fflush(stdout);) - if (eflag || vflag) ev_setup(eflag,vflag); - if(eflag) cuda->cu_eng_vdwl->upload(); - if(eflag) cuda->cu_eng_coul->upload(); - if(vflag) cuda->cu_virial->upload(); - #ifdef CUDA_USE_BINNING - Cuda_PairBornCoulLongCuda(& cuda->shared_data, eflag, vflag); - #else - Cuda_PairBornCoulLongCuda(& cuda->shared_data, & cuda_neigh_list->sneighlist, eflag, vflag, eflag_atom, vflag_atom); - #endif - if(not cuda->shared_data.pair.collect_forces_later) - { - if(eflag) cuda->cu_eng_vdwl->download(); - if(eflag) cuda->cu_eng_coul->download(); - if(vflag) cuda->cu_virial->download(); - } - MYDBG( printf("PairBornCoulLongCuda compute end\n"); fflush(stdout);) -} - -/* ---------------------------------------------------------------------- */ - -void PairBornCoulLongCuda::settings(int narg, char **arg) -{ - PairBornCoulLong::settings(narg, arg); - cuda->shared_data.pair.cut_global = (F_CFLOAT) cut_lj_global; -} - -/* ---------------------------------------------------------------------- */ - -void PairBornCoulLongCuda::coeff(int narg, char **arg) -{ - PairBornCoulLong::coeff(narg, arg); - allocate(); -} - -void PairBornCoulLongCuda::init_style() -{ - if (!atom->q_flag) - error->all(FLERR,"Pair style born/coul/long requires atom attribute q"); - // request regular or rRESPA neighbor lists - - int irequest; - - if 
(strstr(update->integrate_style,"respa")) error->all(FLERR,"Integrate Style Respa is not supported by pair style buck/coul/long/cuda"); - - irequest = neighbor->request(this,instance_me); - neighbor->requests[irequest]->full = 1; - neighbor->requests[irequest]->half = 0; - neighbor->requests[irequest]->cudable = 1; - - - cut_coulsq = cut_coul * cut_coul; - cuda->shared_data.pair.cut_coulsq_global=cut_coulsq; - - if (force->kspace == NULL) - error->all(FLERR,"Pair style is incompatible with KSpace style"); - g_ewald = force->kspace->g_ewald; - cuda->shared_data.pair.g_ewald=g_ewald; - cuda->shared_data.pppm.qqrd2e=force->qqrd2e; - - - if(ncoultablebits) error->warning(FLERR,"# CUDA: You asked for the usage of Coulomb Tables. This is not supported in CUDA Pair forces. Setting is ignored.\n"); -} - -void PairBornCoulLongCuda::init_list(int id, NeighList *ptr) -{ - MYDBG(printf("# CUDA PairBornCoulLongCuda::init_list\n");) - PairBornCoulLong::init_list(id, ptr); - #ifndef CUDA_USE_BINNING - // right now we can only handle verlet (id 0), not respa - if(id == 0) cuda_neigh_list = cuda->registerNeighborList(ptr); - // see Neighbor::init() for details on lammps lists' logic - #endif - MYDBG(printf("# CUDA PairBornCoulLongCuda::init_list end\n");) -} - -void PairBornCoulLongCuda::ev_setup(int eflag, int vflag) -{ - int maxeatomold=maxeatom; - PairBornCoulLong::ev_setup(eflag,vflag); - - if (eflag_atom && atom->nmax > maxeatomold) - {delete cuda->cu_eatom; cuda->cu_eatom = new cCudaData<double, ENERGY_CFLOAT, x > ((double*)eatom, & cuda->shared_data.atom.eatom , atom->nmax );} - - if (vflag_atom && atom->nmax > maxeatomold) - {delete cuda->cu_vatom; cuda->cu_vatom = new cCudaData<double, ENERGY_CFLOAT, yx > ((double*)vatom, & cuda->shared_data.atom.vatom , atom->nmax, 6 );} - -} diff --git a/src/USER-CUDA/pair_born_coul_long_cuda.h b/src/USER-CUDA/pair_born_coul_long_cuda.h deleted file mode 100644 index 6e4f42cf3b..0000000000 --- a/src/USER-CUDA/pair_born_coul_long_cuda.h 
+++ /dev/null @@ -1,57 +0,0 @@ -/* -*- c++ -*- ---------------------------------------------------------- - LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator - - Original Version: - http://lammps.sandia.gov, Sandia National Laboratories - Steve Plimpton, sjplimp@sandia.gov - - See the README file in the top-level LAMMPS directory. - - ----------------------------------------------------------------------- - - USER-CUDA Package and associated modifications: - https://sourceforge.net/projects/lammpscuda/ - - Christian Trott, christian.trott@tu-ilmenau.de - Lars Winterfeld, lars.winterfeld@tu-ilmenau.de - Theoretical Physics II, University of Technology Ilmenau, Germany - - See the README file in the USER-CUDA directory. - - This software is distributed under the GNU General Public License. -------------------------------------------------------------------------- */ - -#ifdef PAIR_CLASS - -PairStyle(born/coul/long/cuda,PairBornCoulLongCuda) - -#else - -#ifndef LMP_PAIR_BORN_COUL_LONG_CUDA_H -#define LMP_PAIR_BORN_COUL_LONG_CUDA_H - -#include "pair_born_coul_long.h" - -namespace LAMMPS_NS { - -class PairBornCoulLongCuda : public PairBornCoulLong -{ - public: - PairBornCoulLongCuda(class LAMMPS *); - void compute(int, int); - void settings(int, char **); - void coeff(int, char **); - void init_list(int, class NeighList *); - void init_style(); - void ev_setup(int eflag, int vflag); - protected: - class Cuda *cuda; - void allocate(); - bool allocated2; - class CudaNeighList* cuda_neigh_list; -}; - -} - -#endif -#endif diff --git a/src/USER-CUDA/pair_buck_coul_cut_cuda.cpp b/src/USER-CUDA/pair_buck_coul_cut_cuda.cpp deleted file mode 100644 index 4291b82752..0000000000 --- a/src/USER-CUDA/pair_buck_coul_cut_cuda.cpp +++ /dev/null @@ -1,170 +0,0 @@ -/* ---------------------------------------------------------------------- - LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator - - Original Version: - http://lammps.sandia.gov, Sandia National 
Laboratories - Steve Plimpton, sjplimp@sandia.gov - - See the README file in the top-level LAMMPS directory. - - Contributing author: Paul Crozier (SNL) - ----------------------------------------------------------------------- - - USER-CUDA Package and associated modifications: - https://sourceforge.net/projects/lammpscuda/ - - Christian Trott, christian.trott@tu-ilmenau.de - Lars Winterfeld, lars.winterfeld@tu-ilmenau.de - Theoretical Physics II, University of Technology Ilmenau, Germany - - See the README file in the USER-CUDA directory. - - This software is distributed under the GNU General Public License. -------------------------------------------------------------------------- */ - -#include <cmath> -#include <cstdio> -#include <cstdlib> -#include <cstring> -#include "pair_buck_coul_cut_cuda.h" -#include "pair_buck_coul_cut_cuda_cu.h" -#include "cuda_data.h" -#include "atom.h" -#include "comm.h" -#include "force.h" -#include "kspace.h" -#include "neighbor.h" -#include "neigh_list.h" -#include "neigh_request.h" -#include "cuda_neigh_list.h" -#include "update.h" -#include "integrate.h" -#include "respa.h" -#include "memory.h" -#include "error.h" -#include "user_cuda.h" - -using namespace LAMMPS_NS; - -/* ---------------------------------------------------------------------- */ - -PairBuckCoulCutCuda::PairBuckCoulCutCuda(LAMMPS *lmp) : PairBuckCoulCut(lmp) -{ - cuda = lmp->cuda; - if(cuda == NULL) - error->all(FLERR,"You cannot use a /cuda class, without activating 'cuda' acceleration. Provide '-c on' as command-line argument to LAMMPS.."); - - allocated2 = false; - cuda->shared_data.pair.cudable_force = 1; - cuda->shared_data.pair.use_block_per_atom = 0; - cuda->setSystemParams(); -} - -/* ---------------------------------------------------------------------- - remember pointer to arrays in cuda shared data -------------------------------------------------------------------------- */ - -void PairBuckCoulCutCuda::allocate() -{ - if(! 
allocated) PairBuckCoulCut::allocate(); - if(! allocated2) - { - allocated2 = true; - cuda->shared_data.pair.cut_coul = cut_coul; - cuda->shared_data.pair.cut = cut_lj; - cuda->shared_data.pair.coeff1 = rhoinv; - cuda->shared_data.pair.coeff2 = buck1; - cuda->shared_data.pair.coeff3 = buck2; - cuda->shared_data.pair.coeff4 = a; - cuda->shared_data.pair.coeff5 = c; - cuda->shared_data.pair.offset = offset; - cuda->shared_data.pair.special_lj = force->special_lj; - cuda->shared_data.pair.special_coul = force->special_coul; - } -} - -/* ---------------------------------------------------------------------- */ - -void PairBuckCoulCutCuda::compute(int eflag, int vflag) -{ - MYDBG( printf("PairBuckCoulCutCuda compute start\n"); fflush(stdout);) - if (eflag || vflag) ev_setup(eflag,vflag); - if(eflag) cuda->cu_eng_vdwl->upload(); - if(eflag) cuda->cu_eng_coul->upload(); - if(vflag) cuda->cu_virial->upload(); - - Cuda_PairBuckCoulCutCuda(& cuda->shared_data, & cuda_neigh_list->sneighlist, eflag, vflag, eflag_atom, vflag_atom); - - if(not cuda->shared_data.pair.collect_forces_later) - { - if(eflag) cuda->cu_eng_vdwl->download(); - if(eflag) cuda->cu_eng_coul->download(); - if(vflag) cuda->cu_virial->download(); - } - MYDBG( printf("PairBuckCoulCutCuda compute end\n"); fflush(stdout);) -} - -/* ---------------------------------------------------------------------- */ - -void PairBuckCoulCutCuda::settings(int narg, char **arg) -{ - PairBuckCoulCut::settings(narg, arg); - cuda->shared_data.pair.cut_coul_global = (F_CFLOAT) cut_coul_global; - cuda->shared_data.pair.cut_global = (F_CFLOAT) cut_lj_global; -} - -/* ---------------------------------------------------------------------- */ - -void PairBuckCoulCutCuda::coeff(int narg, char **arg) -{ - PairBuckCoulCut::coeff(narg, arg); - allocate(); -} - -void PairBuckCoulCutCuda::init_style() -{ - if (!atom->q_flag) - error->all(FLERR,"Pair style buck/coul/long requires atom attribute q"); - // request regular or rRESPA neighbor 
lists - - int irequest; - - if (strstr(update->integrate_style,"respa")) error->all(FLERR,"Integrate Style Respa is not supported by pair style buck/coul/long/cuda"); - - irequest = neighbor->request(this,instance_me); - neighbor->requests[irequest]->full = 1; - neighbor->requests[irequest]->half = 0; - neighbor->requests[irequest]->cudable = 1; - - - cuda->shared_data.pppm.qqrd2e=force->qqrd2e; - - cuda->shared_data.pair.cut_coulsq_global=cut_coul_global * cut_coul_global; - - if(ncoultablebits) error->warning(FLERR,"# CUDA: You asked for the usage of Coulomb Tables. This is not supported in CUDA Pair forces. Setting is ignored.\n"); -} - -void PairBuckCoulCutCuda::init_list(int id, NeighList *ptr) -{ - MYDBG(printf("# CUDA PairBuckCoulCutCuda::init_list\n");) - PairBuckCoulCut::init_list(id, ptr); - #ifndef CUDA_USE_BINNING - // right now we can only handle verlet (id 0), not respa - if(id == 0) cuda_neigh_list = cuda->registerNeighborList(ptr); - // see Neighbor::init() for details on lammps lists' logic - #endif - MYDBG(printf("# CUDA PairBuckCoulCutCuda::init_list end\n");) -} - -void PairBuckCoulCutCuda::ev_setup(int eflag, int vflag) -{ - int maxeatomold=maxeatom; - PairBuckCoulCut::ev_setup(eflag,vflag); - - if (eflag_atom && atom->nmax > maxeatomold) - {delete cuda->cu_eatom; cuda->cu_eatom = new cCudaData<double, ENERGY_CFLOAT, x > ((double*)eatom, & cuda->shared_data.atom.eatom , atom->nmax );} - - if (vflag_atom && atom->nmax > maxeatomold) - {delete cuda->cu_vatom; cuda->cu_vatom = new cCudaData<double, ENERGY_CFLOAT, yx > ((double*)vatom, & cuda->shared_data.atom.vatom , atom->nmax, 6 );} - -} diff --git a/src/USER-CUDA/pair_buck_coul_cut_cuda.h b/src/USER-CUDA/pair_buck_coul_cut_cuda.h deleted file mode 100644 index f66b70fb00..0000000000 --- a/src/USER-CUDA/pair_buck_coul_cut_cuda.h +++ /dev/null @@ -1,57 +0,0 @@ -/* -*- c++ -*- ---------------------------------------------------------- - LAMMPS - Large-scale Atomic/Molecular Massively Parallel 
Simulator - - Original Version: - http://lammps.sandia.gov, Sandia National Laboratories - Steve Plimpton, sjplimp@sandia.gov - - See the README file in the top-level LAMMPS directory. - - ----------------------------------------------------------------------- - - USER-CUDA Package and associated modifications: - https://sourceforge.net/projects/lammpscuda/ - - Christian Trott, christian.trott@tu-ilmenau.de - Lars Winterfeld, lars.winterfeld@tu-ilmenau.de - Theoretical Physics II, University of Technology Ilmenau, Germany - - See the README file in the USER-CUDA directory. - - This software is distributed under the GNU General Public License. -------------------------------------------------------------------------- */ - -#ifdef PAIR_CLASS - -PairStyle(buck/coul/cut/cuda,PairBuckCoulCutCuda) - -#else - -#ifndef LMP_PAIR_BUCK_COUL_CUT_CUDA_H -#define LMP_PAIR_BUCK_COUL_CUT_CUDA_H - -#include "pair_buck_coul_cut.h" - -namespace LAMMPS_NS { - -class PairBuckCoulCutCuda : public PairBuckCoulCut -{ - public: - PairBuckCoulCutCuda(class LAMMPS *); - void compute(int, int); - void settings(int, char **); - void coeff(int, char **); - void init_list(int, class NeighList *); - void init_style(); - void ev_setup(int eflag, int vflag); - protected: - class Cuda *cuda; - void allocate(); - bool allocated2; - class CudaNeighList* cuda_neigh_list; -}; - -} - -#endif -#endif diff --git a/src/USER-CUDA/pair_buck_coul_long_cuda.cpp b/src/USER-CUDA/pair_buck_coul_long_cuda.cpp deleted file mode 100644 index 8c8d667165..0000000000 --- a/src/USER-CUDA/pair_buck_coul_long_cuda.cpp +++ /dev/null @@ -1,181 +0,0 @@ -/* ---------------------------------------------------------------------- - LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator - - Original Version: - http://lammps.sandia.gov, Sandia National Laboratories - Steve Plimpton, sjplimp@sandia.gov - - See the README file in the top-level LAMMPS directory. 
- - Contributing author: Paul Crozier (SNL) - ----------------------------------------------------------------------- - - USER-CUDA Package and associated modifications: - https://sourceforge.net/projects/lammpscuda/ - - Christian Trott, christian.trott@tu-ilmenau.de - Lars Winterfeld, lars.winterfeld@tu-ilmenau.de - Theoretical Physics II, University of Technology Ilmenau, Germany - - See the README file in the USER-CUDA directory. - - This software is distributed under the GNU General Public License. -------------------------------------------------------------------------- */ - -#include <cmath> -#include <cstdio> -#include <cstdlib> -#include <cstring> -#include "pair_buck_coul_long_cuda.h" -#include "pair_buck_coul_long_cuda_cu.h" -#include "cuda_data.h" -#include "atom.h" -#include "comm.h" -#include "force.h" -#include "kspace.h" -#include "neighbor.h" -#include "neigh_list.h" -#include "neigh_request.h" -#include "cuda_neigh_list.h" -#include "update.h" -#include "integrate.h" -#include "respa.h" -#include "memory.h" -#include "error.h" -#include "user_cuda.h" - -using namespace LAMMPS_NS; - -#define EWALD_F 1.12837917 -#define EWALD_P 0.3275911 -#define A1 0.254829592 -#define A2 -0.284496736 -#define A3 1.421413741 -#define A4 -1.453152027 -#define A5 1.061405429 -/* ---------------------------------------------------------------------- */ - -PairBuckCoulLongCuda::PairBuckCoulLongCuda(LAMMPS *lmp) : PairBuckCoulLong(lmp) -{ - cuda = lmp->cuda; - if(cuda == NULL) - error->all(FLERR,"You cannot use a /cuda class, without activating 'cuda' acceleration. 
Provide '-c on' as command-line argument to LAMMPS.."); - - allocated2 = false; - cuda->shared_data.pair.cudable_force = 1; - cuda->shared_data.pair.use_block_per_atom = 0; - cuda->setSystemParams(); -} - -/* ---------------------------------------------------------------------- - remember pointer to arrays in cuda shared data -------------------------------------------------------------------------- */ - -void PairBuckCoulLongCuda::allocate() -{ - if(! allocated) PairBuckCoulLong::allocate(); - if(! allocated2) - { - allocated2 = true; - cuda->shared_data.pair.cut = cut_lj; - cuda->shared_data.pair.coeff1 = rhoinv; - cuda->shared_data.pair.coeff2 = buck1; - cuda->shared_data.pair.coeff3 = buck2; - cuda->shared_data.pair.coeff4 = a; - cuda->shared_data.pair.coeff5 = c; - cuda->shared_data.pair.offset = offset; - cuda->shared_data.pair.special_lj = force->special_lj; - cuda->shared_data.pair.special_coul = force->special_coul; - } -} - -/* ---------------------------------------------------------------------- */ - -void PairBuckCoulLongCuda::compute(int eflag, int vflag) -{ - MYDBG( printf("PairBuckCoulLongCuda compute start\n"); fflush(stdout);) - if (eflag || vflag) ev_setup(eflag,vflag); - if(eflag) cuda->cu_eng_vdwl->upload(); - if(eflag) cuda->cu_eng_coul->upload(); - if(vflag) cuda->cu_virial->upload(); - - Cuda_PairBuckCoulLongCuda(& cuda->shared_data, & cuda_neigh_list->sneighlist, eflag, vflag, eflag_atom, vflag_atom); - - if(not cuda->shared_data.pair.collect_forces_later) - { - if(eflag) cuda->cu_eng_vdwl->download(); - if(eflag) cuda->cu_eng_coul->download(); - if(vflag) cuda->cu_virial->download(); - } - MYDBG( printf("PairBuckCoulLongCuda compute end\n"); fflush(stdout);) -} - -/* ---------------------------------------------------------------------- */ - -void PairBuckCoulLongCuda::settings(int narg, char **arg) -{ - PairBuckCoulLong::settings(narg, arg); - cuda->shared_data.pair.cut_global = (F_CFLOAT) cut_lj_global; -} - -/* 
---------------------------------------------------------------------- */ - -void PairBuckCoulLongCuda::coeff(int narg, char **arg) -{ - PairBuckCoulLong::coeff(narg, arg); - allocate(); -} - -void PairBuckCoulLongCuda::init_style() -{ - if (!atom->q_flag) - error->all(FLERR,"Pair style buck/coul/long requires atom attribute q"); - // request regular or rRESPA neighbor lists - - int irequest; - - if (strstr(update->integrate_style,"respa")) error->all(FLERR,"Integrate Style Respa is not supported by pair style buck/coul/long/cuda"); - - irequest = neighbor->request(this,instance_me); - neighbor->requests[irequest]->full = 1; - neighbor->requests[irequest]->half = 0; - neighbor->requests[irequest]->cudable = 1; - - - cut_coulsq = cut_coul * cut_coul; - cuda->shared_data.pair.cut_coulsq_global=cut_coulsq; - - if (force->kspace == NULL) - error->all(FLERR,"Pair style is incompatible with KSpace style"); - g_ewald = force->kspace->g_ewald; - cuda->shared_data.pair.g_ewald=g_ewald; - cuda->shared_data.pppm.qqrd2e=force->qqrd2e; - - - if(ncoultablebits) error->warning(FLERR,"# CUDA: You asked for the usage of Coulomb Tables. This is not supported in CUDA Pair forces. 
Setting is ignored.\n"); -} - -void PairBuckCoulLongCuda::init_list(int id, NeighList *ptr) -{ - MYDBG(printf("# CUDA PairBuckCoulLongCuda::init_list\n");) - PairBuckCoulLong::init_list(id, ptr); - #ifndef CUDA_USE_BINNING - // right now we can only handle verlet (id 0), not respa - if(id == 0) cuda_neigh_list = cuda->registerNeighborList(ptr); - // see Neighbor::init() for details on lammps lists' logic - #endif - MYDBG(printf("# CUDA PairBuckCoulLongCuda::init_list end\n");) -} - -void PairBuckCoulLongCuda::ev_setup(int eflag, int vflag) -{ - int maxeatomold=maxeatom; - PairBuckCoulLong::ev_setup(eflag,vflag); - - if (eflag_atom && atom->nmax > maxeatomold) - {delete cuda->cu_eatom; cuda->cu_eatom = new cCudaData<double, ENERGY_CFLOAT, x > ((double*)eatom, & cuda->shared_data.atom.eatom , atom->nmax );} - - if (vflag_atom && atom->nmax > maxeatomold) - {delete cuda->cu_vatom; cuda->cu_vatom = new cCudaData<double, ENERGY_CFLOAT, yx > ((double*)vatom, & cuda->shared_data.atom.vatom , atom->nmax, 6 );} - -} diff --git a/src/USER-CUDA/pair_buck_coul_long_cuda.h b/src/USER-CUDA/pair_buck_coul_long_cuda.h deleted file mode 100644 index 41d4637d9a..0000000000 --- a/src/USER-CUDA/pair_buck_coul_long_cuda.h +++ /dev/null @@ -1,57 +0,0 @@ -/* -*- c++ -*- ---------------------------------------------------------- - LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator - - Original Version: - http://lammps.sandia.gov, Sandia National Laboratories - Steve Plimpton, sjplimp@sandia.gov - - See the README file in the top-level LAMMPS directory. - - ----------------------------------------------------------------------- - - USER-CUDA Package and associated modifications: - https://sourceforge.net/projects/lammpscuda/ - - Christian Trott, christian.trott@tu-ilmenau.de - Lars Winterfeld, lars.winterfeld@tu-ilmenau.de - Theoretical Physics II, University of Technology Ilmenau, Germany - - See the README file in the USER-CUDA directory. 
- - This software is distributed under the GNU General Public License. -------------------------------------------------------------------------- */ - -#ifdef PAIR_CLASS - -PairStyle(buck/coul/long/cuda,PairBuckCoulLongCuda) - -#else - -#ifndef LMP_PAIR_BUCK_COUL_LONG_CUDA_H -#define LMP_PAIR_BUCK_COUL_LONG_CUDA_H - -#include "pair_buck_coul_long.h" - -namespace LAMMPS_NS { - -class PairBuckCoulLongCuda : public PairBuckCoulLong -{ - public: - PairBuckCoulLongCuda(class LAMMPS *); - void compute(int, int); - void settings(int, char **); - void coeff(int, char **); - void init_list(int, class NeighList *); - void init_style(); - void ev_setup(int eflag, int vflag); - protected: - class Cuda *cuda; - void allocate(); - bool allocated2; - class CudaNeighList* cuda_neigh_list; -}; - -} - -#endif -#endif diff --git a/src/USER-CUDA/pair_buck_cuda.cpp b/src/USER-CUDA/pair_buck_cuda.cpp deleted file mode 100644 index bcb9314c5f..0000000000 --- a/src/USER-CUDA/pair_buck_cuda.cpp +++ /dev/null @@ -1,166 +0,0 @@ -/* ---------------------------------------------------------------------- - LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator - - Original Version: - http://lammps.sandia.gov, Sandia National Laboratories - Steve Plimpton, sjplimp@sandia.gov - - See the README file in the top-level LAMMPS directory. - - Contributing author: Paul Crozier (SNL) - ----------------------------------------------------------------------- - - USER-CUDA Package and associated modifications: - https://sourceforge.net/projects/lammpscuda/ - - Christian Trott, christian.trott@tu-ilmenau.de - Lars Winterfeld, lars.winterfeld@tu-ilmenau.de - Theoretical Physics II, University of Technology Ilmenau, Germany - - See the README file in the USER-CUDA directory. - - This software is distributed under the GNU General Public License. 
-------------------------------------------------------------------------- */ - -#include <cmath> -#include <cstdio> -#include <cstdlib> -#include <cstring> -#include "pair_buck_cuda.h" -#include "pair_buck_cuda_cu.h" -#include "cuda_data.h" -#include "atom.h" -#include "comm.h" -#include "force.h" -#include "kspace.h" -#include "neighbor.h" -#include "neigh_list.h" -#include "neigh_request.h" -#include "cuda_neigh_list.h" -#include "update.h" -#include "integrate.h" -#include "respa.h" -#include "memory.h" -#include "error.h" -#include "user_cuda.h" - -using namespace LAMMPS_NS; - -/* ---------------------------------------------------------------------- */ - -PairBuckCuda::PairBuckCuda(LAMMPS *lmp) : PairBuck(lmp) -{ - cuda = lmp->cuda; - if(cuda == NULL) - error->all(FLERR,"You cannot use a /cuda class, without activating 'cuda' acceleration. Provide '-c on' as command-line argument to LAMMPS.."); - - allocated2 = false; - cuda->shared_data.pair.cudable_force = 1; - cuda->shared_data.pair.use_block_per_atom = 0; - cuda->setSystemParams(); -} - -/* ---------------------------------------------------------------------- - remember pointer to arrays in cuda shared data -------------------------------------------------------------------------- */ - -void PairBuckCuda::allocate() -{ - if(! allocated) PairBuck::allocate(); - if(! 
allocated2) - { - allocated2 = true; - cuda->shared_data.pair.cut = cut; - cuda->shared_data.pair.coeff1 = rhoinv; - cuda->shared_data.pair.coeff2 = buck1; - cuda->shared_data.pair.coeff3 = buck2; - cuda->shared_data.pair.coeff4 = a; - cuda->shared_data.pair.coeff5 = c; - cuda->shared_data.pair.offset = offset; - cuda->shared_data.pair.special_lj = force->special_lj; - } -} - -/* ---------------------------------------------------------------------- */ - -void PairBuckCuda::compute(int eflag, int vflag) -{ - MYDBG( printf("PairBuckCuda compute start\n"); fflush(stdout);) - if (eflag || vflag) ev_setup(eflag,vflag); - if(eflag) cuda->cu_eng_vdwl->upload(); - if(eflag) cuda->cu_eng_coul->upload(); - if(vflag) cuda->cu_virial->upload(); - - Cuda_PairBuckCuda(& cuda->shared_data, & cuda_neigh_list->sneighlist, eflag, vflag, eflag_atom, vflag_atom); - - if(not cuda->shared_data.pair.collect_forces_later) - { - if(eflag) cuda->cu_eng_vdwl->download(); - if(eflag) cuda->cu_eng_coul->download(); - if(vflag) cuda->cu_virial->download(); - } - MYDBG( printf("PairBuckCuda compute end\n"); fflush(stdout);) -} - -/* ---------------------------------------------------------------------- */ - -void PairBuckCuda::settings(int narg, char **arg) -{ - PairBuck::settings(narg, arg); - cuda->shared_data.pair.cut_global = (F_CFLOAT) cut_global; -} - -/* ---------------------------------------------------------------------- */ - -void PairBuckCuda::coeff(int narg, char **arg) -{ - PairBuck::coeff(narg, arg); - allocate(); -} - -void PairBuckCuda::init_style() -{ - if (!atom->q_flag) - error->all(FLERR,"Pair style buck/coul/long requires atom attribute q"); - // request regular or rRESPA neighbor lists - - int irequest; - - if (strstr(update->integrate_style,"respa")) error->all(FLERR,"Integrate Style Respa is not supported by pair style buck/coul/long/cuda"); - - irequest = neighbor->request(this,instance_me); - neighbor->requests[irequest]->full = 1; - neighbor->requests[irequest]->half 
= 0; - neighbor->requests[irequest]->cudable = 1; - - - cuda->shared_data.pppm.qqrd2e=force->qqrd2e; - - - if(ncoultablebits) error->warning(FLERR,"# CUDA: You asked for the usage of Coulomb Tables. This is not supported in CUDA Pair forces. Setting is ignored.\n"); -} - -void PairBuckCuda::init_list(int id, NeighList *ptr) -{ - MYDBG(printf("# CUDA PairBuckCuda::init_list\n");) - PairBuck::init_list(id, ptr); - #ifndef CUDA_USE_BINNING - // right now we can only handle verlet (id 0), not respa - if(id == 0) cuda_neigh_list = cuda->registerNeighborList(ptr); - // see Neighbor::init() for details on lammps lists' logic - #endif - MYDBG(printf("# CUDA PairBuckCuda::init_list end\n");) -} - -void PairBuckCuda::ev_setup(int eflag, int vflag) -{ - int maxeatomold=maxeatom; - PairBuck::ev_setup(eflag,vflag); - - if (eflag_atom && atom->nmax > maxeatomold) - {delete cuda->cu_eatom; cuda->cu_eatom = new cCudaData<double, ENERGY_CFLOAT, x > ((double*)eatom, & cuda->shared_data.atom.eatom , atom->nmax );} - - if (eflag_atom && atom->nmax > maxeatomold) - {delete cuda->cu_vatom; cuda->cu_vatom = new cCudaData<double, ENERGY_CFLOAT, yx > ((double*)vatom, & cuda->shared_data.atom.vatom , atom->nmax, 6 );} - -} diff --git a/src/USER-CUDA/pair_buck_cuda.h b/src/USER-CUDA/pair_buck_cuda.h deleted file mode 100644 index 9dfb742ed0..0000000000 --- a/src/USER-CUDA/pair_buck_cuda.h +++ /dev/null @@ -1,57 +0,0 @@ -/* -*- c++ -*- ---------------------------------------------------------- - LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator - - Original Version: - http://lammps.sandia.gov, Sandia National Laboratories - Steve Plimpton, sjplimp@sandia.gov - - See the README file in the top-level LAMMPS directory. 
- - ----------------------------------------------------------------------- - - USER-CUDA Package and associated modifications: - https://sourceforge.net/projects/lammpscuda/ - - Christian Trott, christian.trott@tu-ilmenau.de - Lars Winterfeld, lars.winterfeld@tu-ilmenau.de - Theoretical Physics II, University of Technology Ilmenau, Germany - - See the README file in the USER-CUDA directory. - - This software is distributed under the GNU General Public License. -------------------------------------------------------------------------- */ - -#ifdef PAIR_CLASS - -PairStyle(buck/cuda,PairBuckCuda) - -#else - -#ifndef LMP_PAIR_BUCK_CUDA_H -#define LMP_PAIR_BUCK_CUDA_H - -#include "pair_buck.h" - -namespace LAMMPS_NS { - -class PairBuckCuda : public PairBuck -{ - public: - PairBuckCuda(class LAMMPS *); - void compute(int, int); - void settings(int, char **); - void coeff(int, char **); - void init_list(int, class NeighList *); - void init_style(); - void ev_setup(int eflag, int vflag); - protected: - class Cuda *cuda; - void allocate(); - bool allocated2; - class CudaNeighList* cuda_neigh_list; -}; - -} - -#endif -#endif diff --git a/src/USER-CUDA/pair_eam_alloy_cuda.cpp b/src/USER-CUDA/pair_eam_alloy_cuda.cpp deleted file mode 100644 index c0b76c7e11..0000000000 --- a/src/USER-CUDA/pair_eam_alloy_cuda.cpp +++ /dev/null @@ -1,326 +0,0 @@ -/* ---------------------------------------------------------------------- - LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator - http://lammps.sandia.gov, Sandia National Laboratories - Steve Plimpton, sjplimp@sandia.gov - - Copyright (2003) Sandia Corporation. Under the terms of Contract - DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains - certain rights in this software. This software is distributed under - the GNU General Public License. - - See the README file in the top-level LAMMPS directory. 
-------------------------------------------------------------------------- */ - -/* ---------------------------------------------------------------------- - Contributing authors: Stephen Foiles (SNL), Murray Daw (SNL) -------------------------------------------------------------------------- */ - -#include <cstdio> -#include <cstdlib> -#include <cstring> -#include "pair_eam_alloy_cuda.h" -#include "atom.h" -#include "comm.h" -#include "memory.h" -#include "error.h" - -using namespace LAMMPS_NS; - -#define MAXLINE 1024 - -/* ---------------------------------------------------------------------- */ - -PairEAMAlloyCuda::PairEAMAlloyCuda(LAMMPS *lmp) : PairEAMCuda(lmp) -{ - cuda = lmp->cuda; - if(cuda == NULL) - error->all(FLERR,"You cannot use a /cuda class, without activating 'cuda' acceleration. Provide '-c on' as command-line argument to LAMMPS.."); - - one_coeff = 1; -} - -/* ---------------------------------------------------------------------- - set coeffs for one or more type pairs - read DYNAMO setfl file -------------------------------------------------------------------------- */ - -void PairEAMAlloyCuda::coeff(int narg, char **arg) -{ - int i,j; - - if (!allocated) allocate(); - - if (narg != 3 + atom->ntypes) - error->all(FLERR,"Incorrect args for pair coefficients"); - - // insure I,J args are * * - - if (strcmp(arg[0],"*") != 0 || strcmp(arg[1],"*") != 0) - error->all(FLERR,"Incorrect args for pair coefficients"); - - // read EAM setfl file - - if (setfl) { - for (i = 0; i < setfl->nelements; i++) delete [] setfl->elements[i]; - delete [] setfl->elements; - delete [] setfl->mass; - memory->destroy(setfl->frho); - memory->destroy(setfl->rhor); - memory->destroy(setfl->z2r); - delete setfl; - } - setfl = new Setfl(); - read_file(arg[2]); - - // read args that map atom types to elements in potential file - // map[i] = which element the Ith atom type is, -1 if NULL - - for (i = 3; i < narg; i++) { - if (strcmp(arg[i],"NULL") == 0) { - map[i-2] = -1; - 
continue; - } - for (j = 0; j < setfl->nelements; j++) - if (strcmp(arg[i],setfl->elements[j]) == 0) break; - if (j < setfl->nelements) map[i-2] = j; - else error->all(FLERR,"No matching element in EAM potential file"); - } - - // clear setflag since coeff() called once with I,J = * * - - int n = atom->ntypes; - for (i = 1; i <= n; i++) - for (j = i; j <= n; j++) - setflag[i][j] = 0; - - // set setflag i,j for type pairs where both are mapped to elements - // set mass of atom type if i = j - - int count = 0; - for (i = 1; i <= n; i++) { - for (j = i; j <= n; j++) { - if (map[i] >= 0 && map[j] >= 0) { - setflag[i][j] = 1; - if (i == j) atom->set_mass(i,setfl->mass[map[i]]); - count++; - } - } - } - - if (count == 0) error->all(FLERR,"Incorrect args for pair coefficients"); -} - -/* ---------------------------------------------------------------------- - read a multi-element DYNAMO setfl file -------------------------------------------------------------------------- */ - -void PairEAMAlloyCuda::read_file(char *filename) -{ - Setfl *file = setfl; - - // open potential file - - int me = comm->me; - FILE *fptr; - char line[MAXLINE]; - - if (me == 0) { - fptr = fopen(filename,"r"); - if (fptr == NULL) { - char str[128]; - sprintf(str,"Cannot open EAM potential file %s",filename); - error->one(FLERR,str); - } - } - - // read and broadcast header - // extract element names from nelements line - - int n; - if (me == 0) { - fgets(line,MAXLINE,fptr); - fgets(line,MAXLINE,fptr); - fgets(line,MAXLINE,fptr); - fgets(line,MAXLINE,fptr); - n = strlen(line) + 1; - } - MPI_Bcast(&n,1,MPI_INT,0,world); - MPI_Bcast(line,n,MPI_CHAR,0,world); - - sscanf(line,"%d",&file->nelements); - int nwords = atom->count_words(line); - if (nwords != file->nelements + 1) - error->all(FLERR,"Incorrect element names in EAM potential file"); - - char **words = new char*[file->nelements+1]; - nwords = 0; - strtok(line," \t\n\r\f"); - while ((words[nwords++] = strtok(NULL," \t\n\r\f"))) continue; - - 
file->elements = new char*[file->nelements]; - for (int i = 0; i < file->nelements; i++) { - n = strlen(words[i]) + 1; - file->elements[i] = new char[n]; - strcpy(file->elements[i],words[i]); - } - delete [] words; - - if (me == 0) { - fgets(line,MAXLINE,fptr); - sscanf(line,"%d %lg %d %lg %lg", - &file->nrho,&file->drho,&file->nr,&file->dr,&file->cut); - } - - MPI_Bcast(&file->nrho,1,MPI_INT,0,world); - MPI_Bcast(&file->drho,1,MPI_DOUBLE,0,world); - MPI_Bcast(&file->nr,1,MPI_INT,0,world); - MPI_Bcast(&file->dr,1,MPI_DOUBLE,0,world); - MPI_Bcast(&file->cut,1,MPI_DOUBLE,0,world); - - file->mass = new double[file->nelements]; - memory->create(file->frho,file->nelements,file->nrho+1,"pair:frho"); - memory->create(file->rhor,file->nelements,file->nr+1,"pair:rhor"); - memory->create(file->z2r,file->nelements,file->nelements,file->nr+1, - "pair:z2r"); - int i,j,tmp; - for (i = 0; i < file->nelements; i++) { - if (me == 0) { - fgets(line,MAXLINE,fptr); - sscanf(line,"%d %lg",&tmp,&file->mass[i]); - } - MPI_Bcast(&file->mass[i],1,MPI_DOUBLE,0,world); - - if (me == 0) grab(fptr,file->nrho,&file->frho[i][1]); - MPI_Bcast(&file->frho[i][1],file->nrho,MPI_DOUBLE,0,world); - if (me == 0) grab(fptr,file->nr,&file->rhor[i][1]); - MPI_Bcast(&file->rhor[i][1],file->nr,MPI_DOUBLE,0,world); - } - - for (i = 0; i < file->nelements; i++) - for (j = 0; j <= i; j++) { - if (me == 0) grab(fptr,file->nr,&file->z2r[i][j][1]); - MPI_Bcast(&file->z2r[i][j][1],file->nr,MPI_DOUBLE,0,world); - } - - // close the potential file - - if (me == 0) fclose(fptr); -} - -/* ---------------------------------------------------------------------- - copy read-in setfl potential to standard array format -------------------------------------------------------------------------- */ - -void PairEAMAlloyCuda::file2array() -{ - int i,j,m,n; - int ntypes = atom->ntypes; - - // set function params directly from setfl file - - nrho = setfl->nrho; - nr = setfl->nr; - drho = setfl->drho; - dr = setfl->dr; - - // 
------------------------------------------------------------------ - // setup frho arrays - // ------------------------------------------------------------------ - - // allocate frho arrays - // nfrho = # of setfl elements + 1 for zero array - - nfrho = setfl->nelements + 1; - memory->destroy(frho); - memory->create(frho,nfrho,nrho+1,"pair:frho"); - - // copy each element's frho to global frho - - for (i = 0; i < setfl->nelements; i++) - for (m = 1; m <= nrho; m++) frho[i][m] = setfl->frho[i][m]; - - // add extra frho of zeroes for non-EAM types to point to (pair hybrid) - // this is necessary b/c fp is still computed for non-EAM atoms - - for (m = 1; m <= nrho; m++) frho[nfrho-1][m] = 0.0; - - // type2frho[i] = which frho array (0 to nfrho-1) each atom type maps to - // if atom type doesn't point to element (non-EAM atom in pair hybrid) - // then map it to last frho array of zeroes - - for (i = 1; i <= ntypes; i++) - if (map[i] >= 0) type2frho[i] = map[i]; - else type2frho[i] = nfrho-1; - - // ------------------------------------------------------------------ - // setup rhor arrays - // ------------------------------------------------------------------ - - // allocate rhor arrays - // nrhor = # of setfl elements - - nrhor = setfl->nelements; - memory->destroy(rhor); - memory->create(rhor,nrhor,nr+1,"pair:rhor"); - - // copy each element's rhor to global rhor - - for (i = 0; i < setfl->nelements; i++) - for (m = 1; m <= nr; m++) rhor[i][m] = setfl->rhor[i][m]; - - // type2rhor[i][j] = which rhor array (0 to nrhor-1) each type pair maps to - // for setfl files, I,J mapping only depends on I - // OK if map = -1 (non-EAM atom in pair hybrid) b/c type2rhor not used - - for (i = 1; i <= ntypes; i++) - for (j = 1; j <= ntypes; j++) - type2rhor[i][j] = map[i]; - - // ------------------------------------------------------------------ - // setup z2r arrays - // ------------------------------------------------------------------ - - // allocate z2r arrays - // nz2r = 
N*(N+1)/2 where N = # of setfl elements - - nz2r = setfl->nelements * (setfl->nelements+1) / 2; - memory->destroy(z2r); - memory->create(z2r,nz2r,nr+1,"pair:z2r"); - - // copy each element pair z2r to global z2r, only for I >= J - - n = 0; - for (i = 0; i < setfl->nelements; i++) - for (j = 0; j <= i; j++) { - for (m = 1; m <= nr; m++) z2r[n][m] = setfl->z2r[i][j][m]; - n++; - } - - // type2z2r[i][j] = which z2r array (0 to nz2r-1) each type pair maps to - // set of z2r arrays only fill lower triangular Nelement matrix - // value = n = sum over rows of lower-triangular matrix until reach irow,icol - // swap indices when irow < icol to stay lower triangular - // if map = -1 (non-EAM atom in pair hybrid): - // type2z2r is not used by non-opt - // but set type2z2r to 0 since accessed by opt - - int irow,icol; - for (i = 1; i <= ntypes; i++) { - for (j = 1; j <= ntypes; j++) { - irow = map[i]; - icol = map[j]; - if (irow == -1 || icol == -1) { - type2z2r[i][j] = 0; - continue; - } - if (irow < icol) { - irow = map[j]; - icol = map[i]; - } - n = 0; - for (m = 0; m < irow; m++) n += m + 1; - n += icol; - type2z2r[i][j] = n; - } - } -} diff --git a/src/USER-CUDA/pair_eam_alloy_cuda.h b/src/USER-CUDA/pair_eam_alloy_cuda.h deleted file mode 100644 index c46755d0f8..0000000000 --- a/src/USER-CUDA/pair_eam_alloy_cuda.h +++ /dev/null @@ -1,44 +0,0 @@ -/* -*- c++ -*- ---------------------------------------------------------- - LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator - http://lammps.sandia.gov, Sandia National Laboratories - Steve Plimpton, sjplimp@sandia.gov - - Copyright (2003) Sandia Corporation. Under the terms of Contract - DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains - certain rights in this software. This software is distributed under - the GNU General Public License. - - See the README file in the top-level LAMMPS directory. 
-------------------------------------------------------------------------- */ - -#ifdef PAIR_CLASS - -PairStyle(eam/alloy/cuda,PairEAMAlloyCuda) - -#else - -#ifndef LMP_PAIR_EAM_CUDA_ALLOY_H -#define LMP_PAIR_EAM_CUDA_ALLOY_H - -#include "pair_eam_cuda.h" - -namespace LAMMPS_NS { - -// use virtual public since this class is parent in multiple inheritance - -class PairEAMAlloyCuda : virtual public PairEAMCuda { - public: - PairEAMAlloyCuda(class LAMMPS *); - virtual ~PairEAMAlloyCuda() {} - void coeff(int, char **); - - protected: - class Cuda *cuda; - void read_file(char *); - void file2array(); -}; - -} - -#endif -#endif diff --git a/src/USER-CUDA/pair_eam_cuda.cpp b/src/USER-CUDA/pair_eam_cuda.cpp deleted file mode 100644 index 3db0c66cd6..0000000000 --- a/src/USER-CUDA/pair_eam_cuda.cpp +++ /dev/null @@ -1,265 +0,0 @@ -/* ---------------------------------------------------------------------- - LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator - - Original Version: - http://lammps.sandia.gov, Sandia National Laboratories - Steve Plimpton, sjplimp@sandia.gov - - See the README file in the top-level LAMMPS directory. - - ----------------------------------------------------------------------- - - USER-CUDA Package and associated modifications: - https://sourceforge.net/projects/lammpscuda/ - - Christian Trott, christian.trott@tu-ilmenau.de - Lars Winterfeld, lars.winterfeld@tu-ilmenau.de - Theoretical Physics II, University of Technology Ilmenau, Germany - - See the README file in the USER-CUDA directory. - - This software is distributed under the GNU General Public License. -------------------------------------------------------------------------- */ - -/* ---------------------------------------------------------------------- - LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator - http://lammps.sandia.gov, Sandia National Laboratories - Steve Plimpton, sjplimp@sandia.gov - - Copyright (2003) Sandia Corporation. 
Under the terms of Contract - DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains - certain rights in this software. This software is distributed under - the GNU General Public License. - - See the README file in the top-level LAMMPS directory. -------------------------------------------------------------------------- */ - -/* ---------------------------------------------------------------------- - Contributing author: Paul Crozier (SNL) -------------------------------------------------------------------------- */ - -#include <cmath> -#include <cstdio> -#include <cstdlib> -#include <cstring> -#include "pair_eam_cuda.h" -#include "pair_eam_cuda_cu.h" -#include "pair_virial_compute_cu.h" -#include "cuda_data.h" -#include "atom.h" -#include "comm.h" -#include "force.h" -#include "neighbor.h" -#include "neigh_list.h" -#include "neigh_request.h" -#include "cuda_neigh_list.h" -#include "update.h" -#include "integrate.h" -#include "respa.h" -#include "memory.h" -#include "error.h" -#include "user_cuda.h" - -using namespace LAMMPS_NS; - -/* ---------------------------------------------------------------------- */ - -PairEAMCuda::PairEAMCuda(LAMMPS* lmp) : PairEAM(lmp) -{ - cuda = lmp->cuda; - - if(cuda == NULL) - error->all(FLERR, "You cannot use a /cuda class, without activating 'cuda' acceleration. Provide '-c on' as command-line argument to LAMMPS.."); - - allocated2 = false; - cuda->shared_data.pair.cudable_force = 1; - cuda->shared_data.pair.override_block_per_atom = 0; - - cuda->setSystemParams(); - cu_rho = NULL; - cu_fp = NULL; - cu_frho_spline = NULL; - cu_z2r_spline = NULL; - cu_rhor_spline = NULL; -} - -/* ---------------------------------------------------------------------- - remember pointer to arrays in cuda shared data -------------------------------------------------------------------------- */ - -void PairEAMCuda::allocate() -{ - if(! 
allocated) PairEAM::allocate(); - - cuda->shared_data.pair.cutsq = cutsq; - cuda->shared_data.pair.cut_global = (F_CFLOAT) cutforcesq; -} - -/* ---------------------------------------------------------------------- */ - -void PairEAMCuda::compute(int eflag, int vflag) -{ - cuda->shared_data.pair.cut_global = (F_CFLOAT) cutforcesq; - cuda->shared_data.pair.use_block_per_atom = 0; - cuda->shared_data.pair.collect_forces_later = 0; - - if(atom->nmax > nmax || cuda->finished_setup == false) { - memory->destroy(rho); - memory->destroy(fp); - nmax = atom->nmax; - memory->create(rho, nmax, "pair:rho"); - memory->create(fp, nmax, "pair:fp"); - delete cu_rho; - delete cu_fp; - cu_rho = new cCudaData<double, F_CFLOAT, x> (rho, atom->nmax); - cu_fp = new cCudaData<double, F_CFLOAT, x> (fp, atom->nmax); - Cuda_PairEAMCuda_Init(&cuda->shared_data, rdr, rdrho, nfrho, nrhor, nr, nrho, nz2r, - cu_frho_spline->dev_data(), cu_rhor_spline->dev_data(), cu_z2r_spline->dev_data(), - cu_rho->dev_data(), cu_fp->dev_data(), type2frho, type2z2r, type2rhor); - } - - - - if(eflag || vflag) ev_setup(eflag, vflag); - - if(eflag) cuda->cu_eng_vdwl->upload(); - - if(vflag) cuda->cu_virial->upload(); - - Cuda_PairEAM1Cuda(& cuda->shared_data, & cuda_neigh_list->sneighlist, eflag, vflag, eflag_atom, vflag_atom); - - comm->forward_comm_pair(this); - - Cuda_PairEAM2Cuda(& cuda->shared_data, & cuda_neigh_list->sneighlist, eflag, vflag, eflag_atom, vflag_atom); - - if(eflag) cuda->cu_eng_vdwl->download(); - - if(vflag) cuda->cu_virial->download(); -} - -/* ---------------------------------------------------------------------- */ - -void PairEAMCuda::settings(int narg, char** arg) -{ - PairEAM::settings(narg, arg); - cuda->shared_data.pair.cut_global = (F_CFLOAT) cutforcesq; -} - -/* ---------------------------------------------------------------------- */ - -void PairEAMCuda::coeff(int narg, char** arg) -{ - PairEAM::coeff(narg, arg); - allocate(); -} - -void PairEAMCuda::init_style() -{ - 
MYDBG(printf("# CUDA PairEAMCuda::init_style start\n");) - // request regular or rRESPA neighbor lists - file2array(); - array2spline(); - int irequest; - - - irequest = neighbor->request(this,instance_me); - neighbor->requests[irequest]->full = 1; - neighbor->requests[irequest]->half = 0; - neighbor->requests[irequest]->cudable = 1; - - delete cu_rhor_spline; - delete cu_z2r_spline; - delete cu_frho_spline; - - cu_rhor_spline = new cCudaData<double, F_CFLOAT, xyz>((double*)rhor_spline, nrhor, nr + 1, EAM_COEFF_LENGTH); - cu_z2r_spline = new cCudaData<double, F_CFLOAT, xyz>((double*)z2r_spline, nz2r, nr + 1, EAM_COEFF_LENGTH); - cu_frho_spline = new cCudaData<double, F_CFLOAT, xyz>((double*)frho_spline, nfrho, nrho + 1, EAM_COEFF_LENGTH); - - cu_rhor_spline->upload(); - cu_z2r_spline->upload(); - cu_frho_spline->upload(); - - MYDBG(printf("# CUDA PairEAMCuda::init_style end\n");) -} - -void PairEAMCuda::init_list(int id, NeighList* ptr) -{ - MYDBG(printf("# CUDA PairEAMCuda::init_list\n");) - PairEAM::init_list(id, ptr); - - // right now we can only handle verlet (id 0), not respa - if(id == 0) cuda_neigh_list = cuda->registerNeighborList(ptr); - - // see Neighbor::init() for details on lammps lists' logic - MYDBG(printf("# CUDA PairEAMCuda::init_list end\n");) -} - -void PairEAMCuda::array2spline() -{ - rdr = 1.0 / dr; - rdrho = 1.0 / drho; - - memory->destroy(frho_spline); - memory->destroy(rhor_spline); - memory->destroy(z2r_spline); - - memory->create(frho_spline, nfrho, nrho + 1, 8, "pair:frho"); - memory->create(rhor_spline, nrhor, nr + 1, 8, "pair:rhor"); - memory->create(z2r_spline, nz2r, nr + 1, 8, "pair:z2r"); - - for(int i = 0; i < nfrho; i++) { - interpolate(nrho, drho, frho[i], frho_spline[i]); - - for(int j = 0; j < nrho + 1; j++) - frho_spline[i][j][7] = frho_spline[i][j][3]; - } - - for(int i = 0; i < nrhor; i++) { - interpolate(nr, dr, rhor[i], rhor_spline[i]); - - for(int j = 0; j < nr + 1; j++) - rhor_spline[i][j][7] = rhor_spline[i][j][3]; - } - 
- for(int i = 0; i < nz2r; i++) { - interpolate(nr, dr, z2r[i], z2r_spline[i]); - - for(int j = 0; j < nr + 1; j++) - z2r_spline[i][j][7] = z2r_spline[i][j][3]; - } -} - -/* ---------------------------------------------------------------------- */ - -int PairEAMCuda::pack_forward_comm(int n, int* iswap, double* buf, - int pbc_flag, int* pbc) -{ - Cuda_PairEAMCuda_PackComm(&cuda->shared_data, n, *iswap, buf); - - if(sizeof(F_CFLOAT) < sizeof(double)) return n; - else return n; -} - -/* ---------------------------------------------------------------------- */ - -void PairEAMCuda::unpack_forward_comm(int n, int first, double* buf) -{ - Cuda_PairEAMCuda_UnpackComm(&cuda->shared_data, n, first, buf, cu_fp->dev_data()); -} - -void PairEAMCuda::ev_setup(int eflag, int vflag) -{ - int maxeatomold = maxeatom; - PairEAM::ev_setup(eflag, vflag); - - if(eflag_atom && atom->nmax > maxeatomold) { - delete cuda->cu_eatom; - cuda->cu_eatom = new cCudaData<double, ENERGY_CFLOAT, x > ((double*)eatom, & cuda->shared_data.atom.eatom , atom->nmax); - } - - if(vflag_atom && atom->nmax > maxeatomold) { - delete cuda->cu_vatom; - cuda->cu_vatom = new cCudaData<double, ENERGY_CFLOAT, yx > ((double*)vatom, & cuda->shared_data.atom.vatom , atom->nmax, 6); - } - -} diff --git a/src/USER-CUDA/pair_eam_cuda.h b/src/USER-CUDA/pair_eam_cuda.h deleted file mode 100644 index 973fc20a45..0000000000 --- a/src/USER-CUDA/pair_eam_cuda.h +++ /dev/null @@ -1,80 +0,0 @@ -/* -*- c++ -*- ---------------------------------------------------------- - LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator - - Original Version: - http://lammps.sandia.gov, Sandia National Laboratories - Steve Plimpton, sjplimp@sandia.gov - - See the README file in the top-level LAMMPS directory. 
- - ----------------------------------------------------------------------- - - USER-CUDA Package and associated modifications: - https://sourceforge.net/projects/lammpscuda/ - - Christian Trott, christian.trott@tu-ilmenau.de - Lars Winterfeld, lars.winterfeld@tu-ilmenau.de - Theoretical Physics II, University of Technology Ilmenau, Germany - - See the README file in the USER-CUDA directory. - - This software is distributed under the GNU General Public License. -------------------------------------------------------------------------- */ - -/* ---------------------------------------------------------------------- - LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator - http://lammps.sandia.gov, Sandia National Laboratories - Steve Plimpton, sjplimp@sandia.gov - - Copyright (2003) Sandia Corporation. Under the terms of Contract - DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains - certain rights in this software. This software is distributed under - the GNU General Public License. - - See the README file in the top-level LAMMPS directory. 
-------------------------------------------------------------------------- */ -#ifdef PAIR_CLASS - -PairStyle(eam/cuda,PairEAMCuda) - -#else - -#ifndef PAIR_EAM_CUDA_H -#define PAIR_EAM_CUDA_H - -#include "cuda_data.h" -#include "pair_eam.h" - -namespace LAMMPS_NS { - -class PairEAMCuda : public PairEAM -{ - public: - PairEAMCuda(class LAMMPS *); - void compute(int, int); - void settings(int, char **); - void coeff(int, char **); - void init_list(int, class NeighList *); - void init_style(); - void array2spline(); - int pack_forward_comm(int n, int *iswap, double *buf, - int pbc_flag, int *pbc); - void unpack_forward_comm(int n, int first, double *buf); - protected: - class Cuda *cuda; - void allocate(); - bool allocated2; - virtual void ev_setup(int eflag, int vflag); - class CudaNeighList* cuda_neigh_list; - cCudaData<double, F_CFLOAT, x>* cu_rho; - cCudaData<double, F_CFLOAT, x>* cu_fp; - cCudaData<double, F_CFLOAT, xyz>* cu_rhor_spline; - cCudaData<double, F_CFLOAT, xyz>* cu_z2r_spline; - cCudaData<double, F_CFLOAT, xyz>* cu_frho_spline; - -}; - -} - -#endif -#endif diff --git a/src/USER-CUDA/pair_eam_fs_cuda.cpp b/src/USER-CUDA/pair_eam_fs_cuda.cpp deleted file mode 100644 index 6190213402..0000000000 --- a/src/USER-CUDA/pair_eam_fs_cuda.cpp +++ /dev/null @@ -1,335 +0,0 @@ -/* ---------------------------------------------------------------------- - LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator - http://lammps.sandia.gov, Sandia National Laboratories - Steve Plimpton, sjplimp@sandia.gov - - Copyright (2003) Sandia Corporation. Under the terms of Contract - DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains - certain rights in this software. This software is distributed under - the GNU General Public License. - - See the README file in the top-level LAMMPS directory. 
-------------------------------------------------------------------------- */ - -/* ---------------------------------------------------------------------- - Contributing authors: Tim Lau (MIT) -------------------------------------------------------------------------- */ - -#include <cstdio> -#include <cstdlib> -#include <cstring> -#include "pair_eam_fs_cuda.h" -#include "atom.h" -#include "comm.h" -#include "memory.h" -#include "error.h" - -using namespace LAMMPS_NS; - -#define MAXLINE 1024 - -/* ---------------------------------------------------------------------- */ - -PairEAMFSCuda::PairEAMFSCuda(LAMMPS *lmp) : PairEAMCuda(lmp) -{ - cuda = lmp->cuda; - if(cuda == NULL) - error->all(FLERR,"You cannot use a /cuda class, without activating 'cuda' acceleration. Provide '-c on' as command-line argument to LAMMPS.."); - - one_coeff = 1; -} - -/* ---------------------------------------------------------------------- - set coeffs for one or more type pairs - read EAM Finnis-Sinclair file -------------------------------------------------------------------------- */ - -void PairEAMFSCuda::coeff(int narg, char **arg) -{ - int i,j; - - if (!allocated) allocate(); - - if (narg != 3 + atom->ntypes) - error->all(FLERR,"Incorrect args for pair coefficients"); - - // insure I,J args are * * - - if (strcmp(arg[0],"*") != 0 || strcmp(arg[1],"*") != 0) - error->all(FLERR,"Incorrect args for pair coefficients"); - - // read EAM Finnis-Sinclair file - - if (fs) { - for (i = 0; i < fs->nelements; i++) delete [] fs->elements[i]; - delete [] fs->elements; - delete [] fs->mass; - memory->destroy(fs->frho); - memory->destroy(fs->rhor); - memory->destroy(fs->z2r); - delete fs; - } - fs = new Fs(); - read_file(arg[2]); - - // read args that map atom types to elements in potential file - // map[i] = which element the Ith atom type is, -1 if NULL - - for (i = 3; i < narg; i++) { - if (strcmp(arg[i],"NULL") == 0) { - map[i-2] = -1; - continue; - } - for (j = 0; j < fs->nelements; j++) - if 
(strcmp(arg[i],fs->elements[j]) == 0) break; - if (j < fs->nelements) map[i-2] = j; - else error->all(FLERR,"No matching element in EAM potential file"); - } - - // clear setflag since coeff() called once with I,J = * * - - int n = atom->ntypes; - for (i = 1; i <= n; i++) - for (j = i; j <= n; j++) - setflag[i][j] = 0; - - // set setflag i,j for type pairs where both are mapped to elements - // set mass of atom type if i = j - - int count = 0; - for (i = 1; i <= n; i++) { - for (j = i; j <= n; j++) { - if (map[i] >= 0 && map[j] >= 0) { - setflag[i][j] = 1; - if (i == j) atom->set_mass(i,fs->mass[map[i]]); - count++; - } - } - } - - if (count == 0) error->all(FLERR,"Incorrect args for pair coefficients"); -} - -/* ---------------------------------------------------------------------- - read a multi-element DYNAMO setfl file -------------------------------------------------------------------------- */ - -void PairEAMFSCuda::read_file(char *filename) -{ - Fs *file = fs; - - // open potential file - - int me = comm->me; - FILE *fptr; - char line[MAXLINE]; - - if (me == 0) { - fptr = fopen(filename,"r"); - if (fptr == NULL) { - char str[128]; - sprintf(str,"Cannot open EAM potential file %s",filename); - error->one(FLERR,str); - } - } - - // read and broadcast header - // extract element names from nelements line - - int n; - if (me == 0) { - fgets(line,MAXLINE,fptr); - fgets(line,MAXLINE,fptr); - fgets(line,MAXLINE,fptr); - fgets(line,MAXLINE,fptr); - n = strlen(line) + 1; - } - MPI_Bcast(&n,1,MPI_INT,0,world); - MPI_Bcast(line,n,MPI_CHAR,0,world); - - sscanf(line,"%d",&file->nelements); - int nwords = atom->count_words(line); - if (nwords != file->nelements + 1) - error->all(FLERR,"Incorrect element names in EAM potential file"); - - char **words = new char*[file->nelements+1]; - nwords = 0; - strtok(line," \t\n\r\f"); - while ((words[nwords++] = strtok(NULL," \t\n\r\f"))) continue; - - file->elements = new char*[file->nelements]; - for (int i = 0; i < 
file->nelements; i++) { - n = strlen(words[i]) + 1; - file->elements[i] = new char[n]; - strcpy(file->elements[i],words[i]); - } - delete [] words; - - if (me == 0) { - fgets(line,MAXLINE,fptr); - sscanf(line,"%d %lg %d %lg %lg", - &file->nrho,&file->drho,&file->nr,&file->dr,&file->cut); - } - - MPI_Bcast(&file->nrho,1,MPI_INT,0,world); - MPI_Bcast(&file->drho,1,MPI_DOUBLE,0,world); - MPI_Bcast(&file->nr,1,MPI_INT,0,world); - MPI_Bcast(&file->dr,1,MPI_DOUBLE,0,world); - MPI_Bcast(&file->cut,1,MPI_DOUBLE,0,world); - - file->mass = new double[file->nelements]; - memory->create(file->frho,file->nelements,file->nrho+1, - "pair:frho"); - memory->create(file->rhor,file->nelements,file->nelements, - file->nr+1,"pair:rhor"); - memory->create(file->z2r,file->nelements,file->nelements, - file->nr+1,"pair:z2r"); - int i,j,tmp; - for (i = 0; i < file->nelements; i++) { - if (me == 0) { - fgets(line,MAXLINE,fptr); - sscanf(line,"%d %lg",&tmp,&file->mass[i]); - } - MPI_Bcast(&file->mass[i],1,MPI_DOUBLE,0,world); - - if (me == 0) grab(fptr,file->nrho,&file->frho[i][1]); - MPI_Bcast(&file->frho[i][1],file->nrho,MPI_DOUBLE,0,world); - - for (j = 0; j < file->nelements; j++) { - if (me == 0) grab(fptr,file->nr,&file->rhor[i][j][1]); - MPI_Bcast(&file->rhor[i][j][1],file->nr,MPI_DOUBLE,0,world); - } - } - - for (i = 0; i < file->nelements; i++) - for (j = 0; j <= i; j++) { - if (me == 0) grab(fptr,file->nr,&file->z2r[i][j][1]); - MPI_Bcast(&file->z2r[i][j][1],file->nr,MPI_DOUBLE,0,world); - } - - // close the potential file - - if (me == 0) fclose(fptr); -} - -/* ---------------------------------------------------------------------- - copy read-in setfl potential to standard array format -------------------------------------------------------------------------- */ - -void PairEAMFSCuda::file2array() -{ - int i,j,m,n; - int ntypes = atom->ntypes; - - // set function params directly from fs file - - nrho = fs->nrho; - nr = fs->nr; - drho = fs->drho; - dr = fs->dr; - - // 
------------------------------------------------------------------ - // setup frho arrays - // ------------------------------------------------------------------ - - // allocate frho arrays - // nfrho = # of fs elements + 1 for zero array - - nfrho = fs->nelements + 1; - memory->destroy(frho); - memory->create(frho,nfrho,nrho+1,"pair:frho"); - - // copy each element's frho to global frho - - for (i = 0; i < fs->nelements; i++) - for (m = 1; m <= nrho; m++) frho[i][m] = fs->frho[i][m]; - - // add extra frho of zeroes for non-EAM types to point to (pair hybrid) - // this is necessary b/c fp is still computed for non-EAM atoms - - for (m = 1; m <= nrho; m++) frho[nfrho-1][m] = 0.0; - - // type2frho[i] = which frho array (0 to nfrho-1) each atom type maps to - // if atom type doesn't point to element (non-EAM atom in pair hybrid) - // then map it to last frho array of zeroes - - for (i = 1; i <= ntypes; i++) - if (map[i] >= 0) type2frho[i] = map[i]; - else type2frho[i] = nfrho-1; - - // ------------------------------------------------------------------ - // setup rhor arrays - // ------------------------------------------------------------------ - - // allocate rhor arrays - // nrhor = square of # of fs elements - - nrhor = fs->nelements * fs->nelements; - memory->destroy(rhor); - memory->create(rhor,nrhor,nr+1,"pair:rhor"); - - // copy each element pair rhor to global rhor - - n = 0; - for (i = 0; i < fs->nelements; i++) - for (j = 0; j < fs->nelements; j++) { - for (m = 1; m <= nr; m++) rhor[n][m] = fs->rhor[i][j][m]; - n++; - } - - // type2rhor[i][j] = which rhor array (0 to nrhor-1) each type pair maps to - // for fs files, there is a full NxN set of rhor arrays - // OK if map = -1 (non-EAM atom in pair hybrid) b/c type2rhor not used - - for (i = 1; i <= ntypes; i++) - for (j = 1; j <= ntypes; j++) - type2rhor[i][j] = map[i] * fs->nelements + map[j]; - - // ------------------------------------------------------------------ - // setup z2r arrays - // 
------------------------------------------------------------------ - - // allocate z2r arrays - // nz2r = N*(N+1)/2 where N = # of fs elements - - nz2r = fs->nelements * (fs->nelements+1) / 2; - memory->destroy(z2r); - memory->create(z2r,nz2r,nr+1,"pair:z2r"); - - // copy each element pair z2r to global z2r, only for I >= J - - n = 0; - for (i = 0; i < fs->nelements; i++) - for (j = 0; j <= i; j++) { - for (m = 1; m <= nr; m++) z2r[n][m] = fs->z2r[i][j][m]; - n++; - } - - // type2z2r[i][j] = which z2r array (0 to nz2r-1) each type pair maps to - // set of z2r arrays only fill lower triangular Nelement matrix - // value = n = sum over rows of lower-triangular matrix until reach irow,icol - // swap indices when irow < icol to stay lower triangular - // if map = -1 (non-EAM atom in pair hybrid): - // type2z2r is not used by non-opt - // but set type2z2r to 0 since accessed by opt - - int irow,icol; - for (i = 1; i <= ntypes; i++) { - for (j = 1; j <= ntypes; j++) { - irow = map[i]; - icol = map[j]; - if (irow == -1 || icol == -1) { - type2z2r[i][j] = 0; - continue; - } - if (irow < icol) { - irow = map[j]; - icol = map[i]; - } - n = 0; - for (m = 0; m < irow; m++) n += m + 1; - n += icol; - type2z2r[i][j] = n; - } - } -} diff --git a/src/USER-CUDA/pair_eam_fs_cuda.h b/src/USER-CUDA/pair_eam_fs_cuda.h deleted file mode 100644 index 698b485875..0000000000 --- a/src/USER-CUDA/pair_eam_fs_cuda.h +++ /dev/null @@ -1,44 +0,0 @@ -/* -*- c++ -*- ---------------------------------------------------------- - LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator - http://lammps.sandia.gov, Sandia National Laboratories - Steve Plimpton, sjplimp@sandia.gov - - Copyright (2003) Sandia Corporation. Under the terms of Contract - DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains - certain rights in this software. This software is distributed under - the GNU General Public License. - - See the README file in the top-level LAMMPS directory. 
-------------------------------------------------------------------------- */ - -#ifdef PAIR_CLASS - -PairStyle(eam/fs/cuda,PairEAMFSCuda) - -#else - -#ifndef LMP_PAIR_EAM_FS_CUDA_H -#define LMP_PAIR_EAM_FS_CUDA_H - -#include "pair_eam_cuda.h" - -namespace LAMMPS_NS { - -// use virtual public since this class is parent in multiple inheritance - -class PairEAMFSCuda : virtual public PairEAMCuda { - public: - PairEAMFSCuda(class LAMMPS *); - virtual ~PairEAMFSCuda() {} - void coeff(int, char **); - - protected: - class Cuda *cuda; - void read_file(char *); - void file2array(); -}; - -} - -#endif -#endif diff --git a/src/USER-CUDA/pair_gran_hooke_cuda.cpp b/src/USER-CUDA/pair_gran_hooke_cuda.cpp deleted file mode 100644 index 3f60475ad5..0000000000 --- a/src/USER-CUDA/pair_gran_hooke_cuda.cpp +++ /dev/null @@ -1,250 +0,0 @@ -/* ---------------------------------------------------------------------- - LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator - - Original Version: - http://lammps.sandia.gov, Sandia National Laboratories - Steve Plimpton, sjplimp@sandia.gov - - See the README file in the top-level LAMMPS directory. - - ----------------------------------------------------------------------- - - USER-CUDA Package and associated modifications: - https://sourceforge.net/projects/lammpscuda/ - - Christian Trott, christian.trott@tu-ilmenau.de - Lars Winterfeld, lars.winterfeld@tu-ilmenau.de - Theoretical Physics II, University of Technology Ilmenau, Germany - - See the README file in the USER-CUDA directory. - - This software is distributed under the GNU General Public License. -------------------------------------------------------------------------- */ - -/* ---------------------------------------------------------------------- - LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator - http://lammps.sandia.gov, Sandia National Laboratories - Steve Plimpton, sjplimp@sandia.gov - - Copyright (2003) Sandia Corporation. 
Under the terms of Contract - DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains - certain rights in this software. This software is distributed under - the GNU General Public License. - - See the README file in the top-level LAMMPS directory. -------------------------------------------------------------------------- */ - -/* ---------------------------------------------------------------------- - Contributing author: Paul Crozier (SNL) -------------------------------------------------------------------------- */ - -#include <cmath> -#include <cstdio> -#include <cstdlib> -#include <cstring> -#include "pair_gran_hooke_cuda.h" -#include "pair_gran_hooke_cuda_cu.h" -#include "cuda_data.h" -#include "atom.h" -#include "comm.h" -#include "force.h" -#include "neighbor.h" -#include "neigh_list.h" -#include "neigh_request.h" -#include "modify.h" -#include "fix_pour.h" -#include "cuda_neigh_list.h" -#include "update.h" -#include "integrate.h" -#include "respa.h" -#include "memory.h" -#include "error.h" -#include "user_cuda.h" - -using namespace LAMMPS_NS; - -/* ---------------------------------------------------------------------- */ - -PairGranHookeCuda::PairGranHookeCuda(LAMMPS *lmp) : PairGranHooke(lmp) -{ - cuda = lmp->cuda; - if(cuda == NULL) - error->all(FLERR,"You cannot use a /cuda class, without activating 'cuda' acceleration. Provide '-c on' as command-line argument to LAMMPS.."); - - allocated2 = false; - cuda->shared_data.pair.cudable_force = 1; - cuda->setSystemParams(); -} - -/* ---------------------------------------------------------------------- - remember pointer to arrays in cuda shared data -------------------------------------------------------------------------- */ - -void PairGranHookeCuda::allocate() -{ - if(! allocated) PairGranHooke::allocate(); - if(! 
allocated2) - { - allocated2 = true; - int n = atom->ntypes; - cuda->shared_data.pair.cutsq = cutsq; - memory->create(cuda->shared_data.pair.coeff1,n+1,n+1, - "pair:cuda_coeff1"); - memory->create(cuda->shared_data.pair.coeff2, - n+1,n+1,"pair:cuda_coeff2"); - cuda->shared_data.pair.coeff1[0][0]=kn; - cuda->shared_data.pair.coeff1[0][1]=kt; - cuda->shared_data.pair.coeff1[1][0]=gamman; - cuda->shared_data.pair.coeff1[1][1]=gammat; - cuda->shared_data.pair.coeff2[0][0]=xmu; - cuda->shared_data.pair.coeff2[0][1]=dampflag; - } -} - -/* ---------------------------------------------------------------------- */ - -void PairGranHookeCuda::compute(int eflag, int vflag) -{ - cuda->shared_data.pair.use_block_per_atom = 0; - //cuda->cu_debugdata->memset_device(0); - if (eflag || vflag) ev_setup(eflag,vflag); - if(eflag) cuda->cu_eng_vdwl->upload(); - if(vflag) cuda->cu_virial->upload(); - - Cuda_PairGranHookeCuda(& cuda->shared_data, & cuda_neigh_list->sneighlist, eflag, vflag, eflag_atom, vflag_atom); - - if(not cuda->shared_data.pair.collect_forces_later) - { - if(eflag) cuda->cu_eng_vdwl->download(); - if(vflag) cuda->cu_virial->download(); - } - //cuda->cu_debugdata->download(); - //printf("%lf %lf %lf %lf %lf %lf\n",1.0e-6*cuda->debugdata[0],1.0e-6*cuda->debugdata[1],1.0e-6*cuda->debugdata[2],1.0e-6*cuda->debugdata[3],1.0e-6*cuda->debugdata[4],1.0e-6*cuda->debugdata[5]); - -} - -/* ---------------------------------------------------------------------- */ - -void PairGranHookeCuda::settings(int narg, char **arg) -{ - PairGranHooke::settings(narg, arg); - } - -/* ---------------------------------------------------------------------- */ - -void PairGranHookeCuda::coeff(int narg, char **arg) -{ - PairGranHooke::coeff(narg, arg); - allocate(); -} - -void PairGranHookeCuda::init_style() -{ - int i; - MYDBG(printf("# CUDA PairGranHookeCuda::init_style start\n"); ) - // request regular or rRESPA neighbor lists - - int irequest; - - if (update->whichflag == 0 && 
strstr(update->integrate_style,"respa")) { - - } - else - { - irequest = neighbor->request(this,instance_me); - neighbor->requests[irequest]->full = 1; - neighbor->requests[irequest]->half = 0; - neighbor->requests[irequest]->gran = 1; - neighbor->requests[irequest]->cudable = 1; - //neighbor->style=0; //0=NSQ neighboring - } - - if (!atom->radius_flag || !atom->omega_flag || !atom->torque_flag) - error->all(FLERR,"Pair granular requires atom attributes radius, omega, torque"); - if (comm->ghost_velocity == 0) - error->all(FLERR,"Pair granular requires ghost atoms store velocity"); - - // need a half neigh list and optionally a granular history neigh list - - dt = update->dt; - - // check for Fix freeze and set freeze_group_bit - - for (i = 0; i < modify->nfix; i++) - if (strcmp(modify->fix[i]->style,"freeze") == 0) break; - if (i < modify->nfix) freeze_group_bit = modify->fix[i]->groupbit; - else freeze_group_bit = 0; - - cuda->shared_data.pair.freeze_group_bit=freeze_group_bit; - - // check for FixPour and FixDeposit so can extract particle radii - - int ipour; - for (ipour = 0; ipour < modify->nfix; ipour++) - if (strcmp(modify->fix[ipour]->style,"pour") == 0) break; - if (ipour == modify->nfix) ipour = -1; - - int idep; - for (idep = 0; idep < modify->nfix; idep++) - if (strcmp(modify->fix[idep]->style,"deposit") == 0) break; - if (idep == modify->nfix) idep = -1; - - // set maxrad_dynamic and maxrad_frozen for each type - // include future FixPour and FixDeposit particles as dynamic - - int itype; - for (i = 1; i <= atom->ntypes; i++) { - onerad_dynamic[i] = onerad_frozen[i] = 0.0; - if (ipour >= 0) { - itype = i; - onerad_dynamic[i] = - *((double *) modify->fix[ipour]->extract("radius",itype)); - } - if (idep >= 0) { - itype = i; - onerad_dynamic[i] = - *((double *) modify->fix[idep]->extract("radius",itype)); - } - } - - double *radius = atom->radius; - int *mask = atom->mask; - int *type = atom->type; - int nlocal = atom->nlocal; - - for (i = 0; i < nlocal; 
i++) - if (mask[i] & freeze_group_bit) - onerad_frozen[type[i]] = MAX(onerad_frozen[type[i]],radius[i]); - else - onerad_dynamic[type[i]] = MAX(onerad_dynamic[type[i]],radius[i]); - - MPI_Allreduce(&onerad_dynamic[1],&maxrad_dynamic[1],atom->ntypes, - MPI_DOUBLE,MPI_MAX,world); - MPI_Allreduce(&onerad_frozen[1],&maxrad_frozen[1],atom->ntypes, - MPI_DOUBLE,MPI_MAX,world); -} - -void PairGranHookeCuda::init_list(int id, NeighList *ptr) -{ - MYDBG(printf("# CUDA PairGranHookeCuda::init_list\n");) - PairGranHooke::init_list(id, ptr); - #ifndef CUDA_USE_BINNING - // right now we can only handle verlet (id 0), not respa - if(id == 0) cuda_neigh_list = cuda->registerNeighborList(ptr); - // see Neighbor::init() for details on lammps lists' logic - #endif - MYDBG(printf("# CUDA PairGranHookeCuda::init_list end\n");) -} - -void PairGranHookeCuda::ev_setup(int eflag, int vflag) -{ - int maxeatomold=maxeatom; - PairGranHooke::ev_setup(eflag,vflag); - - if (eflag_atom && atom->nmax > maxeatomold) - {delete cuda->cu_eatom; cuda->cu_eatom = new cCudaData<double, ENERGY_CFLOAT, x > ((double*)eatom, & cuda->shared_data.atom.eatom , atom->nmax );} - - if (eflag_atom && atom->nmax > maxeatomold) - {delete cuda->cu_vatom; cuda->cu_vatom = new cCudaData<double, ENERGY_CFLOAT, yx > ((double*)vatom, & cuda->shared_data.atom.eatom , atom->nmax, 6 );} - -} diff --git a/src/USER-CUDA/pair_gran_hooke_cuda.h b/src/USER-CUDA/pair_gran_hooke_cuda.h deleted file mode 100644 index 6fa622ab40..0000000000 --- a/src/USER-CUDA/pair_gran_hooke_cuda.h +++ /dev/null @@ -1,57 +0,0 @@ -/* -*- c++ -*- ---------------------------------------------------------- - LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator - - Original Version: - http://lammps.sandia.gov, Sandia National Laboratories - Steve Plimpton, sjplimp@sandia.gov - - See the README file in the top-level LAMMPS directory. 
- - ----------------------------------------------------------------------- - - USER-CUDA Package and associated modifications: - https://sourceforge.net/projects/lammpscuda/ - - Christian Trott, christian.trott@tu-ilmenau.de - Lars Winterfeld, lars.winterfeld@tu-ilmenau.de - Theoretical Physics II, University of Technology Ilmenau, Germany - - See the README file in the USER-CUDA directory. - - This software is distributed under the GNU General Public License. -------------------------------------------------------------------------- */ - -#ifdef PAIR_CLASS - -PairStyle(gran/hooke/cuda,PairGranHookeCuda) - -#else - -#ifndef PAIR_GRAN_HOOKE_CUDA_H -#define PAIR_GRAN_HOOKE_CUDA_H - -#include "pair_gran_hooke.h" - -namespace LAMMPS_NS { - -class PairGranHookeCuda : public PairGranHooke -{ - public: - PairGranHookeCuda(class LAMMPS *); - void compute(int, int); - void settings(int, char **); - void coeff(int, char **); - void init_list(int, class NeighList *); - void init_style(); - void ev_setup(int eflag, int vflag); - protected: - class Cuda *cuda; - void allocate(); - bool allocated2; - class CudaNeighList* cuda_neigh_list; -}; - -} - -#endif -#endif diff --git a/src/USER-CUDA/pair_lj96_cut_cuda.cpp b/src/USER-CUDA/pair_lj96_cut_cuda.cpp deleted file mode 100644 index 7edb722d36..0000000000 --- a/src/USER-CUDA/pair_lj96_cut_cuda.cpp +++ /dev/null @@ -1,179 +0,0 @@ -/* ---------------------------------------------------------------------- - LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator - - Original Version: - http://lammps.sandia.gov, Sandia National Laboratories - Steve Plimpton, sjplimp@sandia.gov - - See the README file in the top-level LAMMPS directory. 
- - ----------------------------------------------------------------------- - - USER-CUDA Package and associated modifications: - https://sourceforge.net/projects/lammpscuda/ - - Christian Trott, christian.trott@tu-ilmenau.de - Lars Winterfeld, lars.winterfeld@tu-ilmenau.de - Theoretical Physics II, University of Technology Ilmenau, Germany - - See the README file in the USER-CUDA directory. - - This software is distributed under the GNU General Public License. -------------------------------------------------------------------------- */ - -/* ---------------------------------------------------------------------- - LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator - http://lammps.sandia.gov, Sandia National Laboratories - Steve Plimpton, sjplimp@sandia.gov - - Copyright (2003) Sandia Corporation. Under the terms of Contract - DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains - certain rights in this software. This software is distributed under - the GNU General Public License. - - See the README file in the top-level LAMMPS directory. 
-------------------------------------------------------------------------- */ - -/* ---------------------------------------------------------------------- - Contributing author: Paul Crozier (SNL) -------------------------------------------------------------------------- */ - -#include <cmath> -#include <cstdio> -#include <cstdlib> -#include <cstring> -#include "pair_lj96_cut_cuda.h" -#include "pair_lj96_cut_cuda_cu.h" -#include "cuda_data.h" -#include "atom.h" -#include "comm.h" -#include "force.h" -#include "neighbor.h" -#include "neigh_list.h" -#include "neigh_request.h" -#include "cuda_neigh_list.h" -#include "update.h" -#include "integrate.h" -#include "respa.h" -#include "memory.h" -#include "error.h" -#include "user_cuda.h" - -using namespace LAMMPS_NS; - -/* ---------------------------------------------------------------------- */ - -PairLJ96CutCuda::PairLJ96CutCuda(LAMMPS *lmp) : PairLJ96Cut(lmp) -{ - cuda = lmp->cuda; - if(cuda == NULL) - error->all(FLERR,"You cannot use a /cuda class, without activating 'cuda' acceleration. Provide '-c on' as command-line argument to LAMMPS.."); - - allocated2 = false; - cuda->shared_data.pair.cudable_force = 1; - cuda->setSystemParams(); -} - -/* ---------------------------------------------------------------------- - remember pointer to arrays in cuda shared data -------------------------------------------------------------------------- */ - -void PairLJ96CutCuda::allocate() -{ - if(! allocated) PairLJ96Cut::allocate(); - if(! 
allocated2) - { - allocated2 = true; - cuda->shared_data.pair.cut = cut; - cuda->shared_data.pair.coeff1 = lj1; - cuda->shared_data.pair.coeff2 = lj2; - cuda->shared_data.pair.coeff3 = lj3; - cuda->shared_data.pair.coeff4 = lj4; - cuda->shared_data.pair.offset = offset; - cuda->shared_data.pair.special_lj = force->special_lj; - cuda->shared_data.pair.special_coul = force->special_coul; - } -} - -/* ---------------------------------------------------------------------- */ - -void PairLJ96CutCuda::compute(int eflag, int vflag) -{ - if (eflag || vflag) ev_setup(eflag,vflag); - if(eflag) cuda->cu_eng_vdwl->upload(); - if(vflag) cuda->cu_virial->upload(); - - Cuda_PairLJ96CutCuda(& cuda->shared_data, & cuda_neigh_list->sneighlist, eflag, vflag, eflag_atom, vflag_atom); - - if(not cuda->shared_data.pair.collect_forces_later) - { - if(eflag) cuda->cu_eng_vdwl->download(); - if(vflag) cuda->cu_virial->download(); - } - -} - -/* ---------------------------------------------------------------------- */ - -void PairLJ96CutCuda::settings(int narg, char **arg) -{ - PairLJ96Cut::settings(narg, arg); - cuda->shared_data.pair.cut_global = (F_CFLOAT) cut_global; -} - -/* ---------------------------------------------------------------------- */ - -void PairLJ96CutCuda::coeff(int narg, char **arg) -{ - PairLJ96Cut::coeff(narg, arg); - allocate(); -} - -void PairLJ96CutCuda::init_style() -{ - MYDBG(printf("# CUDA PairLJ96CutCuda::init_style start\n"); ) - // request regular or rRESPA neighbor lists - - int irequest; - - if (update->whichflag == 0 && strstr(update->integrate_style,"respa")) { - - } - else - { - irequest = neighbor->request(this,instance_me); - neighbor->requests[irequest]->full = 1; - neighbor->requests[irequest]->half = 0; - neighbor->requests[irequest]->cudable = 1; - //neighbor->style=0; //0=NSQ neighboring - } - - - cut_respa = NULL; - MYDBG(printf("# CUDA PairLJ96CutCuda::init_style end\n"); ) -} - -void PairLJ96CutCuda::init_list(int id, NeighList *ptr) -{ - 
MYDBG(printf("# CUDA PairLJ96CutCuda::init_list\n");) - PairLJ96Cut::init_list(id, ptr); - #ifndef CUDA_USE_BINNING - // right now we can only handle verlet (id 0), not respa - if(id == 0) cuda_neigh_list = cuda->registerNeighborList(ptr); - // see Neighbor::init() for details on lammps lists' logic - #endif - MYDBG(printf("# CUDA PairLJ96CutCuda::init_list end\n");) -} - -void PairLJ96CutCuda::ev_setup(int eflag, int vflag) -{ - int maxeatomold=maxeatom; - PairLJ96Cut::ev_setup(eflag,vflag); - - if (eflag_atom && atom->nmax > maxeatomold) - {delete cuda->cu_eatom; cuda->cu_eatom = new cCudaData<double, ENERGY_CFLOAT, x > ((double*)eatom, & cuda->shared_data.atom.eatom , atom->nmax );} - - if (vflag_atom && atom->nmax > maxeatomold) - {delete cuda->cu_vatom; cuda->cu_vatom = new cCudaData<double, ENERGY_CFLOAT, yx > ((double*)vatom, & cuda->shared_data.atom.vatom , atom->nmax, 6 );} - -} diff --git a/src/USER-CUDA/pair_lj96_cut_cuda.h b/src/USER-CUDA/pair_lj96_cut_cuda.h deleted file mode 100644 index 8a8f36e504..0000000000 --- a/src/USER-CUDA/pair_lj96_cut_cuda.h +++ /dev/null @@ -1,57 +0,0 @@ -/* -*- c++ -*- ---------------------------------------------------------- - LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator - - Original Version: - http://lammps.sandia.gov, Sandia National Laboratories - Steve Plimpton, sjplimp@sandia.gov - - See the README file in the top-level LAMMPS directory. - - ----------------------------------------------------------------------- - - USER-CUDA Package and associated modifications: - https://sourceforge.net/projects/lammpscuda/ - - Christian Trott, christian.trott@tu-ilmenau.de - Lars Winterfeld, lars.winterfeld@tu-ilmenau.de - Theoretical Physics II, University of Technology Ilmenau, Germany - - See the README file in the USER-CUDA directory. - - This software is distributed under the GNU General Public License. 
-------------------------------------------------------------------------- */ - -#ifdef PAIR_CLASS - -PairStyle(lj96/cut/cuda,PairLJ96CutCuda) - -#else - -#ifndef PAIR_LJ96_CUT_CUDA_H -#define PAIR_LJ96_CUT_CUDA_H - -#include "pair_lj96_cut.h" - -namespace LAMMPS_NS { - -class PairLJ96CutCuda : public PairLJ96Cut -{ - public: - PairLJ96CutCuda(class LAMMPS *); - void compute(int, int); - void settings(int, char **); - void coeff(int, char **); - void init_list(int, class NeighList *); - void init_style(); - void ev_setup(int eflag, int vflag); - protected: - class Cuda *cuda; - void allocate(); - bool allocated2; - class CudaNeighList* cuda_neigh_list; -}; - -} - -#endif -#endif diff --git a/src/USER-CUDA/pair_lj_charmm_coul_charmm_cuda.cpp b/src/USER-CUDA/pair_lj_charmm_coul_charmm_cuda.cpp deleted file mode 100644 index 3a0ad0e288..0000000000 --- a/src/USER-CUDA/pair_lj_charmm_coul_charmm_cuda.cpp +++ /dev/null @@ -1,188 +0,0 @@ -/* ---------------------------------------------------------------------- - LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator - - Original Version: - http://lammps.sandia.gov, Sandia National Laboratories - Steve Plimpton, sjplimp@sandia.gov - - See the README file in the top-level LAMMPS directory. - - Contributing author: Paul Crozier (SNL) - ----------------------------------------------------------------------- - - USER-CUDA Package and associated modifications: - https://sourceforge.net/projects/lammpscuda/ - - Christian Trott, christian.trott@tu-ilmenau.de - Lars Winterfeld, lars.winterfeld@tu-ilmenau.de - Theoretical Physics II, University of Technology Ilmenau, Germany - - See the README file in the USER-CUDA directory. - - This software is distributed under the GNU General Public License. 
-------------------------------------------------------------------------- */ - -#include <cmath> -#include <cstdio> -#include <cstdlib> -#include <cstring> -#include "pair_lj_charmm_coul_charmm_cuda.h" -#include "pair_lj_charmm_coul_charmm_cuda_cu.h" -#include "cuda_data.h" -#include "atom.h" -#include "comm.h" -#include "force.h" -#include "kspace.h" -#include "neighbor.h" -#include "neigh_list.h" -#include "neigh_request.h" -#include "cuda_neigh_list.h" -#include "update.h" -#include "integrate.h" -#include "respa.h" -#include "memory.h" -#include "error.h" -#include "user_cuda.h" - -using namespace LAMMPS_NS; - -/* ---------------------------------------------------------------------- */ - -PairLJCharmmCoulCharmmCuda::PairLJCharmmCoulCharmmCuda(LAMMPS *lmp) : PairLJCharmmCoulCharmm(lmp) -{ - cuda = lmp->cuda; - if(cuda == NULL) - error->all(FLERR,"You cannot use a /cuda class, without activating 'cuda' acceleration. Provide '-c on' as command-line argument to LAMMPS.."); - - allocated2 = false; - cuda->shared_data.pair.cudable_force = 1; - cuda->shared_data.pair.use_block_per_atom = 0; - cuda->setSystemParams(); -} - -/* ---------------------------------------------------------------------- - remember pointer to arrays in cuda shared data -------------------------------------------------------------------------- */ - -void PairLJCharmmCoulCharmmCuda::allocate() -{ - if(! allocated) PairLJCharmmCoulCharmm::allocate(); - if(! 
allocated2) - { - allocated2 = true; - cuda->shared_data.pair.coeff1 = lj1; - cuda->shared_data.pair.coeff2 = lj2; - cuda->shared_data.pair.coeff3 = lj3; - cuda->shared_data.pair.coeff4 = lj4; - cuda->shared_data.pair.special_lj = force->special_lj; - cuda->shared_data.pair.special_coul = force->special_coul; - cu_lj1_gm = new cCudaData<double, F_CFLOAT, x> ((double*)lj1, &cuda->shared_data.pair.coeff1_gm, (atom->ntypes+1)*(atom->ntypes+1)); - cu_lj2_gm = new cCudaData<double, F_CFLOAT, x> ((double*)lj2, &cuda->shared_data.pair.coeff2_gm, (atom->ntypes+1)*(atom->ntypes+1)); - cu_lj3_gm = new cCudaData<double, F_CFLOAT, x> ((double*)lj3, &cuda->shared_data.pair.coeff3_gm, (atom->ntypes+1)*(atom->ntypes+1)); - cu_lj4_gm = new cCudaData<double, F_CFLOAT, x> ((double*)lj4, &cuda->shared_data.pair.coeff4_gm, (atom->ntypes+1)*(atom->ntypes+1)); - } -} - -/* ---------------------------------------------------------------------- */ - -void PairLJCharmmCoulCharmmCuda::compute(int eflag, int vflag) -{ - if (eflag || vflag) ev_setup(eflag,vflag); - if(not cuda->shared_data.pair.collect_forces_later) - { - if(eflag) cuda->cu_eng_vdwl->upload(); - if(eflag) cuda->cu_eng_coul->upload(); - if(vflag) cuda->cu_virial->upload(); - } - - Cuda_PairLJCharmmCoulCharmmCuda(& cuda->shared_data, & cuda_neigh_list->sneighlist, eflag, vflag, eflag_atom, vflag_atom,denom_lj,cut_coul_innersq,denom_coul); - - if(not cuda->shared_data.pair.collect_forces_later) - { - if(eflag) cuda->cu_eng_vdwl->download(); - if(eflag) cuda->cu_eng_coul->download(); - if(vflag) cuda->cu_virial->download(); - } -} - -/* ---------------------------------------------------------------------- */ - -void PairLJCharmmCoulCharmmCuda::settings(int narg, char **arg) -{ - PairLJCharmmCoulCharmm::settings(narg, arg); - cuda->shared_data.pair.cut_global = (X_CFLOAT) cut_lj; - cuda->shared_data.pair.cut_coulsq_global = (X_CFLOAT) cut_coulsq; - cuda->shared_data.pair.cut_inner_global = (F_CFLOAT) cut_lj_inner; -} - -/* 
---------------------------------------------------------------------- */ - -void PairLJCharmmCoulCharmmCuda::coeff(int narg, char **arg) -{ - PairLJCharmmCoulCharmm::coeff(narg, arg); - allocate(); -} - -void PairLJCharmmCoulCharmmCuda::init_style() -{ - if (!atom->q_flag) - error->all(FLERR,"Pair style lj/charmm/coul/long requires atom attribute q"); - // request regular or rRESPA neighbor lists - - if(atom->molecular) - { - cuda->shared_data.pair.collect_forces_later = 1; - } - - int irequest; - - irequest = neighbor->request(this,instance_me); - neighbor->requests[irequest]->full = 1; - neighbor->requests[irequest]->half = 0; - neighbor->requests[irequest]->cudable = 1; - - if (cut_lj_inner >= cut_lj || cut_coul_inner >= cut_coul) - error->all(FLERR,"Pair inner cutoff >= Pair outer cutoff"); - - cut_lj_innersq = cut_lj_inner * cut_lj_inner; - cut_ljsq = cut_lj * cut_lj; - cut_coul_innersq = cut_coul_inner * cut_coul_inner; - cut_coulsq = cut_coul * cut_coul; - cut_bothsq = MAX(cut_ljsq,cut_coulsq); - - denom_lj = (cut_ljsq-cut_lj_innersq) * (cut_ljsq-cut_lj_innersq) * - (cut_ljsq-cut_lj_innersq); - denom_coul = (cut_coulsq-cut_coul_innersq) * (cut_coulsq-cut_coul_innersq) * - (cut_coulsq-cut_coul_innersq); - - cut_coulsq = cut_coul * cut_coul; - - cuda->shared_data.pair.cut_coulsq_global=cut_coulsq; - - cuda->shared_data.pppm.qqrd2e=force->qqrd2e; -} - -void PairLJCharmmCoulCharmmCuda::init_list(int id, NeighList *ptr) -{ - MYDBG(printf("# CUDA PairLJCharmmCoulCharmmCuda::init_list\n");) - PairLJCharmmCoulCharmm::init_list(id, ptr); - #ifndef CUDA_USE_BINNING - // right now we can only handle verlet (id 0), not respa - if(id == 0) cuda_neigh_list = cuda->registerNeighborList(ptr); - // see Neighbor::init() for details on lammps lists' logic - #endif - MYDBG(printf("# CUDA PairLJCharmmCoulCharmmCuda::init_list end\n");) -} - -void PairLJCharmmCoulCharmmCuda::ev_setup(int eflag, int vflag) -{ - int maxeatomold=maxeatom; - 
PairLJCharmmCoulCharmm::ev_setup(eflag,vflag); - - if (eflag_atom && atom->nmax > maxeatomold) - {delete cuda->cu_eatom; cuda->cu_eatom = new cCudaData<double, ENERGY_CFLOAT, x > ((double*)eatom, & cuda->shared_data.atom.eatom , atom->nmax );} - - if (vflag_atom && atom->nmax > maxeatomold) - {delete cuda->cu_vatom; cuda->cu_vatom = new cCudaData<double, ENERGY_CFLOAT, yx > ((double*)vatom, & cuda->shared_data.atom.vatom , atom->nmax, 6 );} - -} diff --git a/src/USER-CUDA/pair_lj_charmm_coul_charmm_cuda.h b/src/USER-CUDA/pair_lj_charmm_coul_charmm_cuda.h deleted file mode 100644 index c19411f03c..0000000000 --- a/src/USER-CUDA/pair_lj_charmm_coul_charmm_cuda.h +++ /dev/null @@ -1,63 +0,0 @@ -/* -*- c++ -*- ---------------------------------------------------------- - LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator - - Original Version: - http://lammps.sandia.gov, Sandia National Laboratories - Steve Plimpton, sjplimp@sandia.gov - - See the README file in the top-level LAMMPS directory. - - ----------------------------------------------------------------------- - - USER-CUDA Package and associated modifications: - https://sourceforge.net/projects/lammpscuda/ - - Christian Trott, christian.trott@tu-ilmenau.de - Lars Winterfeld, lars.winterfeld@tu-ilmenau.de - Theoretical Physics II, University of Technology Ilmenau, Germany - - See the README file in the USER-CUDA directory. - - This software is distributed under the GNU General Public License. 
-------------------------------------------------------------------------- */ - -#ifdef PAIR_CLASS - -PairStyle(lj/charmm/coul/charmm/cuda,PairLJCharmmCoulCharmmCuda) - -#else - -#ifndef LMP_PAIR_LJ_CHARMM_COUL_CHARMM_CUDA_H -#define LMP_PAIR_LJ_CHARMM_COUL_CHARMM_CUDA_H - -#include "pair_lj_charmm_coul_charmm.h" -#include "cuda_data.h" - -namespace LAMMPS_NS { - -class PairLJCharmmCoulCharmmCuda : public PairLJCharmmCoulCharmm -{ - public: - PairLJCharmmCoulCharmmCuda(class LAMMPS *); - void compute(int, int); - void settings(int, char **); - void coeff(int, char **); - void init_list(int, class NeighList *); - void init_style(); - void ev_setup(int eflag, int vflag); - protected: - class Cuda *cuda; - void allocate(); - bool allocated2; - class CudaNeighList* cuda_neigh_list; - cCudaData<double , F_CFLOAT , x >* cu_lj1_gm; - cCudaData<double , F_CFLOAT , x >* cu_lj2_gm; - cCudaData<double , F_CFLOAT , x >* cu_lj3_gm; - cCudaData<double , F_CFLOAT , x >* cu_lj4_gm; - -}; - -} - -#endif -#endif diff --git a/src/USER-CUDA/pair_lj_charmm_coul_charmm_implicit_cuda.cpp b/src/USER-CUDA/pair_lj_charmm_coul_charmm_implicit_cuda.cpp deleted file mode 100644 index c2f2ca871f..0000000000 --- a/src/USER-CUDA/pair_lj_charmm_coul_charmm_implicit_cuda.cpp +++ /dev/null @@ -1,183 +0,0 @@ -/* ---------------------------------------------------------------------- - LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator - - Original Version: - http://lammps.sandia.gov, Sandia National Laboratories - Steve Plimpton, sjplimp@sandia.gov - - See the README file in the top-level LAMMPS directory. 
- - Contributing author: Paul Crozier (SNL) - ----------------------------------------------------------------------- - - USER-CUDA Package and associated modifications: - https://sourceforge.net/projects/lammpscuda/ - - Christian Trott, christian.trott@tu-ilmenau.de - Lars Winterfeld, lars.winterfeld@tu-ilmenau.de - Theoretical Physics II, University of Technology Ilmenau, Germany - - See the README file in the USER-CUDA directory. - - This software is distributed under the GNU General Public License. -------------------------------------------------------------------------- */ - -#include <cmath> -#include <cstdio> -#include <cstdlib> -#include <cstring> -#include "pair_lj_charmm_coul_charmm_implicit_cuda.h" -#include "pair_lj_charmm_coul_charmm_implicit_cuda_cu.h" -#include "cuda_data.h" -#include "atom.h" -#include "comm.h" -#include "force.h" -#include "kspace.h" -#include "neighbor.h" -#include "neigh_list.h" -#include "neigh_request.h" -#include "cuda_neigh_list.h" -#include "update.h" -#include "integrate.h" -#include "respa.h" -#include "memory.h" -#include "error.h" -#include "user_cuda.h" - -using namespace LAMMPS_NS; - -/* ---------------------------------------------------------------------- */ - -PairLJCharmmCoulCharmmImplicitCuda::PairLJCharmmCoulCharmmImplicitCuda(LAMMPS *lmp) : PairLJCharmmCoulCharmmImplicit(lmp) -{ - cuda = lmp->cuda; - if(cuda == NULL) - error->all(FLERR,"You cannot use a /cuda class, without activating 'cuda' acceleration. Provide '-c on' as command-line argument to LAMMPS.."); - - allocated2 = false; - cuda->shared_data.pair.cudable_force = 1; - cuda->shared_data.pair.collect_forces_later = 1; - cuda->setSystemParams(); -} - -/* ---------------------------------------------------------------------- - remember pointer to arrays in cuda shared data -------------------------------------------------------------------------- */ - -void PairLJCharmmCoulCharmmImplicitCuda::allocate() -{ - if(! 
allocated) PairLJCharmmCoulCharmmImplicit::allocate(); - if(! allocated2) - { - allocated2 = true; - cuda->shared_data.pair.coeff1 = lj1; - cuda->shared_data.pair.coeff2 = lj2; - cuda->shared_data.pair.coeff3 = lj3; - cuda->shared_data.pair.coeff4 = lj4; - cuda->shared_data.pair.special_lj = force->special_lj; - cuda->shared_data.pair.special_coul = force->special_coul; - cu_lj1_gm = new cCudaData<double, F_CFLOAT, x> ((double*)lj1, &cuda->shared_data.pair.coeff1_gm, (atom->ntypes+1)*(atom->ntypes+1)); - cu_lj2_gm = new cCudaData<double, F_CFLOAT, x> ((double*)lj2, &cuda->shared_data.pair.coeff2_gm, (atom->ntypes+1)*(atom->ntypes+1)); - cu_lj3_gm = new cCudaData<double, F_CFLOAT, x> ((double*)lj3, &cuda->shared_data.pair.coeff3_gm, (atom->ntypes+1)*(atom->ntypes+1)); - cu_lj4_gm = new cCudaData<double, F_CFLOAT, x> ((double*)lj4, &cuda->shared_data.pair.coeff4_gm, (atom->ntypes+1)*(atom->ntypes+1)); - } -} - -/* ---------------------------------------------------------------------- */ - -void PairLJCharmmCoulCharmmImplicitCuda::compute(int eflag, int vflag) -{ - if (eflag || vflag) ev_setup(eflag,vflag); - if(not cuda->shared_data.pair.collect_forces_later) - { - if(eflag) cuda->cu_eng_vdwl->upload(); - if(eflag) cuda->cu_eng_coul->upload(); - if(vflag) cuda->cu_virial->upload(); - } - - Cuda_PairLJCharmmCoulCharmmImplicitCuda(& cuda->shared_data, & cuda_neigh_list->sneighlist, eflag, vflag, eflag_atom, vflag_atom,denom_lj,cut_coul_innersq,denom_coul); - - if(not cuda->shared_data.pair.collect_forces_later) - { - if(eflag) cuda->cu_eng_vdwl->download(); - if(eflag) cuda->cu_eng_coul->download(); - if(vflag) cuda->cu_virial->download(); - } -} - -/* ---------------------------------------------------------------------- */ - -void PairLJCharmmCoulCharmmImplicitCuda::settings(int narg, char **arg) -{ - PairLJCharmmCoulCharmmImplicit::settings(narg, arg); - cuda->shared_data.pair.cut_global = (X_CFLOAT) cut_lj; - cuda->shared_data.pair.cut_coulsq_global = (X_CFLOAT) 
cut_coulsq; - cuda->shared_data.pair.cut_inner_global = (F_CFLOAT) cut_lj_inner; -} - -/* ---------------------------------------------------------------------- */ - -void PairLJCharmmCoulCharmmImplicitCuda::coeff(int narg, char **arg) -{ - PairLJCharmmCoulCharmmImplicit::coeff(narg, arg); - allocate(); -} - -void PairLJCharmmCoulCharmmImplicitCuda::init_style() -{ - if (!atom->q_flag) - error->all(FLERR,"Pair style lj/charmm/coul/long requires atom attribute q"); - // request regular or rRESPA neighbor lists - - int irequest; - - irequest = neighbor->request(this,instance_me); - neighbor->requests[irequest]->full = 1; - neighbor->requests[irequest]->half = 0; - neighbor->requests[irequest]->cudable = 1; - - if (cut_lj_inner >= cut_lj || cut_coul_inner >= cut_coul) - error->all(FLERR,"Pair inner cutoff >= Pair outer cutoff"); - - cut_lj_innersq = cut_lj_inner * cut_lj_inner; - cut_ljsq = cut_lj * cut_lj; - cut_coul_innersq = cut_coul_inner * cut_coul_inner; - cut_coulsq = cut_coul * cut_coul; - cut_bothsq = MAX(cut_ljsq,cut_coulsq); - - denom_lj = (cut_ljsq-cut_lj_innersq) * (cut_ljsq-cut_lj_innersq) * - (cut_ljsq-cut_lj_innersq); - denom_coul = (cut_coulsq-cut_coul_innersq) * (cut_coulsq-cut_coul_innersq) * - (cut_coulsq-cut_coul_innersq); - - cut_coulsq = cut_coul * cut_coul; - - cuda->shared_data.pair.cut_coulsq_global=cut_coulsq; - - cuda->shared_data.pppm.qqrd2e=force->qqrd2e; -} - -void PairLJCharmmCoulCharmmImplicitCuda::init_list(int id, NeighList *ptr) -{ - MYDBG(printf("# CUDA PairLJCharmmCoulCharmmImplicitCuda::init_list\n");) - PairLJCharmmCoulCharmmImplicit::init_list(id, ptr); - #ifndef CUDA_USE_BINNING - // right now we can only handle verlet (id 0), not respa - if(id == 0) cuda_neigh_list = cuda->registerNeighborList(ptr); - // see Neighbor::init() for details on lammps lists' logic - #endif - MYDBG(printf("# CUDA PairLJCharmmCoulCharmmImplicitCuda::init_list end\n");) -} - -void PairLJCharmmCoulCharmmImplicitCuda::ev_setup(int eflag, int vflag) -{ 
- int maxeatomold=maxeatom; - PairLJCharmmCoulCharmmImplicit::ev_setup(eflag,vflag); - - if (eflag_atom && atom->nmax > maxeatomold) - {delete cuda->cu_eatom; cuda->cu_eatom = new cCudaData<double, ENERGY_CFLOAT, x > ((double*)eatom, & cuda->shared_data.atom.eatom , atom->nmax );} - - if (vflag_atom && atom->nmax > maxeatomold) - {delete cuda->cu_vatom; cuda->cu_vatom = new cCudaData<double, ENERGY_CFLOAT, yx > ((double*)vatom, & cuda->shared_data.atom.vatom , atom->nmax, 6 );} - -} diff --git a/src/USER-CUDA/pair_lj_charmm_coul_charmm_implicit_cuda.h b/src/USER-CUDA/pair_lj_charmm_coul_charmm_implicit_cuda.h deleted file mode 100644 index b3cc8c9336..0000000000 --- a/src/USER-CUDA/pair_lj_charmm_coul_charmm_implicit_cuda.h +++ /dev/null @@ -1,62 +0,0 @@ -/* -*- c++ -*- ---------------------------------------------------------- - LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator - - Original Version: - http://lammps.sandia.gov, Sandia National Laboratories - Steve Plimpton, sjplimp@sandia.gov - - See the README file in the top-level LAMMPS directory. - - ----------------------------------------------------------------------- - - USER-CUDA Package and associated modifications: - https://sourceforge.net/projects/lammpscuda/ - - Christian Trott, christian.trott@tu-ilmenau.de - Lars Winterfeld, lars.winterfeld@tu-ilmenau.de - Theoretical Physics II, University of Technology Ilmenau, Germany - - See the README file in the USER-CUDA directory. - - This software is distributed under the GNU General Public License. 
-------------------------------------------------------------------------- */ - -#ifdef PAIR_CLASS - -PairStyle(lj/charmm/coul/charmm/implicit/cuda,PairLJCharmmCoulCharmmImplicitCuda) - -#else - -#ifndef LMP_PAIR_LJ_CHARMM_COUL_CHARMM_IMPLICIT_CUDA_H -#define LMP_PAIR_LJ_CHARMM_COUL_CHARMM_IMPLICIT_CUDA_H - -#include "pair_lj_charmm_coul_charmm_implicit.h" -#include "cuda_data.h" - -namespace LAMMPS_NS { - -class PairLJCharmmCoulCharmmImplicitCuda : public PairLJCharmmCoulCharmmImplicit -{ - public: - PairLJCharmmCoulCharmmImplicitCuda(class LAMMPS *); - void compute(int, int); - void settings(int, char **); - void coeff(int, char **); - void init_list(int, class NeighList *); - void init_style(); - void ev_setup(int eflag, int vflag); - protected: - class Cuda *cuda; - void allocate(); - bool allocated2; - class CudaNeighList* cuda_neigh_list; - cCudaData<double , F_CFLOAT , x >* cu_lj1_gm; - cCudaData<double , F_CFLOAT , x >* cu_lj2_gm; - cCudaData<double , F_CFLOAT , x >* cu_lj3_gm; - cCudaData<double , F_CFLOAT , x >* cu_lj4_gm; -}; - -} - -#endif -#endif diff --git a/src/USER-CUDA/pair_lj_charmm_coul_long_cuda.cpp b/src/USER-CUDA/pair_lj_charmm_coul_long_cuda.cpp deleted file mode 100644 index b228bd6f41..0000000000 --- a/src/USER-CUDA/pair_lj_charmm_coul_long_cuda.cpp +++ /dev/null @@ -1,196 +0,0 @@ -/* ---------------------------------------------------------------------- - LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator - - Original Version: - http://lammps.sandia.gov, Sandia National Laboratories - Steve Plimpton, sjplimp@sandia.gov - - See the README file in the top-level LAMMPS directory. 
- - Contributing author: Paul Crozier (SNL) - ----------------------------------------------------------------------- - - USER-CUDA Package and associated modifications: - https://sourceforge.net/projects/lammpscuda/ - - Christian Trott, christian.trott@tu-ilmenau.de - Lars Winterfeld, lars.winterfeld@tu-ilmenau.de - Theoretical Physics II, University of Technology Ilmenau, Germany - - See the README file in the USER-CUDA directory. - - This software is distributed under the GNU General Public License. -------------------------------------------------------------------------- */ - -#include <cmath> -#include <cstdio> -#include <cstdlib> -#include <cstring> -#include "pair_lj_charmm_coul_long_cuda.h" -#include "pair_lj_charmm_coul_long_cuda_cu.h" -#include "cuda_data.h" -#include "atom.h" -#include "comm.h" -#include "force.h" -#include "kspace.h" -#include "neighbor.h" -#include "neigh_list.h" -#include "neigh_request.h" -#include "cuda_neigh_list.h" -#include "update.h" -#include "integrate.h" -#include "respa.h" -#include "memory.h" -#include "error.h" -#include "user_cuda.h" - -using namespace LAMMPS_NS; - -#define EWALD_F 1.12837917 -#define EWALD_P 0.3275911 -#define A1 0.254829592 -#define A2 -0.284496736 -#define A3 1.421413741 -#define A4 -1.453152027 -#define A5 1.061405429 -/* ---------------------------------------------------------------------- */ - -PairLJCharmmCoulLongCuda::PairLJCharmmCoulLongCuda(LAMMPS *lmp) : PairLJCharmmCoulLong(lmp) -{ - cuda = lmp->cuda; - if(cuda == NULL) - error->all(FLERR,"You cannot use a /cuda class, without activating 'cuda' acceleration. 
Provide '-c on' as command-line argument to LAMMPS.."); - - allocated2 = false; - cuda->shared_data.pair.cudable_force = 1; - cuda->shared_data.pair.collect_forces_later = 1; - cuda->setSystemParams(); -} - -/* ---------------------------------------------------------------------- - remember pointer to arrays in cuda shared data -------------------------------------------------------------------------- */ - -void PairLJCharmmCoulLongCuda::allocate() -{ - if(! allocated) PairLJCharmmCoulLong::allocate(); - if(! allocated2) - { - allocated2 = true; - //cuda->shared_data.pair.cut = cut_lj; - cuda->shared_data.pair.coeff1 = lj1; - cuda->shared_data.pair.coeff2 = lj2; - cuda->shared_data.pair.coeff3 = lj3; - cuda->shared_data.pair.coeff4 = lj4; - cuda->shared_data.pair.offset = offset; - cuda->shared_data.pair.special_lj = force->special_lj; - cuda->shared_data.pair.special_coul = force->special_coul; - cu_lj1_gm = new cCudaData<double, F_CFLOAT, x> ((double*)lj1, &cuda->shared_data.pair.coeff1_gm, (atom->ntypes+1)*(atom->ntypes+1)); - cu_lj2_gm = new cCudaData<double, F_CFLOAT, x> ((double*)lj2, &cuda->shared_data.pair.coeff2_gm, (atom->ntypes+1)*(atom->ntypes+1)); - cu_lj3_gm = new cCudaData<double, F_CFLOAT, x> ((double*)lj3, &cuda->shared_data.pair.coeff3_gm, (atom->ntypes+1)*(atom->ntypes+1)); - cu_lj4_gm = new cCudaData<double, F_CFLOAT, x> ((double*)lj4, &cuda->shared_data.pair.coeff4_gm, (atom->ntypes+1)*(atom->ntypes+1)); - } -} - -/* ---------------------------------------------------------------------- */ - -void PairLJCharmmCoulLongCuda::compute(int eflag, int vflag) -{ - if (eflag || vflag) ev_setup(eflag,vflag); - if(not cuda->shared_data.pair.collect_forces_later) - { - if(eflag) cuda->cu_eng_vdwl->upload(); - if(eflag) cuda->cu_eng_coul->upload(); - if(vflag) cuda->cu_virial->upload(); - } - - Cuda_PairLJCharmmCoulLongCuda(& cuda->shared_data, & cuda_neigh_list->sneighlist, eflag, vflag, eflag_atom, vflag_atom,denom_lj); - - if(not 
cuda->shared_data.pair.collect_forces_later) - { - if(eflag) cuda->cu_eng_vdwl->download(); - if(eflag) cuda->cu_eng_coul->download(); - if(vflag) cuda->cu_virial->download(); - } -} - -/* ---------------------------------------------------------------------- */ - -void PairLJCharmmCoulLongCuda::settings(int narg, char **arg) -{ - PairLJCharmmCoulLong::settings(narg, arg); - cuda->shared_data.pair.cut_global = (X_CFLOAT) cut_lj; - cuda->shared_data.pair.cut_coulsq_global = (X_CFLOAT) cut_coulsq; - cuda->shared_data.pair.cut_inner_global = (F_CFLOAT) cut_lj_inner; -} - -/* ---------------------------------------------------------------------- */ - -void PairLJCharmmCoulLongCuda::coeff(int narg, char **arg) -{ - PairLJCharmmCoulLong::coeff(narg, arg); - allocate(); -} - -void PairLJCharmmCoulLongCuda::init_style() -{ - if (!atom->q_flag) - error->all(FLERR,"Pair style lj/charmm/coul/long requires atom attribute q"); - // request regular or rRESPA neighbor lists - - int irequest; - - - irequest = neighbor->request(this,instance_me); - neighbor->requests[irequest]->full = 1; - neighbor->requests[irequest]->half = 0; - neighbor->requests[irequest]->cudable = 1; - - if (cut_lj_inner >= cut_lj) - error->all(FLERR,"Pair inner cutoff >= Pair outer cutoff"); - - cut_lj_innersq = cut_lj_inner * cut_lj_inner; - cut_ljsq = cut_lj * cut_lj; - cut_coulsq = cut_coul * cut_coul; - cut_bothsq = MAX(cut_ljsq,cut_coulsq); - - denom_lj = (cut_ljsq-cut_lj_innersq) * (cut_ljsq-cut_lj_innersq) * - (cut_ljsq-cut_lj_innersq); - - cut_coulsq = cut_coul * cut_coul; - cuda->shared_data.pair.cut_coulsq_global=cut_coulsq; - - if (force->kspace == NULL) - error->all(FLERR,"Pair style is incompatible with KSpace style"); - g_ewald = force->kspace->g_ewald; - cuda->shared_data.pair.g_ewald=g_ewald; - cuda->shared_data.pppm.qqrd2e=force->qqrd2e; - - - if(ncoultablebits) error->warning(FLERR,"# CUDA: You asked for the usage of Coulomb Tables. This is not supported in CUDA Pair forces. 
Setting is ignored.\n"); -} - -void PairLJCharmmCoulLongCuda::init_list(int id, NeighList *ptr) -{ - MYDBG(printf("# CUDA PairLJCharmmCoulLongCuda::init_list\n");) - PairLJCharmmCoulLong::init_list(id, ptr); - #ifndef CUDA_USE_BINNING - // right now we can only handle verlet (id 0), not respa - if(id == 0) cuda_neigh_list = cuda->registerNeighborList(ptr); - // see Neighbor::init() for details on lammps lists' logic - #endif - MYDBG(printf("# CUDA PairLJCharmmCoulLongCuda::init_list end\n");) -} - -void PairLJCharmmCoulLongCuda::ev_setup(int eflag, int vflag) -{ - int maxeatomold=maxeatom; - PairLJCharmmCoulLong::ev_setup(eflag,vflag); - - if (eflag_atom && atom->nmax > maxeatomold) - {delete cuda->cu_eatom; cuda->cu_eatom = new cCudaData<double, ENERGY_CFLOAT, x > ((double*)eatom, & cuda->shared_data.atom.eatom , atom->nmax );} - - if (vflag_atom && atom->nmax > maxeatomold) - {delete cuda->cu_vatom; cuda->cu_vatom = new cCudaData<double, ENERGY_CFLOAT, yx > ((double*)vatom, & cuda->shared_data.atom.vatom , atom->nmax, 6 );} - -} diff --git a/src/USER-CUDA/pair_lj_charmm_coul_long_cuda.h b/src/USER-CUDA/pair_lj_charmm_coul_long_cuda.h deleted file mode 100644 index 8d9048a341..0000000000 --- a/src/USER-CUDA/pair_lj_charmm_coul_long_cuda.h +++ /dev/null @@ -1,62 +0,0 @@ -/* -*- c++ -*- ---------------------------------------------------------- - LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator - - Original Version: - http://lammps.sandia.gov, Sandia National Laboratories - Steve Plimpton, sjplimp@sandia.gov - - See the README file in the top-level LAMMPS directory. 
- - ----------------------------------------------------------------------- - - USER-CUDA Package and associated modifications: - https://sourceforge.net/projects/lammpscuda/ - - Christian Trott, christian.trott@tu-ilmenau.de - Lars Winterfeld, lars.winterfeld@tu-ilmenau.de - Theoretical Physics II, University of Technology Ilmenau, Germany - - See the README file in the USER-CUDA directory. - - This software is distributed under the GNU General Public License. -------------------------------------------------------------------------- */ - -#ifdef PAIR_CLASS - -PairStyle(lj/charmm/coul/long/cuda,PairLJCharmmCoulLongCuda) - -#else - -#ifndef LMP_PAIR_LJ_CHARMM_COUL_LONG_CUDA_H -#define LMP_PAIR_LJ_CHARMM_COUL_LONG_CUDA_H - -#include "pair_lj_charmm_coul_long.h" -#include "cuda_data.h" - -namespace LAMMPS_NS { - -class PairLJCharmmCoulLongCuda : public PairLJCharmmCoulLong -{ - public: - PairLJCharmmCoulLongCuda(class LAMMPS *); - void compute(int, int); - void settings(int, char **); - void coeff(int, char **); - void init_list(int, class NeighList *); - void init_style(); - void ev_setup(int eflag, int vflag); - protected: - class Cuda *cuda; - void allocate(); - bool allocated2; - class CudaNeighList* cuda_neigh_list; - cCudaData<double , F_CFLOAT , x >* cu_lj1_gm; - cCudaData<double , F_CFLOAT , x >* cu_lj2_gm; - cCudaData<double , F_CFLOAT , x >* cu_lj3_gm; - cCudaData<double , F_CFLOAT , x >* cu_lj4_gm; -}; - -} - -#endif -#endif diff --git a/src/USER-CUDA/pair_lj_class2_coul_cut_cuda.cpp b/src/USER-CUDA/pair_lj_class2_coul_cut_cuda.cpp deleted file mode 100644 index 01b6dc071f..0000000000 --- a/src/USER-CUDA/pair_lj_class2_coul_cut_cuda.cpp +++ /dev/null @@ -1,162 +0,0 @@ -/* ---------------------------------------------------------------------- - LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator - - Original Version: - http://lammps.sandia.gov, Sandia National Laboratories - Steve Plimpton, sjplimp@sandia.gov - - See the README file in the 
top-level LAMMPS directory. - - Contributing author: Paul Crozier (SNL) - ----------------------------------------------------------------------- - - USER-CUDA Package and associated modifications: - https://sourceforge.net/projects/lammpscuda/ - - Christian Trott, christian.trott@tu-ilmenau.de - Lars Winterfeld, lars.winterfeld@tu-ilmenau.de - Theoretical Physics II, University of Technology Ilmenau, Germany - - See the README file in the USER-CUDA directory. - - This software is distributed under the GNU General Public License. -------------------------------------------------------------------------- */ - -#include <cmath> -#include <cstdio> -#include <cstdlib> -#include <cstring> -#include "pair_lj_class2_coul_cut_cuda.h" -#include "pair_lj_class2_coul_cut_cuda_cu.h" -#include "cuda_data.h" -#include "atom.h" -#include "comm.h" -#include "force.h" -#include "kspace.h" -#include "neighbor.h" -#include "neigh_list.h" -#include "neigh_request.h" -#include "cuda_neigh_list.h" -#include "update.h" -#include "integrate.h" -#include "respa.h" -#include "memory.h" -#include "error.h" -#include "user_cuda.h" - -using namespace LAMMPS_NS; - -/* ---------------------------------------------------------------------- */ - -PairLJClass2CoulCutCuda::PairLJClass2CoulCutCuda(LAMMPS *lmp) : PairLJClass2CoulCut(lmp) -{ - cuda = lmp->cuda; - if(cuda == NULL) - error->all(FLERR,"You cannot use a /cuda class, without activating 'cuda' acceleration. Provide '-c on' as command-line argument to LAMMPS.."); - - allocated2 = false; - cuda->shared_data.pair.cudable_force = 1; - cuda->setSystemParams(); -} - -/* ---------------------------------------------------------------------- - remember pointer to arrays in cuda shared data -------------------------------------------------------------------------- */ - -void PairLJClass2CoulCutCuda::allocate() -{ - if(! allocated) PairLJClass2CoulCut::allocate(); - if(! 
allocated2) - { - allocated2 = true; - cuda->shared_data.pair.cut = cut_lj; - cuda->shared_data.pair.cut_coul= cut_coul; - cuda->shared_data.pair.coeff1 = lj1; - cuda->shared_data.pair.coeff2 = lj2; - cuda->shared_data.pair.coeff3 = lj3; - cuda->shared_data.pair.coeff4 = lj4; - cuda->shared_data.pair.offset = offset; - cuda->shared_data.pair.special_lj = force->special_lj; - cuda->shared_data.pair.special_coul = force->special_coul; - } -} - -/* ---------------------------------------------------------------------- */ - -void PairLJClass2CoulCutCuda::compute(int eflag, int vflag) -{ - if (eflag || vflag) ev_setup(eflag,vflag); - if(eflag) cuda->cu_eng_vdwl->upload(); - if(eflag) cuda->cu_eng_coul->upload(); - if(vflag) cuda->cu_virial->upload(); - - Cuda_PairLJClass2CoulCutCuda(& cuda->shared_data, & cuda_neigh_list->sneighlist, eflag, vflag, eflag_atom, vflag_atom); - - if(not cuda->shared_data.pair.collect_forces_later) - { - if(eflag) cuda->cu_eng_vdwl->download(); - if(eflag) cuda->cu_eng_coul->download(); - if(vflag) cuda->cu_virial->download(); - } -} - -/* ---------------------------------------------------------------------- */ - -void PairLJClass2CoulCutCuda::settings(int narg, char **arg) -{ - PairLJClass2CoulCut::settings(narg, arg); - cuda->shared_data.pair.cut_global = (F_CFLOAT) cut_lj_global; - cuda->shared_data.pair.cut_coul_global = (F_CFLOAT) cut_coul_global; -} - -/* ---------------------------------------------------------------------- */ - -void PairLJClass2CoulCutCuda::coeff(int narg, char **arg) -{ - PairLJClass2CoulCut::coeff(narg, arg); - allocate(); -} - -void PairLJClass2CoulCutCuda::init_style() -{ - if (!atom->q_flag) - error->all(FLERR,"Pair style lj/cut/coul/cut/cuda requires atom attribute q"); - // request regular or rRESPA neighbor lists - - int irequest; - - - irequest = neighbor->request(this,instance_me); - neighbor->requests[irequest]->full = 1; - neighbor->requests[irequest]->half = 0; - neighbor->requests[irequest]->cudable = 
1; - - - cuda->shared_data.pppm.qqrd2e=force->qqrd2e; - -} - -void PairLJClass2CoulCutCuda::init_list(int id, NeighList *ptr) -{ - MYDBG(printf("# CUDA PairLJClass2CoulCutCuda::init_list\n");) - PairLJClass2CoulCut::init_list(id, ptr); - #ifndef CUDA_USE_BINNING - // right now we can only handle verlet (id 0), not respa - if(id == 0) cuda_neigh_list = cuda->registerNeighborList(ptr); - // see Neighbor::init() for details on lammps lists' logic - #endif - MYDBG(printf("# CUDA PairLJClass2CoulCutCuda::init_list end\n");) -} - -void PairLJClass2CoulCutCuda::ev_setup(int eflag, int vflag) -{ - int maxeatomold=maxeatom; - PairLJClass2CoulCut::ev_setup(eflag,vflag); - - if (eflag_atom && atom->nmax > maxeatomold) - {delete cuda->cu_eatom; cuda->cu_eatom = new cCudaData<double, ENERGY_CFLOAT, x > ((double*)eatom, & cuda->shared_data.atom.eatom , atom->nmax );} - - if (vflag_atom && atom->nmax > maxeatomold) - {delete cuda->cu_vatom; cuda->cu_vatom = new cCudaData<double, ENERGY_CFLOAT, yx > ((double*)vatom, & cuda->shared_data.atom.vatom , atom->nmax, 6 );} - -} diff --git a/src/USER-CUDA/pair_lj_class2_coul_cut_cuda.h b/src/USER-CUDA/pair_lj_class2_coul_cut_cuda.h deleted file mode 100644 index 6601e2797c..0000000000 --- a/src/USER-CUDA/pair_lj_class2_coul_cut_cuda.h +++ /dev/null @@ -1,57 +0,0 @@ -/* -*- c++ -*- ---------------------------------------------------------- - LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator - - Original Version: - http://lammps.sandia.gov, Sandia National Laboratories - Steve Plimpton, sjplimp@sandia.gov - - See the README file in the top-level LAMMPS directory. 
- - ----------------------------------------------------------------------- - - USER-CUDA Package and associated modifications: - https://sourceforge.net/projects/lammpscuda/ - - Christian Trott, christian.trott@tu-ilmenau.de - Lars Winterfeld, lars.winterfeld@tu-ilmenau.de - Theoretical Physics II, University of Technology Ilmenau, Germany - - See the README file in the USER-CUDA directory. - - This software is distributed under the GNU General Public License. -------------------------------------------------------------------------- */ - -#ifdef PAIR_CLASS - -PairStyle(lj/class2/coul/cut/cuda,PairLJClass2CoulCutCuda) - -#else - -#ifndef LMP_PAIR_LJ_CLASS2_COUL_CUT_CUDA_H -#define LMP_PAIR_LJ_CLASS2_COUL_CUT_CUDA_H - -#include "pair_lj_class2_coul_cut.h" - -namespace LAMMPS_NS { - -class PairLJClass2CoulCutCuda : public PairLJClass2CoulCut -{ - public: - PairLJClass2CoulCutCuda(class LAMMPS *); - void compute(int, int); - void settings(int, char **); - void coeff(int, char **); - void init_list(int, class NeighList *); - void init_style(); - void ev_setup(int eflag, int vflag); - protected: - class Cuda *cuda; - void allocate(); - bool allocated2; - class CudaNeighList* cuda_neigh_list; -}; - -} - -#endif -#endif diff --git a/src/USER-CUDA/pair_lj_class2_coul_long_cuda.cpp b/src/USER-CUDA/pair_lj_class2_coul_long_cuda.cpp deleted file mode 100644 index 20f257ffea..0000000000 --- a/src/USER-CUDA/pair_lj_class2_coul_long_cuda.cpp +++ /dev/null @@ -1,175 +0,0 @@ -/* ---------------------------------------------------------------------- - LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator - - Original Version: - http://lammps.sandia.gov, Sandia National Laboratories - Steve Plimpton, sjplimp@sandia.gov - - See the README file in the top-level LAMMPS directory. 
- - Contributing author: Paul Crozier (SNL) - ----------------------------------------------------------------------- - - USER-CUDA Package and associated modifications: - https://sourceforge.net/projects/lammpscuda/ - - Christian Trott, christian.trott@tu-ilmenau.de - Lars Winterfeld, lars.winterfeld@tu-ilmenau.de - Theoretical Physics II, University of Technology Ilmenau, Germany - - See the README file in the USER-CUDA directory. - - This software is distributed under the GNU General Public License. -------------------------------------------------------------------------- */ - -#include <cmath> -#include <cstdio> -#include <cstdlib> -#include <cstring> -#include "pair_lj_class2_coul_long_cuda.h" -#include "pair_lj_class2_coul_long_cuda_cu.h" -#include "cuda_data.h" -#include "atom.h" -#include "comm.h" -#include "force.h" -#include "kspace.h" -#include "neighbor.h" -#include "neigh_list.h" -#include "neigh_request.h" -#include "cuda_neigh_list.h" -#include "update.h" -#include "integrate.h" -#include "respa.h" -#include "memory.h" -#include "error.h" -#include "user_cuda.h" - -using namespace LAMMPS_NS; - -#define EWALD_F 1.12837917 -#define EWALD_P 0.3275911 -#define A1 0.254829592 -#define A2 -0.284496736 -#define A3 1.421413741 -#define A4 -1.453152027 -#define A5 1.061405429 -/* ---------------------------------------------------------------------- */ - -PairLJClass2CoulLongCuda::PairLJClass2CoulLongCuda(LAMMPS *lmp) : PairLJClass2CoulLong(lmp) -{ - cuda = lmp->cuda; - if(cuda == NULL) - error->all(FLERR,"You cannot use a /cuda class, without activating 'cuda' acceleration. 
Provide '-c on' as command-line argument to LAMMPS.."); - - allocated2 = false; - cuda->shared_data.pair.cudable_force = 1; - cuda->setSystemParams(); -} - -/* ---------------------------------------------------------------------- - remember pointer to arrays in cuda shared data -------------------------------------------------------------------------- */ - -void PairLJClass2CoulLongCuda::allocate() -{ - if(! allocated) PairLJClass2CoulLong::allocate(); - if(! allocated2) - { - allocated2 = true; - cuda->shared_data.pair.cut = cut_lj; - cuda->shared_data.pair.coeff1 = lj1; - cuda->shared_data.pair.coeff2 = lj2; - cuda->shared_data.pair.coeff3 = lj3; - cuda->shared_data.pair.coeff4 = lj4; - cuda->shared_data.pair.offset = offset; - cuda->shared_data.pair.special_lj = force->special_lj; - cuda->shared_data.pair.special_coul = force->special_coul; - } -} - -/* ---------------------------------------------------------------------- */ - -void PairLJClass2CoulLongCuda::compute(int eflag, int vflag) -{ - if (eflag || vflag) ev_setup(eflag,vflag); - if(eflag) cuda->cu_eng_vdwl->upload(); - if(eflag) cuda->cu_eng_coul->upload(); - if(vflag) cuda->cu_virial->upload(); - - Cuda_PairLJClass2CoulLongCuda(& cuda->shared_data, & cuda_neigh_list->sneighlist, eflag, vflag, eflag_atom, vflag_atom); - - if(not cuda->shared_data.pair.collect_forces_later) - { - if(eflag) cuda->cu_eng_vdwl->download(); - if(eflag) cuda->cu_eng_coul->download(); - if(vflag) cuda->cu_virial->download(); - } -} - -/* ---------------------------------------------------------------------- */ - -void PairLJClass2CoulLongCuda::settings(int narg, char **arg) -{ - PairLJClass2CoulLong::settings(narg, arg); - cuda->shared_data.pair.cut_global = (F_CFLOAT) cut_lj_global; -} - -/* ---------------------------------------------------------------------- */ - -void PairLJClass2CoulLongCuda::coeff(int narg, char **arg) -{ - PairLJClass2CoulLong::coeff(narg, arg); - allocate(); -} - -void 
PairLJClass2CoulLongCuda::init_style() -{ - if (!atom->q_flag) - error->all(FLERR,"Pair style lj/cut/coul/long requires atom attribute q"); - // request regular or rRESPA neighbor lists - - int irequest; - - - irequest = neighbor->request(this,instance_me); - neighbor->requests[irequest]->full = 1; - neighbor->requests[irequest]->half = 0; - neighbor->requests[irequest]->cudable = 1; - - cut_coulsq = cut_coul * cut_coul; - cuda->shared_data.pair.cut_coul_global=cut_coul; - cuda->shared_data.pair.cut_coulsq_global=cut_coulsq; - // set rRESPA cutoffs - - if (force->newton) error->warning(FLERR,"Pair style uses does not use \"newton\" setting. You might test if \"newton off\" makes the simulation run faster."); - if (force->kspace == NULL) - error->all(FLERR,"Pair style is incompatible with KSpace style"); - g_ewald = force->kspace->g_ewald; - cuda->shared_data.pair.g_ewald=g_ewald; - cuda->shared_data.pppm.qqrd2e=force->qqrd2e; - - - if(ncoultablebits) error->warning(FLERR,"# CUDA: You asked for the usage of Coulomb Tables. This is not supported in CUDA Pair forces. 
Setting is ignored.\n"); -} - -void PairLJClass2CoulLongCuda::init_list(int id, NeighList *ptr) -{ - MYDBG(printf("# CUDA PairLJClass2CoulLongCuda::init_list\n");) - PairLJClass2CoulLong::init_list(id, ptr); - if(id == 0) cuda_neigh_list = cuda->registerNeighborList(ptr); - // see Neighbor::init() for details on lammps lists' logic - MYDBG(printf("# CUDA PairLJClass2CoulLongCuda::init_list end\n");) -} - -void PairLJClass2CoulLongCuda::ev_setup(int eflag, int vflag) -{ - int maxeatomold=maxeatom; - PairLJClass2CoulLong::ev_setup(eflag,vflag); - - if (eflag_atom && atom->nmax > maxeatomold) - {delete cuda->cu_eatom; cuda->cu_eatom = new cCudaData<double, ENERGY_CFLOAT, x > ((double*)eatom, & cuda->shared_data.atom.eatom , atom->nmax );} - - if (vflag_atom && atom->nmax > maxeatomold) - {delete cuda->cu_vatom; cuda->cu_vatom = new cCudaData<double, ENERGY_CFLOAT, yx > ((double*)vatom, & cuda->shared_data.atom.vatom , atom->nmax, 6 );} - -} diff --git a/src/USER-CUDA/pair_lj_class2_coul_long_cuda.h b/src/USER-CUDA/pair_lj_class2_coul_long_cuda.h deleted file mode 100644 index 43af51b4ed..0000000000 --- a/src/USER-CUDA/pair_lj_class2_coul_long_cuda.h +++ /dev/null @@ -1,57 +0,0 @@ -/* -*- c++ -*- ---------------------------------------------------------- - LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator - - Original Version: - http://lammps.sandia.gov, Sandia National Laboratories - Steve Plimpton, sjplimp@sandia.gov - - See the README file in the top-level LAMMPS directory. - - ----------------------------------------------------------------------- - - USER-CUDA Package and associated modifications: - https://sourceforge.net/projects/lammpscuda/ - - Christian Trott, christian.trott@tu-ilmenau.de - Lars Winterfeld, lars.winterfeld@tu-ilmenau.de - Theoretical Physics II, University of Technology Ilmenau, Germany - - See the README file in the USER-CUDA directory. - - This software is distributed under the GNU General Public License. 
-------------------------------------------------------------------------- */ - -#ifdef PAIR_CLASS - -PairStyle(lj/class2/coul/long/cuda,PairLJClass2CoulLongCuda) - -#else - -#ifndef LMP_PAIR_LJ_CLASS2_COUL_LONG_CUDA_H -#define LMP_PAIR_LJ_CLASS2_COUL_LONG_CUDA_H - -#include "pair_lj_class2_coul_long.h" - -namespace LAMMPS_NS { - -class PairLJClass2CoulLongCuda : public PairLJClass2CoulLong -{ - public: - PairLJClass2CoulLongCuda(class LAMMPS *); - void compute(int, int); - void settings(int, char **); - void coeff(int, char **); - void init_list(int, class NeighList *); - void init_style(); - void ev_setup(int eflag, int vflag); - protected: - class Cuda *cuda; - void allocate(); - bool allocated2; - class CudaNeighList* cuda_neigh_list; -}; - -} - -#endif -#endif diff --git a/src/USER-CUDA/pair_lj_class2_cuda.cpp b/src/USER-CUDA/pair_lj_class2_cuda.cpp deleted file mode 100644 index 6b9f686c13..0000000000 --- a/src/USER-CUDA/pair_lj_class2_cuda.cpp +++ /dev/null @@ -1,167 +0,0 @@ -/* ---------------------------------------------------------------------- - LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator - - Original Version: - http://lammps.sandia.gov, Sandia National Laboratories - Steve Plimpton, sjplimp@sandia.gov - - See the README file in the top-level LAMMPS directory. - - ----------------------------------------------------------------------- - - USER-CUDA Package and associated modifications: - https://sourceforge.net/projects/lammpscuda/ - - Christian Trott, christian.trott@tu-ilmenau.de - Lars Winterfeld, lars.winterfeld@tu-ilmenau.de - Theoretical Physics II, University of Technology Ilmenau, Germany - - See the README file in the USER-CUDA directory. - - This software is distributed under the GNU General Public License. 
-------------------------------------------------------------------------- */ - -/* ---------------------------------------------------------------------- - LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator - http://lammps.sandia.gov, Sandia National Laboratories - Steve Plimpton, sjplimp@sandia.gov - - Copyright (2003) Sandia Corporation. Under the terms of Contract - DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains - certain rights in this software. This software is distributed under - the GNU General Public License. - - See the README file in the top-level LAMMPS directory. -------------------------------------------------------------------------- */ - -/* ---------------------------------------------------------------------- - Contributing author: Paul Crozier (SNL) -------------------------------------------------------------------------- */ - -#include <cmath> -#include <cstdio> -#include <cstdlib> -#include <cstring> -#include "pair_lj_class2_cuda.h" -#include "pair_lj_class2_cuda_cu.h" -#include "cuda_data.h" -#include "atom.h" -#include "comm.h" -#include "force.h" -#include "neighbor.h" -#include "neigh_list.h" -#include "neigh_request.h" -#include "cuda_neigh_list.h" -#include "update.h" -#include "integrate.h" -#include "respa.h" -#include "memory.h" -#include "error.h" -#include "user_cuda.h" - -using namespace LAMMPS_NS; - -/* ---------------------------------------------------------------------- */ - -PairLJClass2Cuda::PairLJClass2Cuda(LAMMPS *lmp) : PairLJClass2(lmp) -{ - cuda = lmp->cuda; - if(cuda == NULL) - error->all(FLERR,"You cannot use a /cuda class, without activating 'cuda' acceleration. 
Provide '-c on' as command-line argument to LAMMPS.."); - - allocated2 = false; - cuda->shared_data.pair.cudable_force = 1; - cuda->setSystemParams(); -} - -/* ---------------------------------------------------------------------- - remember pointer to arrays in cuda shared data -------------------------------------------------------------------------- */ - -void PairLJClass2Cuda::allocate() -{ - if(! allocated) PairLJClass2::allocate(); - if(! allocated2) - { - allocated2 = true; - cuda->shared_data.pair.cut = cut; - cuda->shared_data.pair.coeff1 = lj1; - cuda->shared_data.pair.coeff2 = lj2; - cuda->shared_data.pair.coeff3 = lj3; - cuda->shared_data.pair.coeff4 = lj4; - cuda->shared_data.pair.offset = offset; - cuda->shared_data.pair.special_lj = force->special_lj; - cuda->shared_data.pair.special_coul = force->special_coul; - } -} - -/* ---------------------------------------------------------------------- */ - -void PairLJClass2Cuda::compute(int eflag, int vflag) -{ - if (eflag || vflag) ev_setup(eflag,vflag); - if(eflag) cuda->cu_eng_vdwl->upload(); - if(vflag) cuda->cu_virial->upload(); - - Cuda_PairLJClass2Cuda(& cuda->shared_data, & cuda_neigh_list->sneighlist, eflag, vflag, eflag_atom, vflag_atom); - - if(not cuda->shared_data.pair.collect_forces_later) - { - if(eflag) cuda->cu_eng_vdwl->download(); - if(vflag) cuda->cu_virial->download(); - } - -} - -/* ---------------------------------------------------------------------- */ - -void PairLJClass2Cuda::settings(int narg, char **arg) -{ - PairLJClass2::settings(narg, arg); - cuda->shared_data.pair.cut_global = (F_CFLOAT) cut_global; -} - -/* ---------------------------------------------------------------------- */ - -void PairLJClass2Cuda::coeff(int narg, char **arg) -{ - PairLJClass2::coeff(narg, arg); - allocate(); -} - -void PairLJClass2Cuda::init_style() -{ - MYDBG(printf("# CUDA PairLJClass2Cuda::init_style start\n"); ) - // request regular or rRESPA neighbor lists - - int irequest; - - irequest = 
neighbor->request(this,instance_me); - neighbor->requests[irequest]->full = 1; - neighbor->requests[irequest]->half = 0; - neighbor->requests[irequest]->cudable = 1; - //neighbor->style=0; //0=NSQ neighboring - MYDBG(printf("# CUDA PairLJClass2Cuda::init_style end\n"); ) -} - -void PairLJClass2Cuda::init_list(int id, NeighList *ptr) -{ - MYDBG(printf("# CUDA PairLJClass2Cuda::init_list\n");) - PairLJClass2::init_list(id, ptr); - if(id == 0) cuda_neigh_list = cuda->registerNeighborList(ptr); - // see Neighbor::init() for details on lammps lists' logic - MYDBG(printf("# CUDA PairLJClass2Cuda::init_list end\n");) -} - -void PairLJClass2Cuda::ev_setup(int eflag, int vflag) -{ - int maxeatomold=maxeatom; - PairLJClass2::ev_setup(eflag,vflag); - - if (eflag_atom && atom->nmax > maxeatomold) - {delete cuda->cu_eatom; cuda->cu_eatom = new cCudaData<double, ENERGY_CFLOAT, x > ((double*)eatom, & cuda->shared_data.atom.eatom , atom->nmax );} - - if (vflag_atom && atom->nmax > maxeatomold) - {delete cuda->cu_vatom; cuda->cu_vatom = new cCudaData<double, ENERGY_CFLOAT, yx > ((double*)vatom, & cuda->shared_data.atom.vatom , atom->nmax, 6 );} - -} diff --git a/src/USER-CUDA/pair_lj_class2_cuda.h b/src/USER-CUDA/pair_lj_class2_cuda.h deleted file mode 100644 index 6f2673c4a3..0000000000 --- a/src/USER-CUDA/pair_lj_class2_cuda.h +++ /dev/null @@ -1,57 +0,0 @@ -/* -*- c++ -*- ---------------------------------------------------------- - LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator - - Original Version: - http://lammps.sandia.gov, Sandia National Laboratories - Steve Plimpton, sjplimp@sandia.gov - - See the README file in the top-level LAMMPS directory. 
- - ----------------------------------------------------------------------- - - USER-CUDA Package and associated modifications: - https://sourceforge.net/projects/lammpscuda/ - - Christian Trott, christian.trott@tu-ilmenau.de - Lars Winterfeld, lars.winterfeld@tu-ilmenau.de - Theoretical Physics II, University of Technology Ilmenau, Germany - - See the README file in the USER-CUDA directory. - - This software is distributed under the GNU General Public License. -------------------------------------------------------------------------- */ - -#ifdef PAIR_CLASS - -PairStyle(lj/class2/cuda,PairLJClass2Cuda) - -#else - -#ifndef PAIR_LJ_CLASS2_CUDA_H -#define PAIR_LJ_CLASS2_CUDA_H - -#include "pair_lj_class2.h" - -namespace LAMMPS_NS { - -class PairLJClass2Cuda : public PairLJClass2 -{ - public: - PairLJClass2Cuda(class LAMMPS *); - void compute(int, int); - void settings(int, char **); - void coeff(int, char **); - void init_list(int, class NeighList *); - void init_style(); - void ev_setup(int eflag, int vflag); - protected: - class Cuda *cuda; - void allocate(); - bool allocated2; - class CudaNeighList* cuda_neigh_list; -}; - -} - -#endif -#endif diff --git a/src/USER-CUDA/pair_lj_cut_coul_cut_cuda.cpp b/src/USER-CUDA/pair_lj_cut_coul_cut_cuda.cpp deleted file mode 100644 index 3872be0d0e..0000000000 --- a/src/USER-CUDA/pair_lj_cut_coul_cut_cuda.cpp +++ /dev/null @@ -1,162 +0,0 @@ -/* ---------------------------------------------------------------------- - LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator - - Original Version: - http://lammps.sandia.gov, Sandia National Laboratories - Steve Plimpton, sjplimp@sandia.gov - - See the README file in the top-level LAMMPS directory. 
- - Contributing author: Paul Crozier (SNL) - ----------------------------------------------------------------------- - - USER-CUDA Package and associated modifications: - https://sourceforge.net/projects/lammpscuda/ - - Christian Trott, christian.trott@tu-ilmenau.de - Lars Winterfeld, lars.winterfeld@tu-ilmenau.de - Theoretical Physics II, University of Technology Ilmenau, Germany - - See the README file in the USER-CUDA directory. - - This software is distributed under the GNU General Public License. -------------------------------------------------------------------------- */ - -#include <cmath> -#include <cstdio> -#include <cstdlib> -#include <cstring> -#include "pair_lj_cut_coul_cut_cuda.h" -#include "pair_lj_cut_coul_cut_cuda_cu.h" -#include "cuda_data.h" -#include "atom.h" -#include "comm.h" -#include "force.h" -#include "kspace.h" -#include "neighbor.h" -#include "neigh_list.h" -#include "neigh_request.h" -#include "cuda_neigh_list.h" -#include "update.h" -#include "integrate.h" -#include "respa.h" -#include "memory.h" -#include "error.h" -#include "user_cuda.h" - -using namespace LAMMPS_NS; - -/* ---------------------------------------------------------------------- */ - -PairLJCutCoulCutCuda::PairLJCutCoulCutCuda(LAMMPS *lmp) : PairLJCutCoulCut(lmp) -{ - cuda = lmp->cuda; - if(cuda == NULL) - error->all(FLERR,"You cannot use a /cuda class, without activating 'cuda' acceleration. Provide '-c on' as command-line argument to LAMMPS.."); - - allocated2 = false; - cuda->shared_data.pair.cudable_force = 1; - cuda->setSystemParams(); -} - -/* ---------------------------------------------------------------------- - remember pointer to arrays in cuda shared data -------------------------------------------------------------------------- */ - -void PairLJCutCoulCutCuda::allocate() -{ - if(! allocated) PairLJCutCoulCut::allocate(); - if(! 
allocated2) - { - allocated2 = true; - cuda->shared_data.pair.cut = cut_lj; - cuda->shared_data.pair.cut_coul= cut_coul; - cuda->shared_data.pair.coeff1 = lj1; - cuda->shared_data.pair.coeff2 = lj2; - cuda->shared_data.pair.coeff3 = lj3; - cuda->shared_data.pair.coeff4 = lj4; - cuda->shared_data.pair.offset = offset; - cuda->shared_data.pair.special_lj = force->special_lj; - cuda->shared_data.pair.special_coul = force->special_coul; - } -} - -/* ---------------------------------------------------------------------- */ - -void PairLJCutCoulCutCuda::compute(int eflag, int vflag) -{ - if (eflag || vflag) ev_setup(eflag,vflag); - if(eflag) cuda->cu_eng_vdwl->upload(); - if(eflag) cuda->cu_eng_coul->upload(); - if(vflag) cuda->cu_virial->upload(); - - Cuda_PairLJCutCoulCutCuda(& cuda->shared_data, & cuda_neigh_list->sneighlist, eflag, vflag, eflag_atom, vflag_atom); - - if(not cuda->shared_data.pair.collect_forces_later) - { - if(eflag) cuda->cu_eng_vdwl->download(); - if(eflag) cuda->cu_eng_coul->download(); - if(vflag) cuda->cu_virial->download(); - } -} - -/* ---------------------------------------------------------------------- */ - -void PairLJCutCoulCutCuda::settings(int narg, char **arg) -{ - PairLJCutCoulCut::settings(narg, arg); - cuda->shared_data.pair.cut_global = (F_CFLOAT) cut_lj_global; - cuda->shared_data.pair.cut_coul_global = (F_CFLOAT) cut_coul_global; -} - -/* ---------------------------------------------------------------------- */ - -void PairLJCutCoulCutCuda::coeff(int narg, char **arg) -{ - PairLJCutCoulCut::coeff(narg, arg); - allocate(); -} - -void PairLJCutCoulCutCuda::init_style() -{ - if (!atom->q_flag) - error->all(FLERR,"Pair style lj/cut/coul/cut/cuda requires atom attribute q"); - // request regular or rRESPA neighbor lists - - int irequest; - - - irequest = neighbor->request(this,instance_me); - neighbor->requests[irequest]->full = 1; - neighbor->requests[irequest]->half = 0; - neighbor->requests[irequest]->cudable = 1; - - - 
cuda->shared_data.pppm.qqrd2e=force->qqrd2e; - -} - -void PairLJCutCoulCutCuda::init_list(int id, NeighList *ptr) -{ - MYDBG(printf("# CUDA PairLJCutCoulCutCuda::init_list\n");) - PairLJCutCoulCut::init_list(id, ptr); - #ifndef CUDA_USE_BINNING - // right now we can only handle verlet (id 0), not respa - if(id == 0) cuda_neigh_list = cuda->registerNeighborList(ptr); - // see Neighbor::init() for details on lammps lists' logic - #endif - MYDBG(printf("# CUDA PairLJCutCoulCutCuda::init_list end\n");) -} - -void PairLJCutCoulCutCuda::ev_setup(int eflag, int vflag) -{ - int maxeatomold=maxeatom; - PairLJCutCoulCut::ev_setup(eflag,vflag); - - if (eflag_atom && atom->nmax > maxeatomold) - {delete cuda->cu_eatom; cuda->cu_eatom = new cCudaData<double, ENERGY_CFLOAT, x > ((double*)eatom, & cuda->shared_data.atom.eatom , atom->nmax );} - - if (vflag_atom && atom->nmax > maxeatomold) - {delete cuda->cu_vatom; cuda->cu_vatom = new cCudaData<double, ENERGY_CFLOAT, yx > ((double*)vatom, & cuda->shared_data.atom.vatom , atom->nmax, 6 );} - -} diff --git a/src/USER-CUDA/pair_lj_cut_coul_cut_cuda.h b/src/USER-CUDA/pair_lj_cut_coul_cut_cuda.h deleted file mode 100644 index 10f44c76da..0000000000 --- a/src/USER-CUDA/pair_lj_cut_coul_cut_cuda.h +++ /dev/null @@ -1,57 +0,0 @@ -/* -*- c++ -*- ---------------------------------------------------------- - LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator - - Original Version: - http://lammps.sandia.gov, Sandia National Laboratories - Steve Plimpton, sjplimp@sandia.gov - - See the README file in the top-level LAMMPS directory. - - ----------------------------------------------------------------------- - - USER-CUDA Package and associated modifications: - https://sourceforge.net/projects/lammpscuda/ - - Christian Trott, christian.trott@tu-ilmenau.de - Lars Winterfeld, lars.winterfeld@tu-ilmenau.de - Theoretical Physics II, University of Technology Ilmenau, Germany - - See the README file in the USER-CUDA directory. 
- - This software is distributed under the GNU General Public License. -------------------------------------------------------------------------- */ - -#ifdef PAIR_CLASS - -PairStyle(lj/cut/coul/cut/cuda,PairLJCutCoulCutCuda) - -#else - -#ifndef LMP_PAIR_LJ_CUT_COUL_CUT_CUDA_H -#define LMP_PAIR_LJ_CUT_COUL_CUT_CUDA_H - -#include "pair_lj_cut_coul_cut.h" - -namespace LAMMPS_NS { - -class PairLJCutCoulCutCuda : public PairLJCutCoulCut -{ - public: - PairLJCutCoulCutCuda(class LAMMPS *); - void compute(int, int); - void settings(int, char **); - void coeff(int, char **); - void init_list(int, class NeighList *); - void init_style(); - void ev_setup(int eflag, int vflag); - protected: - class Cuda *cuda; - void allocate(); - bool allocated2; - class CudaNeighList* cuda_neigh_list; -}; - -} - -#endif -#endif diff --git a/src/USER-CUDA/pair_lj_cut_coul_debye_cuda.cpp b/src/USER-CUDA/pair_lj_cut_coul_debye_cuda.cpp deleted file mode 100644 index 43bcce68c9..0000000000 --- a/src/USER-CUDA/pair_lj_cut_coul_debye_cuda.cpp +++ /dev/null @@ -1,163 +0,0 @@ -/* ---------------------------------------------------------------------- - LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator - - Original Version: - http://lammps.sandia.gov, Sandia National Laboratories - Steve Plimpton, sjplimp@sandia.gov - - See the README file in the top-level LAMMPS directory. - - Contributing author: Paul Crozier (SNL) - ----------------------------------------------------------------------- - - USER-CUDA Package and associated modifications: - https://sourceforge.net/projects/lammpscuda/ - - Christian Trott, christian.trott@tu-ilmenau.de - Lars Winterfeld, lars.winterfeld@tu-ilmenau.de - Theoretical Physics II, University of Technology Ilmenau, Germany - - See the README file in the USER-CUDA directory. - - This software is distributed under the GNU General Public License. 
-------------------------------------------------------------------------- */ - -#include <cmath> -#include <cstdio> -#include <cstdlib> -#include <cstring> -#include "pair_lj_cut_coul_debye_cuda.h" -#include "pair_lj_cut_coul_debye_cuda_cu.h" -#include "cuda_data.h" -#include "atom.h" -#include "comm.h" -#include "force.h" -#include "kspace.h" -#include "neighbor.h" -#include "neigh_list.h" -#include "neigh_request.h" -#include "cuda_neigh_list.h" -#include "update.h" -#include "integrate.h" -#include "respa.h" -#include "memory.h" -#include "error.h" -#include "user_cuda.h" - -using namespace LAMMPS_NS; - -/* ---------------------------------------------------------------------- */ - -PairLJCutCoulDebyeCuda::PairLJCutCoulDebyeCuda(LAMMPS *lmp) : PairLJCutCoulDebye(lmp) -{ - cuda = lmp->cuda; - if(cuda == NULL) - error->all(FLERR,"You cannot use a /cuda class, without activating 'cuda' acceleration. Provide '-c on' as command-line argument to LAMMPS.."); - - allocated2 = false; - cuda->shared_data.pair.cudable_force = 1; - cuda->setSystemParams(); -} - -/* ---------------------------------------------------------------------- - remember pointer to arrays in cuda shared data -------------------------------------------------------------------------- */ - -void PairLJCutCoulDebyeCuda::allocate() -{ - if(! allocated) PairLJCutCoulDebye::allocate(); - if(! 
allocated2) - { - allocated2 = true; - cuda->shared_data.pair.cut = cut_lj; - cuda->shared_data.pair.cut_coul= cut_coul; - cuda->shared_data.pair.coeff1 = lj1; - cuda->shared_data.pair.coeff2 = lj2; - cuda->shared_data.pair.coeff3 = lj3; - cuda->shared_data.pair.coeff4 = lj4; - cuda->shared_data.pair.offset = offset; - cuda->shared_data.pair.special_lj = force->special_lj; - cuda->shared_data.pair.special_coul = force->special_coul; - } -} - -/* ---------------------------------------------------------------------- */ - -void PairLJCutCoulDebyeCuda::compute(int eflag, int vflag) -{ - if (eflag || vflag) ev_setup(eflag,vflag); - if(eflag) cuda->cu_eng_vdwl->upload(); - if(eflag) cuda->cu_eng_coul->upload(); - if(vflag) cuda->cu_virial->upload(); - - Cuda_PairLJCutCoulDebyeCuda(& cuda->shared_data, & cuda_neigh_list->sneighlist, eflag, vflag, eflag_atom, vflag_atom); - - if(not cuda->shared_data.pair.collect_forces_later) - { - if(eflag) cuda->cu_eng_vdwl->download(); - if(eflag) cuda->cu_eng_coul->download(); - if(vflag) cuda->cu_virial->download(); - } -} - -/* ---------------------------------------------------------------------- */ - -void PairLJCutCoulDebyeCuda::settings(int narg, char **arg) -{ - PairLJCutCoulDebye::settings(narg, arg); - cuda->shared_data.pair.cut_global = (F_CFLOAT) cut_lj_global; - cuda->shared_data.pair.cut_coul_global = (F_CFLOAT) cut_coul_global; - cuda->shared_data.pair.kappa = (F_CFLOAT) kappa; -} - -/* ---------------------------------------------------------------------- */ - -void PairLJCutCoulDebyeCuda::coeff(int narg, char **arg) -{ - PairLJCutCoulDebye::coeff(narg, arg); - allocate(); -} - -void PairLJCutCoulDebyeCuda::init_style() -{ - if (!atom->q_flag) - error->all(FLERR,"Pair style lj/cut/coul/debye/cuda requires atom attribute q"); - // request regular or rRESPA neighbor lists - - int irequest; - - - irequest = neighbor->request(this,instance_me); - neighbor->requests[irequest]->full = 1; - neighbor->requests[irequest]->half 
= 0; - neighbor->requests[irequest]->cudable = 1; - - - cuda->shared_data.pppm.qqrd2e=force->qqrd2e; - -} - -void PairLJCutCoulDebyeCuda::init_list(int id, NeighList *ptr) -{ - MYDBG(printf("# CUDA PairLJCutCoulDebyeCuda::init_list\n");) - PairLJCutCoulDebye::init_list(id, ptr); - #ifndef CUDA_USE_BINNING - // right now we can only handle verlet (id 0), not respa - if(id == 0) cuda_neigh_list = cuda->registerNeighborList(ptr); - // see Neighbor::init() for details on lammps lists' logic - #endif - MYDBG(printf("# CUDA PairLJCutCoulDebyeCuda::init_list end\n");) -} - -void PairLJCutCoulDebyeCuda::ev_setup(int eflag, int vflag) -{ - int maxeatomold=maxeatom; - PairLJCutCoulDebye::ev_setup(eflag,vflag); - - if (eflag_atom && atom->nmax > maxeatomold) - {delete cuda->cu_eatom; cuda->cu_eatom = new cCudaData<double, ENERGY_CFLOAT, x > ((double*)eatom, & cuda->shared_data.atom.eatom , atom->nmax );} - - if (vflag_atom && atom->nmax > maxeatomold) - {delete cuda->cu_vatom; cuda->cu_vatom = new cCudaData<double, ENERGY_CFLOAT, yx > ((double*)vatom, & cuda->shared_data.atom.vatom , atom->nmax, 6 );} - -} diff --git a/src/USER-CUDA/pair_lj_cut_coul_debye_cuda.h b/src/USER-CUDA/pair_lj_cut_coul_debye_cuda.h deleted file mode 100644 index aea3a42f66..0000000000 --- a/src/USER-CUDA/pair_lj_cut_coul_debye_cuda.h +++ /dev/null @@ -1,57 +0,0 @@ -/* -*- c++ -*- ---------------------------------------------------------- - LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator - - Original Version: - http://lammps.sandia.gov, Sandia National Laboratories - Steve Plimpton, sjplimp@sandia.gov - - See the README file in the top-level LAMMPS directory. 
- - ----------------------------------------------------------------------- - - USER-CUDA Package and associated modifications: - https://sourceforge.net/projects/lammpscuda/ - - Christian Trott, christian.trott@tu-ilmenau.de - Lars Winterfeld, lars.winterfeld@tu-ilmenau.de - Theoretical Physics II, University of Technology Ilmenau, Germany - - See the README file in the USER-CUDA directory. - - This software is distributed under the GNU General Public License. -------------------------------------------------------------------------- */ - -#ifdef PAIR_CLASS - -PairStyle(lj/cut/coul/debye/cuda,PairLJCutCoulDebyeCuda) - -#else - -#ifndef LMP_PAIR_LJ_CUT_COUL_DEBYE_CUDA_H -#define LMP_PAIR_LJ_CUT_COUL_DEBYE_CUDA_H - -#include "pair_lj_cut_coul_debye.h" - -namespace LAMMPS_NS { - -class PairLJCutCoulDebyeCuda : public PairLJCutCoulDebye -{ - public: - PairLJCutCoulDebyeCuda(class LAMMPS *); - void compute(int, int); - void settings(int, char **); - void coeff(int, char **); - void init_list(int, class NeighList *); - void init_style(); - void ev_setup(int eflag, int vflag); - protected: - class Cuda *cuda; - void allocate(); - bool allocated2; - class CudaNeighList* cuda_neigh_list; -}; - -} - -#endif -#endif diff --git a/src/USER-CUDA/pair_lj_cut_coul_long_cuda.cpp b/src/USER-CUDA/pair_lj_cut_coul_long_cuda.cpp deleted file mode 100644 index 52397f9429..0000000000 --- a/src/USER-CUDA/pair_lj_cut_coul_long_cuda.cpp +++ /dev/null @@ -1,216 +0,0 @@ -/* ---------------------------------------------------------------------- - LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator - - Original Version: - http://lammps.sandia.gov, Sandia National Laboratories - Steve Plimpton, sjplimp@sandia.gov - - See the README file in the top-level LAMMPS directory. 
- - Contributing author: Paul Crozier (SNL) - ----------------------------------------------------------------------- - - USER-CUDA Package and associated modifications: - https://sourceforge.net/projects/lammpscuda/ - - Christian Trott, christian.trott@tu-ilmenau.de - Lars Winterfeld, lars.winterfeld@tu-ilmenau.de - Theoretical Physics II, University of Technology Ilmenau, Germany - - See the README file in the USER-CUDA directory. - - This software is distributed under the GNU General Public License. -------------------------------------------------------------------------- */ - -#include <cmath> -#include <cstdio> -#include <cstdlib> -#include <cstring> -#include "pair_lj_cut_coul_long_cuda.h" -#include "pair_lj_cut_coul_long_cuda_cu.h" -#include "cuda_data.h" -#include "atom.h" -#include "comm.h" -#include "force.h" -#include "kspace.h" -#include "neighbor.h" -#include "neigh_list.h" -#include "neigh_request.h" -#include "cuda_neigh_list.h" -#include "update.h" -#include "integrate.h" -#include "respa.h" -#include "memory.h" -#include "error.h" -#include "user_cuda.h" - -using namespace LAMMPS_NS; - -#define EWALD_F 1.12837917 -#define EWALD_P 0.3275911 -#define A1 0.254829592 -#define A2 -0.284496736 -#define A3 1.421413741 -#define A4 -1.453152027 -#define A5 1.061405429 -/* ---------------------------------------------------------------------- */ - -PairLJCutCoulLongCuda::PairLJCutCoulLongCuda(LAMMPS *lmp) : PairLJCutCoulLong(lmp) -{ - cuda = lmp->cuda; - if(cuda == NULL) - error->all(FLERR,"You cannot use a /cuda class, without activating 'cuda' acceleration. 
Provide '-c on' as command-line argument to LAMMPS.."); - - allocated2 = false; - cuda->shared_data.pair.cudable_force = 1; - cuda->setSystemParams(); -} - -/* ---------------------------------------------------------------------- - remember pointer to arrays in cuda shared data -------------------------------------------------------------------------- */ - -void PairLJCutCoulLongCuda::allocate() -{ - if(! allocated) PairLJCutCoulLong::allocate(); - if(! allocated2) - { - allocated2 = true; - cuda->shared_data.pair.cut = cut_lj; - cuda->shared_data.pair.coeff1 = lj1; - cuda->shared_data.pair.coeff2 = lj2; - cuda->shared_data.pair.coeff3 = lj3; - cuda->shared_data.pair.coeff4 = lj4; - cuda->shared_data.pair.offset = offset; - cuda->shared_data.pair.special_lj = force->special_lj; - cuda->shared_data.pair.special_coul = force->special_coul; - } -} - -/* ---------------------------------------------------------------------- */ - -void PairLJCutCoulLongCuda::compute(int eflag, int vflag) -{ - if (eflag || vflag) ev_setup(eflag,vflag); - if(eflag) cuda->cu_eng_vdwl->upload(); - if(eflag) cuda->cu_eng_coul->upload(); - if(vflag) cuda->cu_virial->upload(); - - Cuda_PairLJCutCoulLongCuda(& cuda->shared_data, & cuda_neigh_list->sneighlist, eflag, vflag, eflag_atom, vflag_atom); - - if(not cuda->shared_data.pair.collect_forces_later) - { - if(eflag) cuda->cu_eng_vdwl->download(); - if(eflag) cuda->cu_eng_coul->download(); - if(vflag) cuda->cu_virial->download(); - } -} - -/* ---------------------------------------------------------------------- */ - -void PairLJCutCoulLongCuda::settings(int narg, char **arg) -{ - PairLJCutCoulLong::settings(narg, arg); - cuda->shared_data.pair.cut_global = (F_CFLOAT) cut_lj_global; -} - -/* ---------------------------------------------------------------------- */ - -void PairLJCutCoulLongCuda::coeff(int narg, char **arg) -{ - PairLJCutCoulLong::coeff(narg, arg); - allocate(); -} - -void PairLJCutCoulLongCuda::init_style() -{ - if 
(!atom->q_flag) - error->all(FLERR,"Pair style lj/cut/coul/long requires atom attribute q"); - // request regular or rRESPA neighbor lists - - int irequest; - - if (update->whichflag == 0 && strstr(update->integrate_style,"respa")) { - int respa = 0; - if (((Respa *) update->integrate)->level_inner >= 0) respa = 1; - if (((Respa *) update->integrate)->level_middle >= 0) respa = 2; - - if (respa == 0) irequest = neighbor->request(this,instance_me); - else if (respa == 1) { - irequest = neighbor->request(this,instance_me); - neighbor->requests[irequest]->id = 1; - neighbor->requests[irequest]->half = 0; - neighbor->requests[irequest]->respainner = 1; - irequest = neighbor->request(this,instance_me); - neighbor->requests[irequest]->id = 3; - neighbor->requests[irequest]->half = 0; - neighbor->requests[irequest]->respaouter = 1; - } else { - irequest = neighbor->request(this,instance_me); - neighbor->requests[irequest]->id = 1; - neighbor->requests[irequest]->half = 0; - neighbor->requests[irequest]->respainner = 1; - irequest = neighbor->request(this,instance_me); - neighbor->requests[irequest]->id = 2; - neighbor->requests[irequest]->half = 0; - neighbor->requests[irequest]->respamiddle = 1; - irequest = neighbor->request(this,instance_me); - neighbor->requests[irequest]->id = 3; - neighbor->requests[irequest]->half = 0; - neighbor->requests[irequest]->respaouter = 1; - } - - } - else - { - irequest = neighbor->request(this,instance_me); - neighbor->requests[irequest]->full = 1; - neighbor->requests[irequest]->half = 0; - neighbor->requests[irequest]->cudable = 1; - } - - cut_coulsq = cut_coul * cut_coul; - cuda->shared_data.pair.cut_coul_global=cut_coul; - cuda->shared_data.pair.cut_coulsq_global=cut_coulsq; - // set rRESPA cutoffs - - if (strstr(update->integrate_style,"respa") && - ((Respa *) update->integrate)->level_inner >= 0) - cut_respa = ((Respa *) update->integrate)->cutoff; - else cut_respa = NULL; - - if (force->newton) error->warning(FLERR,"Pair style 
uses does not use \"newton\" setting. You might test if \"newton off\" makes the simulation run faster."); - if (force->kspace == NULL) - error->all(FLERR,"Pair style is incompatible with KSpace style"); - g_ewald = force->kspace->g_ewald; - cuda->shared_data.pair.g_ewald=g_ewald; - cuda->shared_data.pppm.qqrd2e=force->qqrd2e; - - - if(ncoultablebits) error->warning(FLERR,"# CUDA: You asked for the usage of Coulomb Tables. This is not supported in CUDA Pair forces. Setting is ignored.\n"); -} - -void PairLJCutCoulLongCuda::init_list(int id, NeighList *ptr) -{ - MYDBG(printf("# CUDA PairLJCutCoulLongCuda::init_list\n");) - PairLJCutCoulLong::init_list(id, ptr); - #ifndef CUDA_USE_BINNING - // right now we can only handle verlet (id 0), not respa - if(id == 0) cuda_neigh_list = cuda->registerNeighborList(ptr); - // see Neighbor::init() for details on lammps lists' logic - #endif - MYDBG(printf("# CUDA PairLJCutCoulLongCuda::init_list end\n");) -} - -void PairLJCutCoulLongCuda::ev_setup(int eflag, int vflag) -{ - int maxeatomold=maxeatom; - PairLJCutCoulLong::ev_setup(eflag,vflag); - - if (eflag_atom && atom->nmax > maxeatomold) - {delete cuda->cu_eatom; cuda->cu_eatom = new cCudaData<double, ENERGY_CFLOAT, x > ((double*)eatom, & cuda->shared_data.atom.eatom , atom->nmax );} - - if (vflag_atom && atom->nmax > maxeatomold) - {delete cuda->cu_vatom; cuda->cu_vatom = new cCudaData<double, ENERGY_CFLOAT, yx > ((double*)vatom, & cuda->shared_data.atom.vatom , atom->nmax, 6 );} - -} diff --git a/src/USER-CUDA/pair_lj_cut_coul_long_cuda.h b/src/USER-CUDA/pair_lj_cut_coul_long_cuda.h deleted file mode 100644 index 2c7e55eb1e..0000000000 --- a/src/USER-CUDA/pair_lj_cut_coul_long_cuda.h +++ /dev/null @@ -1,57 +0,0 @@ -/* -*- c++ -*- ---------------------------------------------------------- - LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator - - Original Version: - http://lammps.sandia.gov, Sandia National Laboratories - Steve Plimpton, sjplimp@sandia.gov - - 
See the README file in the top-level LAMMPS directory. - - ----------------------------------------------------------------------- - - USER-CUDA Package and associated modifications: - https://sourceforge.net/projects/lammpscuda/ - - Christian Trott, christian.trott@tu-ilmenau.de - Lars Winterfeld, lars.winterfeld@tu-ilmenau.de - Theoretical Physics II, University of Technology Ilmenau, Germany - - See the README file in the USER-CUDA directory. - - This software is distributed under the GNU General Public License. -------------------------------------------------------------------------- */ - -#ifdef PAIR_CLASS - -PairStyle(lj/cut/coul/long/cuda,PairLJCutCoulLongCuda) - -#else - -#ifndef LMP_PAIR_LJ_CUT_COUL_LONG_CUDA_H -#define LMP_PAIR_LJ_CUT_COUL_LONG_CUDA_H - -#include "pair_lj_cut_coul_long.h" - -namespace LAMMPS_NS { - -class PairLJCutCoulLongCuda : public PairLJCutCoulLong -{ - public: - PairLJCutCoulLongCuda(class LAMMPS *); - void compute(int, int); - void settings(int, char **); - void coeff(int, char **); - void init_list(int, class NeighList *); - void init_style(); - void ev_setup(int eflag, int vflag); - protected: - class Cuda *cuda; - void allocate(); - bool allocated2; - class CudaNeighList* cuda_neigh_list; -}; - -} - -#endif -#endif diff --git a/src/USER-CUDA/pair_lj_cut_cuda.cpp b/src/USER-CUDA/pair_lj_cut_cuda.cpp deleted file mode 100644 index a5d4f47a51..0000000000 --- a/src/USER-CUDA/pair_lj_cut_cuda.cpp +++ /dev/null @@ -1,179 +0,0 @@ -/* ---------------------------------------------------------------------- - LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator - - Original Version: - http://lammps.sandia.gov, Sandia National Laboratories - Steve Plimpton, sjplimp@sandia.gov - - See the README file in the top-level LAMMPS directory. 
- - ----------------------------------------------------------------------- - - USER-CUDA Package and associated modifications: - https://sourceforge.net/projects/lammpscuda/ - - Christian Trott, christian.trott@tu-ilmenau.de - Lars Winterfeld, lars.winterfeld@tu-ilmenau.de - Theoretical Physics II, University of Technology Ilmenau, Germany - - See the README file in the USER-CUDA directory. - - This software is distributed under the GNU General Public License. -------------------------------------------------------------------------- */ - -/* ---------------------------------------------------------------------- - LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator - http://lammps.sandia.gov, Sandia National Laboratories - Steve Plimpton, sjplimp@sandia.gov - - Copyright (2003) Sandia Corporation. Under the terms of Contract - DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains - certain rights in this software. This software is distributed under - the GNU General Public License. - - See the README file in the top-level LAMMPS directory. 
-------------------------------------------------------------------------- */ - -/* ---------------------------------------------------------------------- - Contributing author: Paul Crozier (SNL) -------------------------------------------------------------------------- */ - -#include <cmath> -#include <cstdio> -#include <cstdlib> -#include <cstring> -#include "pair_lj_cut_cuda.h" -#include "pair_lj_cut_cuda_cu.h" -#include "cuda_data.h" -#include "atom.h" -#include "comm.h" -#include "force.h" -#include "neighbor.h" -#include "neigh_list.h" -#include "neigh_request.h" -#include "cuda_neigh_list.h" -#include "update.h" -#include "integrate.h" -#include "respa.h" -#include "memory.h" -#include "error.h" -#include "user_cuda.h" - -using namespace LAMMPS_NS; - -/* ---------------------------------------------------------------------- */ - -PairLJCutCuda::PairLJCutCuda(LAMMPS *lmp) : PairLJCut(lmp) -{ - cuda = lmp->cuda; - if(cuda == NULL) - error->all(FLERR,"You cannot use a /cuda class, without activating 'cuda' acceleration. Provide '-c on' as command-line argument to LAMMPS.."); - - allocated2 = false; - cuda->shared_data.pair.cudable_force = 1; - cuda->setSystemParams(); -} - -/* ---------------------------------------------------------------------- - remember pointer to arrays in cuda shared data -------------------------------------------------------------------------- */ - -void PairLJCutCuda::allocate() -{ - if(! allocated) PairLJCut::allocate(); - if(! 
allocated2) - { - allocated2 = true; - cuda->shared_data.pair.cut = cut; - cuda->shared_data.pair.coeff1 = lj1; - cuda->shared_data.pair.coeff2 = lj2; - cuda->shared_data.pair.coeff3 = lj3; - cuda->shared_data.pair.coeff4 = lj4; - cuda->shared_data.pair.offset = offset; - cuda->shared_data.pair.special_lj = force->special_lj; - cuda->shared_data.pair.special_coul = force->special_coul; - } -} - -/* ---------------------------------------------------------------------- */ - -void PairLJCutCuda::compute(int eflag, int vflag) -{ - if (eflag || vflag) ev_setup(eflag,vflag); - if(eflag) cuda->cu_eng_vdwl->upload(); - if(vflag) cuda->cu_virial->upload(); - - Cuda_PairLJCutCuda(& cuda->shared_data, & cuda_neigh_list->sneighlist, eflag, vflag, eflag_atom, vflag_atom); - - if(not cuda->shared_data.pair.collect_forces_later) - { - if(eflag) cuda->cu_eng_vdwl->download(); - if(vflag) cuda->cu_virial->download(); - } - -} - -/* ---------------------------------------------------------------------- */ - -void PairLJCutCuda::settings(int narg, char **arg) -{ - PairLJCut::settings(narg, arg); - cuda->shared_data.pair.cut_global = (F_CFLOAT) cut_global; -} - -/* ---------------------------------------------------------------------- */ - -void PairLJCutCuda::coeff(int narg, char **arg) -{ - PairLJCut::coeff(narg, arg); - allocate(); -} - -void PairLJCutCuda::init_style() -{ - MYDBG(printf("# CUDA PairLJCutCuda::init_style start\n"); ) - // request regular or rRESPA neighbor lists - - int irequest; - - if (update->whichflag == 0 && strstr(update->integrate_style,"respa")) { - - } - else - { - irequest = neighbor->request(this,instance_me); - neighbor->requests[irequest]->full = 1; - neighbor->requests[irequest]->half = 0; - neighbor->requests[irequest]->cudable = 1; - //neighbor->style=0; //0=NSQ neighboring - } - - - cut_respa = NULL; - MYDBG(printf("# CUDA PairLJCutCuda::init_style end\n"); ) -} - -void PairLJCutCuda::init_list(int id, NeighList *ptr) -{ - MYDBG(printf("# CUDA 
PairLJCutCuda::init_list\n");) - PairLJCut::init_list(id, ptr); - #ifndef CUDA_USE_BINNING - // right now we can only handle verlet (id 0), not respa - if(id == 0) cuda_neigh_list = cuda->registerNeighborList(ptr); - // see Neighbor::init() for details on lammps lists' logic - #endif - MYDBG(printf("# CUDA PairLJCutCuda::init_list end\n");) -} - -void PairLJCutCuda::ev_setup(int eflag, int vflag) -{ - int maxeatomold=maxeatom; - PairLJCut::ev_setup(eflag,vflag); - - if (eflag_atom && atom->nmax > maxeatomold) - {delete cuda->cu_eatom; cuda->cu_eatom = new cCudaData<double, ENERGY_CFLOAT, x > ((double*)eatom, & cuda->shared_data.atom.eatom , atom->nmax );} - - if (vflag_atom && atom->nmax > maxeatomold) - {delete cuda->cu_vatom; cuda->cu_vatom = new cCudaData<double, ENERGY_CFLOAT, yx > ((double*)vatom, & cuda->shared_data.atom.vatom , atom->nmax, 6 );} - -} diff --git a/src/USER-CUDA/pair_lj_cut_cuda.h b/src/USER-CUDA/pair_lj_cut_cuda.h deleted file mode 100644 index f42c7d04c0..0000000000 --- a/src/USER-CUDA/pair_lj_cut_cuda.h +++ /dev/null @@ -1,57 +0,0 @@ -/* -*- c++ -*- ---------------------------------------------------------- - LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator - - Original Version: - http://lammps.sandia.gov, Sandia National Laboratories - Steve Plimpton, sjplimp@sandia.gov - - See the README file in the top-level LAMMPS directory. - - ----------------------------------------------------------------------- - - USER-CUDA Package and associated modifications: - https://sourceforge.net/projects/lammpscuda/ - - Christian Trott, christian.trott@tu-ilmenau.de - Lars Winterfeld, lars.winterfeld@tu-ilmenau.de - Theoretical Physics II, University of Technology Ilmenau, Germany - - See the README file in the USER-CUDA directory. - - This software is distributed under the GNU General Public License. 
-------------------------------------------------------------------------- */ - -#ifdef PAIR_CLASS - -PairStyle(lj/cut/cuda,PairLJCutCuda) - -#else - -#ifndef PAIR_LJ_CUT_CUDA_H -#define PAIR_LJ_CUT_CUDA_H - -#include "pair_lj_cut.h" - -namespace LAMMPS_NS { - -class PairLJCutCuda : public PairLJCut -{ - public: - PairLJCutCuda(class LAMMPS *); - void compute(int, int); - void settings(int, char **); - void coeff(int, char **); - void init_list(int, class NeighList *); - void init_style(); - void ev_setup(int eflag, int vflag); - protected: - class Cuda *cuda; - void allocate(); - bool allocated2; - class CudaNeighList* cuda_neigh_list; -}; - -} - -#endif -#endif diff --git a/src/USER-CUDA/pair_lj_cut_experimental_cuda.cpp b/src/USER-CUDA/pair_lj_cut_experimental_cuda.cpp deleted file mode 100644 index f60aaa6f38..0000000000 --- a/src/USER-CUDA/pair_lj_cut_experimental_cuda.cpp +++ /dev/null @@ -1,178 +0,0 @@ -/* ---------------------------------------------------------------------- - LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator - - Original Version: - http://lammps.sandia.gov, Sandia National Laboratories - Steve Plimpton, sjplimp@sandia.gov - - See the README file in the top-level LAMMPS directory. - - ----------------------------------------------------------------------- - - USER-CUDA Package and associated modifications: - https://sourceforge.net/projects/lammpscuda/ - - Christian Trott, christian.trott@tu-ilmenau.de - Lars Winterfeld, lars.winterfeld@tu-ilmenau.de - Theoretical Physics II, University of Technology Ilmenau, Germany - - See the README file in the USER-CUDA directory. - - This software is distributed under the GNU General Public License. 
-------------------------------------------------------------------------- */ - -/* ---------------------------------------------------------------------- - LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator - http://lammps.sandia.gov, Sandia National Laboratories - Steve Plimpton, sjplimp@sandia.gov - - Copyright (2003) Sandia Corporation. Under the terms of Contract - DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains - certain rights in this software. This software is distributed under - the GNU General Public License. - - See the README file in the top-level LAMMPS directory. -------------------------------------------------------------------------- */ - -/* ---------------------------------------------------------------------- - Contributing author: Paul Crozier (SNL) -------------------------------------------------------------------------- */ - -#include <cmath> -#include <cstdio> -#include <cstdlib> -#include <cstring> -#include "pair_lj_cut_experimental_cuda.h" -#include "pair_lj_cut_experimental_cuda_cu.h" -#include "cuda_data.h" -#include "atom.h" -#include "comm.h" -#include "force.h" -#include "neighbor.h" -#include "neigh_list.h" -#include "neigh_request.h" -#include "cuda_neigh_list.h" -#include "update.h" -#include "integrate.h" -#include "respa.h" -#include "memory.h" -#include "error.h" -#include "user_cuda.h" - -using namespace LAMMPS_NS; - -/* ---------------------------------------------------------------------- */ - -PairLJCutExperimentalCuda::PairLJCutExperimentalCuda(LAMMPS *lmp) : PairLJCut(lmp) -{ - cuda = lmp->cuda; - if(cuda == NULL) - error->all(FLERR,"You cannot use a /cuda class, without activating 'cuda' acceleration. 
Provide '-c on' as command-line argument to LAMMPS.."); - - allocated2 = false; - cuda->shared_data.pair.cudable_force = 1; - cuda->setSystemParams(); -} - -/* ---------------------------------------------------------------------- - remember pointer to arrays in cuda shared data -------------------------------------------------------------------------- */ - -void PairLJCutExperimentalCuda::allocate() -{ - if(! allocated) PairLJCut::allocate(); - if(! allocated2) - { - allocated2 = true; - cuda->shared_data.pair.cut = cut; - cuda->shared_data.pair.coeff1 = lj1; - cuda->shared_data.pair.coeff2 = lj2; - cuda->shared_data.pair.coeff3 = lj3; - cuda->shared_data.pair.coeff4 = lj4; - cuda->shared_data.pair.offset = offset; - cuda->shared_data.pair.special_lj = force->special_lj; - cuda->shared_data.pair.special_coul = force->special_coul; - } -} - -/* ---------------------------------------------------------------------- */ - -void PairLJCutExperimentalCuda::compute(int eflag, int vflag) -{ - if (eflag || vflag) ev_setup(eflag,vflag); - if(eflag) cuda->cu_eng_vdwl->upload(); - if(vflag) cuda->cu_virial->upload(); - Cuda_PairLJCutExperimentalCuda(& cuda->shared_data, & cuda_neigh_list->sneighlist, eflag, vflag, eflag_atom, vflag_atom); - - if(not cuda->shared_data.pair.collect_forces_later) - { - CudaWrapper_Sync(); - if(eflag) cuda->cu_eng_vdwl->download(); - if(vflag) cuda->cu_virial->download(); - } - } - -/* ---------------------------------------------------------------------- */ - -void PairLJCutExperimentalCuda::settings(int narg, char **arg) -{ - PairLJCut::settings(narg, arg); - cuda->shared_data.pair.cut_global = (F_CFLOAT) cut_global; -} - -/* ---------------------------------------------------------------------- */ - -void PairLJCutExperimentalCuda::coeff(int narg, char **arg) -{ - PairLJCut::coeff(narg, arg); - allocate(); -} - -void PairLJCutExperimentalCuda::init_style() -{ - MYDBG(printf("# CUDA PairLJCutExperimentalCuda::init_style start\n"); ) - // 
request regular or rRESPA neighbor lists - - int irequest; - - if (update->whichflag == 0 && strstr(update->integrate_style,"respa")) { - - } - else - { - irequest = neighbor->request(this,instance_me); - neighbor->requests[irequest]->full = 1; - neighbor->requests[irequest]->half = 0; - neighbor->requests[irequest]->cudable = 1; - //neighbor->style=0; //0=NSQ neighboring - } - - - cut_respa = NULL; - MYDBG(printf("# CUDA PairLJCutExperimentalCuda::init_style end\n"); ) -} - -void PairLJCutExperimentalCuda::init_list(int id, NeighList *ptr) -{ - MYDBG(printf("# CUDA PairLJCutExperimentalCuda::init_list\n");) - PairLJCut::init_list(id, ptr); - #ifndef CUDA_USE_BINNING - // right now we can only handle verlet (id 0), not respa - if(id == 0) cuda_neigh_list = cuda->registerNeighborList(ptr); - // see Neighbor::init() for details on lammps lists' logic - #endif - MYDBG(printf("# CUDA PairLJCutExperimentalCuda::init_list end\n");) -} - -void PairLJCutExperimentalCuda::ev_setup(int eflag, int vflag) -{ - int maxeatomold=maxeatom; - PairLJCut::ev_setup(eflag,vflag); - - if (eflag_atom && atom->nmax > maxeatomold) - {delete cuda->cu_eatom; cuda->cu_eatom = new cCudaData<double, ENERGY_CFLOAT, x > ((double*)eatom, & cuda->shared_data.atom.eatom , atom->nmax );} - - if (vflag_atom && atom->nmax > maxeatomold) - {delete cuda->cu_vatom; cuda->cu_vatom = new cCudaData<double, ENERGY_CFLOAT, yx > ((double*)vatom, & cuda->shared_data.atom.vatom , atom->nmax, 6 );} - -} diff --git a/src/USER-CUDA/pair_lj_cut_experimental_cuda.h b/src/USER-CUDA/pair_lj_cut_experimental_cuda.h deleted file mode 100644 index fafb2d63d4..0000000000 --- a/src/USER-CUDA/pair_lj_cut_experimental_cuda.h +++ /dev/null @@ -1,57 +0,0 @@ -/* -*- c++ -*- ---------------------------------------------------------- - LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator - - Original Version: - http://lammps.sandia.gov, Sandia National Laboratories - Steve Plimpton, sjplimp@sandia.gov - - See the 
README file in the top-level LAMMPS directory. - - ----------------------------------------------------------------------- - - USER-CUDA Package and associated modifications: - https://sourceforge.net/projects/lammpscuda/ - - Christian Trott, christian.trott@tu-ilmenau.de - Lars Winterfeld, lars.winterfeld@tu-ilmenau.de - Theoretical Physics II, University of Technology Ilmenau, Germany - - See the README file in the USER-CUDA directory. - - This software is distributed under the GNU General Public License. -------------------------------------------------------------------------- */ - -#ifdef PAIR_CLASS - -PairStyle(lj/cut/experimental/cuda,PairLJCutExperimentalCuda) - -#else - -#ifndef PAIR_LJ_CUT_EXPERIMENTAL_CUDA_H -#define PAIR_LJ_CUT_EXPERIMENTAL_CUDA_H - -#include "pair_lj_cut.h" - -namespace LAMMPS_NS { - -class PairLJCutExperimentalCuda : public PairLJCut -{ - public: - PairLJCutExperimentalCuda(class LAMMPS *); - void compute(int, int); - void settings(int, char **); - void coeff(int, char **); - void init_list(int, class NeighList *); - void init_style(); - void ev_setup(int eflag, int vflag); - protected: - class Cuda *cuda; - void allocate(); - bool allocated2; - class CudaNeighList* cuda_neigh_list; -}; - -} - -#endif -#endif diff --git a/src/USER-CUDA/pair_lj_expand_cuda.cpp b/src/USER-CUDA/pair_lj_expand_cuda.cpp deleted file mode 100644 index a102dea75a..0000000000 --- a/src/USER-CUDA/pair_lj_expand_cuda.cpp +++ /dev/null @@ -1,180 +0,0 @@ -/* ---------------------------------------------------------------------- - LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator - - Original Version: - http://lammps.sandia.gov, Sandia National Laboratories - Steve Plimpton, sjplimp@sandia.gov - - See the README file in the top-level LAMMPS directory. 
- - ----------------------------------------------------------------------- - - USER-CUDA Package and associated modifications: - https://sourceforge.net/projects/lammpscuda/ - - Christian Trott, christian.trott@tu-ilmenau.de - Lars Winterfeld, lars.winterfeld@tu-ilmenau.de - Theoretical Physics II, University of Technology Ilmenau, Germany - - See the README file in the USER-CUDA directory. - - This software is distributed under the GNU General Public License. -------------------------------------------------------------------------- */ - -/* ---------------------------------------------------------------------- - LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator - http://lammps.sandia.gov, Sandia National Laboratories - Steve Plimpton, sjplimp@sandia.gov - - Copyright (2003) Sandia Corporation. Under the terms of Contract - DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains - certain rights in this software. This software is distributed under - the GNU General Public License. - - See the README file in the top-level LAMMPS directory. 
-------------------------------------------------------------------------- */ - -/* ---------------------------------------------------------------------- - Contributing author: Paul Crozier (SNL) -------------------------------------------------------------------------- */ - -#include <cmath> -#include <cstdio> -#include <cstdlib> -#include <cstring> -#include "pair_lj_expand_cuda.h" -#include "pair_lj_expand_cuda_cu.h" -#include "cuda_data.h" -#include "atom.h" -#include "comm.h" -#include "force.h" -#include "neighbor.h" -#include "neigh_list.h" -#include "neigh_request.h" -#include "cuda_neigh_list.h" -#include "update.h" -#include "integrate.h" -#include "respa.h" -#include "memory.h" -#include "error.h" -#include "user_cuda.h" - -using namespace LAMMPS_NS; - -/* ---------------------------------------------------------------------- */ - -PairLJExpandCuda::PairLJExpandCuda(LAMMPS *lmp) : PairLJExpand(lmp) -{ - cuda = lmp->cuda; - if(cuda == NULL) - error->all(FLERR,"You cannot use a /cuda class, without activating 'cuda' acceleration. Provide '-c on' as command-line argument to LAMMPS.."); - - allocated2 = false; - cuda->shared_data.pair.cudable_force = 1; - cuda->setSystemParams(); -} - -/* ---------------------------------------------------------------------- - remember pointer to arrays in cuda shared data -------------------------------------------------------------------------- */ - -void PairLJExpandCuda::allocate() -{ - if(! allocated) PairLJExpand::allocate(); - if(! 
allocated2) - { - allocated2 = true; - cuda->shared_data.pair.cut = cut; - cuda->shared_data.pair.cutsq = cutsq; - cuda->shared_data.pair.coeff1 = lj1; - cuda->shared_data.pair.coeff2 = lj2; - cuda->shared_data.pair.coeff3 = lj3; - cuda->shared_data.pair.coeff4 = lj4; - cuda->shared_data.pair.coeff5 = shift; - cuda->shared_data.pair.offset = offset; - cuda->shared_data.pair.special_lj = force->special_lj; - cuda->shared_data.pair.special_coul = force->special_coul; - } -} - -/* ---------------------------------------------------------------------- */ - -void PairLJExpandCuda::compute(int eflag, int vflag) -{ - if (eflag || vflag) ev_setup(eflag,vflag); - if(eflag) cuda->cu_eng_vdwl->upload(); - if(vflag) cuda->cu_virial->upload(); - - Cuda_PairLJExpandCuda(& cuda->shared_data, & cuda_neigh_list->sneighlist, eflag, vflag, eflag_atom, vflag_atom); - - if(not cuda->shared_data.pair.collect_forces_later) - { - if(eflag) cuda->cu_eng_vdwl->download(); - if(vflag) cuda->cu_virial->download(); - } - -} - -/* ---------------------------------------------------------------------- */ - -void PairLJExpandCuda::settings(int narg, char **arg) -{ - PairLJExpand::settings(narg, arg); - cuda->shared_data.pair.cut_global = (F_CFLOAT) cut_global; -} - -/* ---------------------------------------------------------------------- */ - -void PairLJExpandCuda::coeff(int narg, char **arg) -{ - PairLJExpand::coeff(narg, arg); - allocate(); -} - -void PairLJExpandCuda::init_style() -{ - MYDBG(printf("# CUDA PairLJExpandCuda::init_style start\n"); ) - // request regular or rRESPA neighbor lists - - int irequest; - - if (update->whichflag == 0 && strstr(update->integrate_style,"respa")) { - - } - else - { - irequest = neighbor->request(this,instance_me); - neighbor->requests[irequest]->full = 1; - neighbor->requests[irequest]->half = 0; - neighbor->requests[irequest]->cudable = 1; - //neighbor->style=0; //0=NSQ neighboring - } - - - MYDBG(printf("# CUDA PairLJExpandCuda::init_style end\n"); ) 
-} - -void PairLJExpandCuda::init_list(int id, NeighList *ptr) -{ - MYDBG(printf("# CUDA PairLJExpandCuda::init_list\n");) - PairLJExpand::init_list(id, ptr); - #ifndef CUDA_USE_BINNING - // right now we can only handle verlet (id 0), not respa - if(id == 0) cuda_neigh_list = cuda->registerNeighborList(ptr); - // see Neighbor::init() for details on lammps lists' logic - #endif - MYDBG(printf("# CUDA PairLJExpandCuda::init_list end\n");) -} - -void PairLJExpandCuda::ev_setup(int eflag, int vflag) -{ - int maxeatomold=maxeatom; - PairLJExpand::ev_setup(eflag,vflag); - - if (eflag_atom && atom->nmax > maxeatomold) - {delete cuda->cu_eatom; cuda->cu_eatom = new cCudaData<double, ENERGY_CFLOAT, x > ((double*)eatom, & cuda->shared_data.atom.eatom , atom->nmax );} - - if (vflag_atom && atom->nmax > maxeatomold) - {delete cuda->cu_vatom; cuda->cu_vatom = new cCudaData<double, ENERGY_CFLOAT, yx > ((double*)vatom, & cuda->shared_data.atom.vatom , atom->nmax, 6 );} - -} diff --git a/src/USER-CUDA/pair_lj_expand_cuda.h b/src/USER-CUDA/pair_lj_expand_cuda.h deleted file mode 100644 index b61578c295..0000000000 --- a/src/USER-CUDA/pair_lj_expand_cuda.h +++ /dev/null @@ -1,57 +0,0 @@ -/* -*- c++ -*- ---------------------------------------------------------- - LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator - - Original Version: - http://lammps.sandia.gov, Sandia National Laboratories - Steve Plimpton, sjplimp@sandia.gov - - See the README file in the top-level LAMMPS directory. - - ----------------------------------------------------------------------- - - USER-CUDA Package and associated modifications: - https://sourceforge.net/projects/lammpscuda/ - - Christian Trott, christian.trott@tu-ilmenau.de - Lars Winterfeld, lars.winterfeld@tu-ilmenau.de - Theoretical Physics II, University of Technology Ilmenau, Germany - - See the README file in the USER-CUDA directory. - - This software is distributed under the GNU General Public License. 
-------------------------------------------------------------------------- */ - -#ifdef PAIR_CLASS - -PairStyle(lj/expand/cuda,PairLJExpandCuda) - -#else - -#ifndef PAIR_LJ_EXPAND_CUDA_H -#define PAIR_LJ_EXPAND_CUDA_H - -#include "pair_lj_expand.h" - -namespace LAMMPS_NS { - -class PairLJExpandCuda : public PairLJExpand -{ - public: - PairLJExpandCuda(class LAMMPS *); - void compute(int, int); - void settings(int, char **); - void coeff(int, char **); - void init_list(int, class NeighList *); - void init_style(); - void ev_setup(int eflag, int vflag); - protected: - class Cuda *cuda; - void allocate(); - bool allocated2; - class CudaNeighList* cuda_neigh_list; -}; - -} - -#endif -#endif diff --git a/src/USER-CUDA/pair_lj_gromacs_coul_gromacs_cuda.cpp b/src/USER-CUDA/pair_lj_gromacs_coul_gromacs_cuda.cpp deleted file mode 100644 index 73df6a66cb..0000000000 --- a/src/USER-CUDA/pair_lj_gromacs_coul_gromacs_cuda.cpp +++ /dev/null @@ -1,194 +0,0 @@ -/* ---------------------------------------------------------------------- - LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator - - Original Version: - http://lammps.sandia.gov, Sandia National Laboratories - Steve Plimpton, sjplimp@sandia.gov - - See the README file in the top-level LAMMPS directory. - - Contributing author: Paul Crozier (SNL) - ----------------------------------------------------------------------- - - USER-CUDA Package and associated modifications: - https://sourceforge.net/projects/lammpscuda/ - - Christian Trott, christian.trott@tu-ilmenau.de - Lars Winterfeld, lars.winterfeld@tu-ilmenau.de - Theoretical Physics II, University of Technology Ilmenau, Germany - - See the README file in the USER-CUDA directory. - - This software is distributed under the GNU General Public License. 
-------------------------------------------------------------------------- */ - -#include <cmath> -#include <cstdio> -#include <cstdlib> -#include <cstring> -#include "pair_lj_gromacs_coul_gromacs_cuda.h" -#include "pair_lj_gromacs_coul_gromacs_cuda_cu.h" -#include "cuda_data.h" -#include "atom.h" -#include "comm.h" -#include "force.h" -#include "kspace.h" -#include "neighbor.h" -#include "neigh_list.h" -#include "neigh_request.h" -#include "cuda_neigh_list.h" -#include "update.h" -#include "integrate.h" -#include "respa.h" -#include "memory.h" -#include "error.h" -#include "user_cuda.h" - -using namespace LAMMPS_NS; - -/* ---------------------------------------------------------------------- */ - -PairLJGromacsCoulGromacsCuda::PairLJGromacsCoulGromacsCuda(LAMMPS *lmp) : PairLJGromacsCoulGromacs(lmp) -{ - cuda = lmp->cuda; - if(cuda == NULL) - error->all(FLERR,"You cannot use a /cuda class, without activating 'cuda' acceleration. Provide '-c on' as command-line argument to LAMMPS.."); - - allocated2 = false; - cuda->shared_data.pair.cudable_force = 1; - cuda->shared_data.pair.use_block_per_atom = 0; - cuda->setSystemParams(); -} - -/* ---------------------------------------------------------------------- - remember pointer to arrays in cuda shared data -------------------------------------------------------------------------- */ - -void PairLJGromacsCoulGromacsCuda::allocate() -{ - if(! allocated) PairLJGromacsCoulGromacs::allocate(); - if(! 
allocated2) - { - allocated2 = true; - cuda->shared_data.pair.coeff1 = lj1; - cuda->shared_data.pair.coeff2 = lj2; - cuda->shared_data.pair.coeff3 = lj3; - cuda->shared_data.pair.coeff4 = lj4; - cuda->shared_data.pair.coeff5 = ljsw1; - cuda->shared_data.pair.coeff6 = ljsw2; - cuda->shared_data.pair.coeff7 = ljsw3; - cuda->shared_data.pair.coeff8 = ljsw4; - cuda->shared_data.pair.coeff9 = ljsw5; - cuda->shared_data.pair.special_lj = force->special_lj; - cuda->shared_data.pair.special_coul = force->special_coul; - cu_lj1_gm = new cCudaData<double, F_CFLOAT, x> ((double*)lj1, &cuda->shared_data.pair.coeff1_gm, (atom->ntypes+1)*(atom->ntypes+1)); - cu_lj2_gm = new cCudaData<double, F_CFLOAT, x> ((double*)lj2, &cuda->shared_data.pair.coeff2_gm, (atom->ntypes+1)*(atom->ntypes+1)); - cu_lj3_gm = new cCudaData<double, F_CFLOAT, x> ((double*)lj3, &cuda->shared_data.pair.coeff3_gm, (atom->ntypes+1)*(atom->ntypes+1)); - cu_lj4_gm = new cCudaData<double, F_CFLOAT, x> ((double*)lj4, &cuda->shared_data.pair.coeff4_gm, (atom->ntypes+1)*(atom->ntypes+1)); - cu_ljsw1_gm = new cCudaData<double, F_CFLOAT, x> ((double*)ljsw1, &cuda->shared_data.pair.coeff5_gm, (atom->ntypes+1)*(atom->ntypes+1)); - cu_ljsw2_gm = new cCudaData<double, F_CFLOAT, x> ((double*)ljsw2, &cuda->shared_data.pair.coeff6_gm, (atom->ntypes+1)*(atom->ntypes+1)); - cu_ljsw3_gm = new cCudaData<double, F_CFLOAT, x> ((double*)ljsw3, &cuda->shared_data.pair.coeff7_gm, (atom->ntypes+1)*(atom->ntypes+1)); - cu_ljsw4_gm = new cCudaData<double, F_CFLOAT, x> ((double*)ljsw4, &cuda->shared_data.pair.coeff8_gm, (atom->ntypes+1)*(atom->ntypes+1)); - cu_ljsw5_gm = new cCudaData<double, F_CFLOAT, x> ((double*)ljsw5, &cuda->shared_data.pair.coeff9_gm, (atom->ntypes+1)*(atom->ntypes+1)); - } -} - -/* ---------------------------------------------------------------------- */ - -void PairLJGromacsCoulGromacsCuda::compute(int eflag, int vflag) -{ - if (eflag || vflag) ev_setup(eflag,vflag); - if(not 
cuda->shared_data.pair.collect_forces_later) - { - if(eflag) cuda->cu_eng_vdwl->upload(); - if(eflag) cuda->cu_eng_coul->upload(); - if(vflag) cuda->cu_virial->upload(); - } - - Cuda_PairLJGromacsCoulGromacsCuda(& cuda->shared_data, & cuda_neigh_list->sneighlist, eflag, vflag, eflag_atom, vflag_atom,cut_coul_inner,coulsw1,coulsw2,coulsw5); - - if(not cuda->shared_data.pair.collect_forces_later) - { - if(eflag) cuda->cu_eng_vdwl->download(); - if(eflag) cuda->cu_eng_coul->download(); - if(vflag) cuda->cu_virial->download(); - } -} - -/* ---------------------------------------------------------------------- */ - -void PairLJGromacsCoulGromacsCuda::settings(int narg, char **arg) -{ - PairLJGromacsCoulGromacs::settings(narg, arg); - cuda->shared_data.pair.cut_global = (X_CFLOAT) cut_lj; - cuda->shared_data.pair.cut_coulsq_global = (X_CFLOAT) cut_coulsq; - cuda->shared_data.pair.cut_inner_global = (F_CFLOAT) cut_lj_inner; -} - -/* ---------------------------------------------------------------------- */ - -void PairLJGromacsCoulGromacsCuda::coeff(int narg, char **arg) -{ - PairLJGromacsCoulGromacs::coeff(narg, arg); - allocate(); -} - -void PairLJGromacsCoulGromacsCuda::init_style() -{ - if (!atom->q_flag) - error->all(FLERR,"Pair style lj/gromacs/coul/gromacs requires atom attribute q"); - // request regular or rRESPA neighbor lists - - if(atom->molecular) - { - cuda->shared_data.pair.collect_forces_later = 1; - } - - int irequest; - - irequest = neighbor->request(this,instance_me); - neighbor->requests[irequest]->full = 1; - neighbor->requests[irequest]->half = 0; - neighbor->requests[irequest]->cudable = 1; - - if (cut_lj_inner >= cut_lj || cut_coul_inner >= cut_coul) - error->all(FLERR,"Pair inner cutoff >= Pair outer cutoff"); - - cut_lj_innersq = cut_lj_inner * cut_lj_inner; - cut_ljsq = cut_lj * cut_lj; - cut_coul_innersq = cut_coul_inner * cut_coul_inner; - cut_coulsq = cut_coul * cut_coul; - cut_bothsq = MAX(cut_ljsq,cut_coulsq); - - - cut_coulsq = cut_coul * 
cut_coul; - - cuda->shared_data.pair.cut_coulsq_global=cut_coulsq; - - cuda->shared_data.pppm.qqrd2e=force->qqrd2e; -} - -void PairLJGromacsCoulGromacsCuda::init_list(int id, NeighList *ptr) -{ - MYDBG(printf("# CUDA PairLJGromacsCoulGromacsCuda::init_list\n");) - PairLJGromacsCoulGromacs::init_list(id, ptr); - #ifndef CUDA_USE_BINNING - // right now we can only handle verlet (id 0), not respa - if(id == 0) cuda_neigh_list = cuda->registerNeighborList(ptr); - // see Neighbor::init() for details on lammps lists' logic - #endif - MYDBG(printf("# CUDA PairLJGromacsCoulGromacsCuda::init_list end\n");) -} - -void PairLJGromacsCoulGromacsCuda::ev_setup(int eflag, int vflag) -{ - int maxeatomold=maxeatom; - PairLJGromacsCoulGromacs::ev_setup(eflag,vflag); - - if (eflag_atom && atom->nmax > maxeatomold) - {delete cuda->cu_eatom; cuda->cu_eatom = new cCudaData<double, ENERGY_CFLOAT, x > ((double*)eatom, & cuda->shared_data.atom.eatom , atom->nmax );} - - if (vflag_atom && atom->nmax > maxeatomold) - {delete cuda->cu_vatom; cuda->cu_vatom = new cCudaData<double, ENERGY_CFLOAT, yx > ((double*)vatom, & cuda->shared_data.atom.vatom , atom->nmax, 6 );} - -} diff --git a/src/USER-CUDA/pair_lj_gromacs_coul_gromacs_cuda.h b/src/USER-CUDA/pair_lj_gromacs_coul_gromacs_cuda.h deleted file mode 100644 index 6e48df1931..0000000000 --- a/src/USER-CUDA/pair_lj_gromacs_coul_gromacs_cuda.h +++ /dev/null @@ -1,68 +0,0 @@ -/* -*- c++ -*- ---------------------------------------------------------- - LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator - - Original Version: - http://lammps.sandia.gov, Sandia National Laboratories - Steve Plimpton, sjplimp@sandia.gov - - See the README file in the top-level LAMMPS directory. 
- - ----------------------------------------------------------------------- - - USER-CUDA Package and associated modifications: - https://sourceforge.net/projects/lammpscuda/ - - Christian Trott, christian.trott@tu-ilmenau.de - Lars Winterfeld, lars.winterfeld@tu-ilmenau.de - Theoretical Physics II, University of Technology Ilmenau, Germany - - See the README file in the USER-CUDA directory. - - This software is distributed under the GNU General Public License. -------------------------------------------------------------------------- */ - -#ifdef PAIR_CLASS - -PairStyle(lj/gromacs/coul/gromacs/cuda,PairLJGromacsCoulGromacsCuda) - -#else - -#ifndef LMP_PAIR_LJ_GROMACS_COUL_GROMACS_CUDA_H -#define LMP_PAIR_LJ_GROMACS_COUL_GROMACS_CUDA_H - -#include "pair_lj_gromacs_coul_gromacs.h" -#include "cuda_data.h" - -namespace LAMMPS_NS { - -class PairLJGromacsCoulGromacsCuda : public PairLJGromacsCoulGromacs -{ - public: - PairLJGromacsCoulGromacsCuda(class LAMMPS *); - void compute(int, int); - void settings(int, char **); - void coeff(int, char **); - void init_list(int, class NeighList *); - void init_style(); - void ev_setup(int eflag, int vflag); - protected: - class Cuda *cuda; - void allocate(); - bool allocated2; - class CudaNeighList* cuda_neigh_list; - cCudaData<double , F_CFLOAT , x >* cu_lj1_gm; - cCudaData<double , F_CFLOAT , x >* cu_lj2_gm; - cCudaData<double , F_CFLOAT , x >* cu_lj3_gm; - cCudaData<double , F_CFLOAT , x >* cu_lj4_gm; - cCudaData<double , F_CFLOAT , x >* cu_ljsw1_gm; - cCudaData<double , F_CFLOAT , x >* cu_ljsw2_gm; - cCudaData<double , F_CFLOAT , x >* cu_ljsw3_gm; - cCudaData<double , F_CFLOAT , x >* cu_ljsw4_gm; - cCudaData<double , F_CFLOAT , x >* cu_ljsw5_gm; - -}; - -} - -#endif -#endif diff --git a/src/USER-CUDA/pair_lj_gromacs_cuda.cpp b/src/USER-CUDA/pair_lj_gromacs_cuda.cpp deleted file mode 100644 index b2786d81ab..0000000000 --- a/src/USER-CUDA/pair_lj_gromacs_cuda.cpp +++ /dev/null @@ -1,177 +0,0 @@ -/* 
---------------------------------------------------------------------- - LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator - - Original Version: - http://lammps.sandia.gov, Sandia National Laboratories - Steve Plimpton, sjplimp@sandia.gov - - See the README file in the top-level LAMMPS directory. - - Contributing author: Paul Crozier (SNL) - ----------------------------------------------------------------------- - - USER-CUDA Package and associated modifications: - https://sourceforge.net/projects/lammpscuda/ - - Christian Trott, christian.trott@tu-ilmenau.de - Lars Winterfeld, lars.winterfeld@tu-ilmenau.de - Theoretical Physics II, University of Technology Ilmenau, Germany - - See the README file in the USER-CUDA directory. - - This software is distributed under the GNU General Public License. -------------------------------------------------------------------------- */ - -#include <cmath> -#include <cstdio> -#include <cstdlib> -#include <cstring> -#include "pair_lj_gromacs_cuda.h" -#include "pair_lj_gromacs_cuda_cu.h" -#include "cuda_data.h" -#include "atom.h" -#include "comm.h" -#include "force.h" -#include "kspace.h" -#include "neighbor.h" -#include "neigh_list.h" -#include "neigh_request.h" -#include "cuda_neigh_list.h" -#include "update.h" -#include "integrate.h" -#include "respa.h" -#include "memory.h" -#include "error.h" -#include "user_cuda.h" - -using namespace LAMMPS_NS; - -/* ---------------------------------------------------------------------- */ - -PairLJGromacsCuda::PairLJGromacsCuda(LAMMPS *lmp) : PairLJGromacs(lmp) -{ - cuda = lmp->cuda; - if(cuda == NULL) - error->all(FLERR,"You cannot use a /cuda class, without activating 'cuda' acceleration. 
Provide '-c on' as command-line argument to LAMMPS.."); - - allocated2 = false; - cuda->shared_data.pair.cudable_force = 1; - cuda->shared_data.pair.use_block_per_atom = 0; - cuda->setSystemParams(); -} - -/* ---------------------------------------------------------------------- - remember pointer to arrays in cuda shared data -------------------------------------------------------------------------- */ - -void PairLJGromacsCuda::allocate() -{ - if(! allocated) PairLJGromacs::allocate(); - if(! allocated2) - { - allocated2 = true; - cuda->shared_data.pair.cut = cut; - cuda->shared_data.pair.cut_inner = cut_inner; - cuda->shared_data.pair.coeff1 = lj1; - cuda->shared_data.pair.coeff2 = lj2; - cuda->shared_data.pair.coeff3 = lj3; - cuda->shared_data.pair.coeff4 = lj4; - cuda->shared_data.pair.coeff5 = ljsw1; - cuda->shared_data.pair.coeff6 = ljsw2; - cuda->shared_data.pair.coeff7 = ljsw3; - cuda->shared_data.pair.coeff8 = ljsw4; - cuda->shared_data.pair.coeff9 = ljsw5; - cuda->shared_data.pair.special_lj = force->special_lj; - cu_lj1_gm = new cCudaData<double, F_CFLOAT, x> ((double*)lj1, &cuda->shared_data.pair.coeff1_gm, (atom->ntypes+1)*(atom->ntypes+1)); - cu_lj2_gm = new cCudaData<double, F_CFLOAT, x> ((double*)lj2, &cuda->shared_data.pair.coeff2_gm, (atom->ntypes+1)*(atom->ntypes+1)); - cu_lj3_gm = new cCudaData<double, F_CFLOAT, x> ((double*)lj3, &cuda->shared_data.pair.coeff3_gm, (atom->ntypes+1)*(atom->ntypes+1)); - cu_lj4_gm = new cCudaData<double, F_CFLOAT, x> ((double*)lj4, &cuda->shared_data.pair.coeff4_gm, (atom->ntypes+1)*(atom->ntypes+1)); - cu_ljsw1_gm = new cCudaData<double, F_CFLOAT, x> ((double*)ljsw1, &cuda->shared_data.pair.coeff5_gm, (atom->ntypes+1)*(atom->ntypes+1)); - cu_ljsw2_gm = new cCudaData<double, F_CFLOAT, x> ((double*)ljsw2, &cuda->shared_data.pair.coeff6_gm, (atom->ntypes+1)*(atom->ntypes+1)); - cu_ljsw3_gm = new cCudaData<double, F_CFLOAT, x> ((double*)ljsw3, &cuda->shared_data.pair.coeff7_gm, (atom->ntypes+1)*(atom->ntypes+1)); - 
cu_ljsw4_gm = new cCudaData<double, F_CFLOAT, x> ((double*)ljsw4, &cuda->shared_data.pair.coeff8_gm, (atom->ntypes+1)*(atom->ntypes+1)); - cu_ljsw5_gm = new cCudaData<double, F_CFLOAT, x> ((double*)ljsw5, &cuda->shared_data.pair.coeff9_gm, (atom->ntypes+1)*(atom->ntypes+1)); - } -} - -/* ---------------------------------------------------------------------- */ - -void PairLJGromacsCuda::compute(int eflag, int vflag) -{ - if (eflag || vflag) ev_setup(eflag,vflag); - if(not cuda->shared_data.pair.collect_forces_later) - { - if(eflag) cuda->cu_eng_vdwl->upload(); - if(vflag) cuda->cu_virial->upload(); - } - - Cuda_PairLJGromacsCuda(& cuda->shared_data, & cuda_neigh_list->sneighlist, eflag, vflag, eflag_atom, vflag_atom); - - if(not cuda->shared_data.pair.collect_forces_later) - { - if(eflag) cuda->cu_eng_vdwl->download(); - if(vflag) cuda->cu_virial->download(); - } -} - -/* ---------------------------------------------------------------------- */ - -void PairLJGromacsCuda::settings(int narg, char **arg) -{ - PairLJGromacs::settings(narg, arg); - cuda->shared_data.pair.cut_global = (F_CFLOAT) cut_global; - cuda->shared_data.pair.cut_inner_global = (F_CFLOAT) cut_inner_global; -} - -/* ---------------------------------------------------------------------- */ - -void PairLJGromacsCuda::coeff(int narg, char **arg) -{ - PairLJGromacs::coeff(narg, arg); - allocate(); -} - -void PairLJGromacsCuda::init_style() -{ - // request regular or rRESPA neighbor lists - - if(atom->molecular) - { - cuda->shared_data.pair.collect_forces_later = 1; - } - - int irequest; - - irequest = neighbor->request(this,instance_me); - neighbor->requests[irequest]->full = 1; - neighbor->requests[irequest]->half = 0; - neighbor->requests[irequest]->cudable = 1; - - - -} - -void PairLJGromacsCuda::init_list(int id, NeighList *ptr) -{ - MYDBG(printf("# CUDA PairLJGromacsCuda::init_list\n");) - PairLJGromacs::init_list(id, ptr); - #ifndef CUDA_USE_BINNING - // right now we can only handle verlet (id 0), 
not respa - if(id == 0) cuda_neigh_list = cuda->registerNeighborList(ptr); - // see Neighbor::init() for details on lammps lists' logic - #endif - MYDBG(printf("# CUDA PairLJGromacsCuda::init_list end\n");) -} - -void PairLJGromacsCuda::ev_setup(int eflag, int vflag) -{ - int maxeatomold=maxeatom; - PairLJGromacs::ev_setup(eflag,vflag); - - if (eflag_atom && atom->nmax > maxeatomold) - {delete cuda->cu_eatom; cuda->cu_eatom = new cCudaData<double, ENERGY_CFLOAT, x > ((double*)eatom, & cuda->shared_data.atom.eatom , atom->nmax );} - - if (vflag_atom && atom->nmax > maxeatomold) - {delete cuda->cu_vatom; cuda->cu_vatom = new cCudaData<double, ENERGY_CFLOAT, yx > ((double*)vatom, & cuda->shared_data.atom.vatom , atom->nmax, 6 );} - -} diff --git a/src/USER-CUDA/pair_lj_gromacs_cuda.h b/src/USER-CUDA/pair_lj_gromacs_cuda.h deleted file mode 100644 index b4bbc15c6d..0000000000 --- a/src/USER-CUDA/pair_lj_gromacs_cuda.h +++ /dev/null @@ -1,68 +0,0 @@ -/* -*- c++ -*- ---------------------------------------------------------- - LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator - - Original Version: - http://lammps.sandia.gov, Sandia National Laboratories - Steve Plimpton, sjplimp@sandia.gov - - See the README file in the top-level LAMMPS directory. - - ----------------------------------------------------------------------- - - USER-CUDA Package and associated modifications: - https://sourceforge.net/projects/lammpscuda/ - - Christian Trott, christian.trott@tu-ilmenau.de - Lars Winterfeld, lars.winterfeld@tu-ilmenau.de - Theoretical Physics II, University of Technology Ilmenau, Germany - - See the README file in the USER-CUDA directory. - - This software is distributed under the GNU General Public License. 
-------------------------------------------------------------------------- */ - -#ifdef PAIR_CLASS - -PairStyle(lj/gromacs/cuda,PairLJGromacsCuda) - -#else - -#ifndef LMP_PAIR_LJ_GROMACS_CUDA_H -#define LMP_PAIR_LJ_GROMACS_CUDA_H - -#include "pair_lj_gromacs.h" -#include "cuda_data.h" - -namespace LAMMPS_NS { - -class PairLJGromacsCuda : public PairLJGromacs -{ - public: - PairLJGromacsCuda(class LAMMPS *); - void compute(int, int); - void settings(int, char **); - void coeff(int, char **); - void init_list(int, class NeighList *); - void init_style(); - void ev_setup(int eflag, int vflag); - protected: - class Cuda *cuda; - void allocate(); - bool allocated2; - class CudaNeighList* cuda_neigh_list; - cCudaData<double , F_CFLOAT , x >* cu_lj1_gm; - cCudaData<double , F_CFLOAT , x >* cu_lj2_gm; - cCudaData<double , F_CFLOAT , x >* cu_lj3_gm; - cCudaData<double , F_CFLOAT , x >* cu_lj4_gm; - cCudaData<double , F_CFLOAT , x >* cu_ljsw1_gm; - cCudaData<double , F_CFLOAT , x >* cu_ljsw2_gm; - cCudaData<double , F_CFLOAT , x >* cu_ljsw3_gm; - cCudaData<double , F_CFLOAT , x >* cu_ljsw4_gm; - cCudaData<double , F_CFLOAT , x >* cu_ljsw5_gm; - -}; - -} - -#endif -#endif diff --git a/src/USER-CUDA/pair_lj_sdk_coul_long_cuda.cpp b/src/USER-CUDA/pair_lj_sdk_coul_long_cuda.cpp deleted file mode 100644 index aca0f6d013..0000000000 --- a/src/USER-CUDA/pair_lj_sdk_coul_long_cuda.cpp +++ /dev/null @@ -1,193 +0,0 @@ -/* ---------------------------------------------------------------------- - LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator - - Original Version: - http://lammps.sandia.gov, Sandia National Laboratories - Steve Plimpton, sjplimp@sandia.gov - - See the README file in the top-level LAMMPS directory. 
- - ----------------------------------------------------------------------- - - USER-CUDA Package and associated modifications: - https://sourceforge.net/projects/lammpscuda/ - - Christian Trott, christian.trott@tu-ilmenau.de - Lars Winterfeld, lars.winterfeld@tu-ilmenau.de - Theoretical Physics II, University of Technology Ilmenau, Germany - - See the README file in the USER-CUDA directory. - - This software is distributed under the GNU General Public License. -------------------------------------------------------------------------- */ - -/* ---------------------------------------------------------------------- - LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator - http://lammps.sandia.gov, Sandia National Laboratories - Steve Plimpton, sjplimp@sandia.gov - - Copyright (2003) Sandia Corporation. Under the terms of Contract - DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains - certain rights in this software. This software is distributed under - the GNU General Public License. - - See the README file in the top-level LAMMPS directory. 
-------------------------------------------------------------------------- */ - -/* ---------------------------------------------------------------------- - Contributing author: Paul Crozier (SNL) -------------------------------------------------------------------------- */ - -#include <cmath> -#include <cstdio> -#include <cstdlib> -#include <cstring> -#include "pair_lj_sdk_coul_long_cuda.h" -#include "pair_lj_sdk_coul_long_cuda_cu.h" -#include "cuda_data.h" -#include "atom.h" -#include "comm.h" -#include "force.h" -#include "kspace.h" -#include "neighbor.h" -#include "neigh_list.h" -#include "neigh_request.h" -#include "cuda_neigh_list.h" -#include "update.h" -#include "integrate.h" -#include "respa.h" -#include "memory.h" -#include "error.h" -#include "user_cuda.h" - -using namespace LAMMPS_NS; - -/* ---------------------------------------------------------------------- */ - -PairLJSDKCoulLongCuda::PairLJSDKCoulLongCuda(LAMMPS *lmp) : PairLJSDKCoulLong(lmp) -{ - cuda = lmp->cuda; - if(cuda == NULL) - error->all(FLERR,"You cannot use a /cuda class, without activating 'cuda' acceleration. Provide '-c on' as command-line argument to LAMMPS.."); - - allocated2 = false; - lj_type_double = NULL; - cuda->shared_data.pair.cudable_force = 1; - cuda->setSystemParams(); -} - -/* ---------------------------------------------------------------------- - remember pointer to arrays in cuda shared data -------------------------------------------------------------------------- */ - -void PairLJSDKCoulLongCuda::allocate() -{ - if(! allocated) PairLJSDKCoulLong::allocate(); - int n = atom->ntypes; - if(! 
allocated2) - { - allocated2 = true; - - - memory->create(lj_type_double,n+1,n+1,"pairlj:ljtypedouble"); - - cuda->shared_data.pair.cut = cut_lj; - cuda->shared_data.pair.cut_coul= NULL; - cuda->shared_data.pair.coeff1 = lj1; - cuda->shared_data.pair.coeff2 = lj2; - cuda->shared_data.pair.coeff3 = lj3; - cuda->shared_data.pair.coeff4 = lj4; - cuda->shared_data.pair.coeff5 = lj_type_double; - cuda->shared_data.pair.offset = offset; - cuda->shared_data.pair.special_lj = force->special_lj; - cuda->shared_data.pair.special_coul = force->special_coul; - - } - for (int i = 1; i <= n; i++) { - for (int j = i; j <= n; j++) { - lj_type_double[i][j] = lj_type[i][j]; - lj_type_double[j][i] = lj_type[i][j]; - } - } -} - -/* ---------------------------------------------------------------------- */ - -void PairLJSDKCoulLongCuda::compute(int eflag, int vflag) -{ - if (eflag || vflag) ev_setup(eflag,vflag); - if(eflag) cuda->cu_eng_vdwl->upload(); - if(eflag) cuda->cu_eng_coul->upload(); - if(vflag) cuda->cu_virial->upload(); - - Cuda_PairLJSDKCoulLongCuda(& cuda->shared_data, & cuda_neigh_list->sneighlist, eflag, vflag, eflag_atom, vflag_atom); - - if(not cuda->shared_data.pair.collect_forces_later) - { - if(eflag) cuda->cu_eng_vdwl->download(); - if(eflag) cuda->cu_eng_coul->download(); - if(vflag) cuda->cu_virial->download(); - } - -} - -/* ---------------------------------------------------------------------- */ - -void PairLJSDKCoulLongCuda::settings(int narg, char **arg) -{ - PairLJSDKCoulLong::settings(narg, arg); - cuda->shared_data.pair.cut_global = (F_CFLOAT) cut_lj_global; - cuda->shared_data.pair.cut_coul_global = (F_CFLOAT) cut_coul; -} - -/* ---------------------------------------------------------------------- */ - -void PairLJSDKCoulLongCuda::coeff(int narg, char **arg) -{ - PairLJSDKCoulLong::coeff(narg, arg); - allocate(); -} - -void PairLJSDKCoulLongCuda::init_style() -{ - MYDBG(printf("# CUDA PairLJSDKCoulLongCuda::init_style start\n"); ) - // request regular 
or rRESPA neighbor lists - - int irequest; - - irequest = neighbor->request(this,instance_me); - neighbor->requests[irequest]->full = 1; - neighbor->requests[irequest]->half = 0; - neighbor->requests[irequest]->cudable = 1; - - g_ewald = force->kspace->g_ewald; - cuda->shared_data.pair.g_ewald=g_ewald; - cuda->shared_data.pppm.qqrd2e=force->qqrd2e; - if (force->newton) error->warning(FLERR,"Pair style uses does not use \"newton\" setting. You might test if \"newton off\" makes the simulation run faster."); - MYDBG(printf("# CUDA PairLJSDKCoulLongCuda::init_style end\n"); ) -} - -void PairLJSDKCoulLongCuda::init_list(int id, NeighList *ptr) -{ - MYDBG(printf("# CUDA PairLJSDKCoulLongCuda::init_list\n");) - PairLJSDKCoulLong::init_list(id, ptr); - #ifndef CUDA_USE_BINNING - // right now we can only handle verlet (id 0), not respa - if(id == 0) cuda_neigh_list = cuda->registerNeighborList(ptr); - // see Neighbor::init() for details on lammps lists' logic - #endif - MYDBG(printf("# CUDA PairLJSDKCoulLongCuda::init_list end\n");) -} - -void PairLJSDKCoulLongCuda::ev_setup(int eflag, int vflag) -{ - int maxeatomold=maxeatom; - PairLJSDKCoulLong::ev_setup(eflag,vflag); - - if (eflag_atom && atom->nmax > maxeatomold) - {delete cuda->cu_eatom; cuda->cu_eatom = new cCudaData<double, ENERGY_CFLOAT, x > ((double*)eatom, & cuda->shared_data.atom.eatom , atom->nmax );} - - if (vflag_atom && atom->nmax > maxeatomold) - {delete cuda->cu_vatom; cuda->cu_vatom = new cCudaData<double, ENERGY_CFLOAT, yx > ((double*)vatom, & cuda->shared_data.atom.vatom , atom->nmax, 6 );} - -} diff --git a/src/USER-CUDA/pair_lj_sdk_coul_long_cuda.h b/src/USER-CUDA/pair_lj_sdk_coul_long_cuda.h deleted file mode 100644 index 4b5c07c79d..0000000000 --- a/src/USER-CUDA/pair_lj_sdk_coul_long_cuda.h +++ /dev/null @@ -1,59 +0,0 @@ -/* -*- c++ -*- ---------------------------------------------------------- - LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator - - Original Version: - 
http://lammps.sandia.gov, Sandia National Laboratories - Steve Plimpton, sjplimp@sandia.gov - - See the README file in the top-level LAMMPS directory. - - ----------------------------------------------------------------------- - - USER-CUDA Package and associated modifications: - https://sourceforge.net/projects/lammpscuda/ - - Christian Trott, christian.trott@tu-ilmenau.de - Lars Winterfeld, lars.winterfeld@tu-ilmenau.de - Theoretical Physics II, University of Technology Ilmenau, Germany - - See the README file in the USER-CUDA directory. - - This software is distributed under the GNU General Public License. -------------------------------------------------------------------------- */ - -#ifdef PAIR_CLASS - -PairStyle(cg/cmm/coul/long/cuda,PairLJSDKCoulLongCuda) -PairStyle(lj/sdk/coul/long/cuda,PairLJSDKCoulLongCuda) - -#else - -#ifndef PAIR_LJ_SDK_COUL_LONG_CUDA_H -#define PAIR_LJ_SDK_COUL_LONG_CUDA_H - -#include "pair_lj_sdk_coul_long.h" - -namespace LAMMPS_NS { - -class PairLJSDKCoulLongCuda : public PairLJSDKCoulLong -{ - public: - PairLJSDKCoulLongCuda(class LAMMPS *); - void compute(int, int); - void settings(int, char **); - void coeff(int, char **); - void init_list(int, class NeighList *); - void init_style(); - void ev_setup(int eflag, int vflag); - protected: - class Cuda *cuda; - void allocate(); - bool allocated2; - class CudaNeighList* cuda_neigh_list; - double** lj_type_double; -}; - -} - -#endif -#endif diff --git a/src/USER-CUDA/pair_lj_sdk_cuda.cpp b/src/USER-CUDA/pair_lj_sdk_cuda.cpp deleted file mode 100644 index f6eba1ba7b..0000000000 --- a/src/USER-CUDA/pair_lj_sdk_cuda.cpp +++ /dev/null @@ -1,184 +0,0 @@ -/* ---------------------------------------------------------------------- - LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator - - Original Version: - http://lammps.sandia.gov, Sandia National Laboratories - Steve Plimpton, sjplimp@sandia.gov - - See the README file in the top-level LAMMPS directory. 
- - ----------------------------------------------------------------------- - - USER-CUDA Package and associated modifications: - https://sourceforge.net/projects/lammpscuda/ - - Christian Trott, christian.trott@tu-ilmenau.de - Lars Winterfeld, lars.winterfeld@tu-ilmenau.de - Theoretical Physics II, University of Technology Ilmenau, Germany - - See the README file in the USER-CUDA directory. - - This software is distributed under the GNU General Public License. -------------------------------------------------------------------------- */ - -/* ---------------------------------------------------------------------- - LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator - http://lammps.sandia.gov, Sandia National Laboratories - Steve Plimpton, sjplimp@sandia.gov - - Copyright (2003) Sandia Corporation. Under the terms of Contract - DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains - certain rights in this software. This software is distributed under - the GNU General Public License. - - See the README file in the top-level LAMMPS directory. 
-------------------------------------------------------------------------- */ - -/* ---------------------------------------------------------------------- - Contributing author: Paul Crozier (SNL) -------------------------------------------------------------------------- */ - -#include <cmath> -#include <cstdio> -#include <cstdlib> -#include <cstring> -#include "pair_lj_sdk_cuda.h" -#include "pair_lj_sdk_cuda_cu.h" -#include "cuda_data.h" -#include "atom.h" -#include "comm.h" -#include "force.h" -#include "neighbor.h" -#include "neigh_list.h" -#include "neigh_request.h" -#include "cuda_neigh_list.h" -#include "update.h" -#include "integrate.h" -#include "respa.h" -#include "memory.h" -#include "error.h" -#include "user_cuda.h" - -using namespace LAMMPS_NS; - -/* ---------------------------------------------------------------------- */ - -PairLJSDKCuda::PairLJSDKCuda(LAMMPS *lmp) : PairLJSDK(lmp) -{ - cuda = lmp->cuda; - if(cuda == NULL) - error->all(FLERR,"You cannot use a /cuda class, without activating 'cuda' acceleration. Provide '-c on' as command-line argument to LAMMPS.."); - - allocated2 = false; - lj_type_double = NULL; - cuda->shared_data.pair.cudable_force = 1; - cuda->setSystemParams(); -} - -/* ---------------------------------------------------------------------- - remember pointer to arrays in cuda shared data -------------------------------------------------------------------------- */ - -void PairLJSDKCuda::allocate() -{ - if(! allocated) PairLJSDK::allocate(); - int n = atom->ntypes; - if(! 
allocated2) - { - allocated2 = true; - - - memory->create(lj_type_double,n+1,n+1,"pairlj:ljtypedouble"); - - cuda->shared_data.pair.cut = cut; - cuda->shared_data.pair.coeff1 = lj1; - cuda->shared_data.pair.coeff2 = lj2; - cuda->shared_data.pair.coeff3 = lj3; - cuda->shared_data.pair.coeff4 = lj4; - cuda->shared_data.pair.coeff5 = lj_type_double; - /*cu_lj1_gm = new cCudaData<double, F_CFLOAT, x> ((double*)lj1, &cuda->shared_data.pair.coeff1_gm, (atom->ntypes+1)*(atom->ntypes+1)); - cu_lj2_gm = new cCudaData<double, F_CFLOAT, x> ((double*)lj2, &cuda->shared_data.pair.coeff2_gm, (atom->ntypes+1)*(atom->ntypes+1)); - cu_lj3_gm = new cCudaData<double, F_CFLOAT, x> ((double*)lj3, &cuda->shared_data.pair.coeff3_gm, (atom->ntypes+1)*(atom->ntypes+1)); - cu_lj4_gm = new cCudaData<double, F_CFLOAT, x> ((double*)lj4, &cuda->shared_data.pair.coeff4_gm, (atom->ntypes+1)*(atom->ntypes+1)); - cu_lj_type_double_gm = new cCudaData<double, F_CFLOAT, x> ((double*)lj_type_double, &cuda->shared_data.pair.coeff5_gm, (atom->ntypes+1)*(atom->ntypes+1));*/ - cuda->shared_data.pair.offset = offset; - cuda->shared_data.pair.special_lj = force->special_lj; - } - for (int i = 1; i <= n; i++) { - for (int j = i; j <= n; j++) { - lj_type_double[i][j] = lj_type[i][j]; - lj_type_double[j][i] = lj_type[i][j]; - } - } -} - -/* ---------------------------------------------------------------------- */ - -void PairLJSDKCuda::compute(int eflag, int vflag) -{ - if (eflag || vflag) ev_setup(eflag,vflag); - if(eflag) cuda->cu_eng_vdwl->upload(); - if(vflag) cuda->cu_virial->upload(); - - Cuda_PairLJSDKCuda(& cuda->shared_data, & cuda_neigh_list->sneighlist, eflag, vflag, eflag_atom, vflag_atom); - - if(not cuda->shared_data.pair.collect_forces_later) - { - if(eflag) cuda->cu_eng_vdwl->download(); - if(vflag) cuda->cu_virial->download(); - } - -} - -/* ---------------------------------------------------------------------- */ - -void PairLJSDKCuda::settings(int narg, char **arg) -{ - 
PairLJSDK::settings(narg, arg); - cuda->shared_data.pair.cut_global = (F_CFLOAT) cut_global; -} - -/* ---------------------------------------------------------------------- */ - -void PairLJSDKCuda::coeff(int narg, char **arg) -{ - PairLJSDK::coeff(narg, arg); - allocate(); -} - -void PairLJSDKCuda::init_style() -{ - MYDBG(printf("# CUDA PairLJSDKCuda::init_style start\n"); ) - - int irequest = neighbor->request(this,instance_me); - neighbor->requests[irequest]->full = 1; - neighbor->requests[irequest]->half = 0; - neighbor->requests[irequest]->cudable = 1; - - MYDBG(printf("# CUDA PairLJSDKCuda::init_style end\n"); ) -} - -void PairLJSDKCuda::init_list(int id, NeighList *ptr) -{ - MYDBG(printf("# CUDA PairLJSDKCuda::init_list\n");) - PairLJSDK::init_list(id, ptr); - #ifndef CUDA_USE_BINNING - // right now we can only handle verlet (id 0), not respa - if(id == 0) cuda_neigh_list = cuda->registerNeighborList(ptr); - // see Neighbor::init() for details on lammps lists' logic - #endif - MYDBG(printf("# CUDA PairLJSDKCuda::init_list end\n");) -} - -void PairLJSDKCuda::ev_setup(int eflag, int vflag) -{ - int maxeatomold=maxeatom; - PairLJSDK::ev_setup(eflag,vflag); - - if (eflag_atom && atom->nmax > maxeatomold) - {delete cuda->cu_eatom; cuda->cu_eatom = new cCudaData<double, ENERGY_CFLOAT, x > ((double*)eatom, & cuda->shared_data.atom.eatom , atom->nmax );} - - if (vflag_atom && atom->nmax > maxeatomold) - {delete cuda->cu_vatom; cuda->cu_vatom = new cCudaData<double, ENERGY_CFLOAT, yx > ((double*)vatom, & cuda->shared_data.atom.vatom , atom->nmax, 6 );} - -} diff --git a/src/USER-CUDA/pair_lj_sdk_cuda.h b/src/USER-CUDA/pair_lj_sdk_cuda.h deleted file mode 100644 index 5e7807cbd7..0000000000 --- a/src/USER-CUDA/pair_lj_sdk_cuda.h +++ /dev/null @@ -1,65 +0,0 @@ -/* -*- c++ -*- ---------------------------------------------------------- - LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator - - Original Version: - http://lammps.sandia.gov, Sandia National 
Laboratories - Steve Plimpton, sjplimp@sandia.gov - - See the README file in the top-level LAMMPS directory. - - ----------------------------------------------------------------------- - - USER-CUDA Package and associated modifications: - https://sourceforge.net/projects/lammpscuda/ - - Christian Trott, christian.trott@tu-ilmenau.de - Lars Winterfeld, lars.winterfeld@tu-ilmenau.de - Theoretical Physics II, University of Technology Ilmenau, Germany - - See the README file in the USER-CUDA directory. - - This software is distributed under the GNU General Public License. -------------------------------------------------------------------------- */ - -#ifdef PAIR_CLASS - -PairStyle(lj/sdk/cuda,PairLJSDKCuda) -PairStyle(cg/cmm/cuda,PairLJSDKCuda) - -#else - -#ifndef PAIR_LJ_SDK_CUDA_H -#define PAIR_LJ_SDK_CUDA_H - -#include "pair_lj_sdk.h" -#include "cuda_data.h" - -namespace LAMMPS_NS { - -class PairLJSDKCuda : public PairLJSDK -{ - public: - PairLJSDKCuda(class LAMMPS *); - void compute(int, int); - void settings(int, char **); - void coeff(int, char **); - void init_list(int, class NeighList *); - void init_style(); - void ev_setup(int eflag, int vflag); - protected: - class Cuda *cuda; - void allocate(); - bool allocated2; - class CudaNeighList* cuda_neigh_list; - double** lj_type_double; - cCudaData<double , F_CFLOAT , x >* cu_lj1_gm; - cCudaData<double , F_CFLOAT , x >* cu_lj2_gm; - cCudaData<double , F_CFLOAT , x >* cu_lj3_gm; - cCudaData<double , F_CFLOAT , x >* cu_lj4_gm; - cCudaData<double , F_CFLOAT , x >* cu_lj_type_double_gm; -}; - -} - -#endif -#endif diff --git a/src/USER-CUDA/pair_lj_smooth_cuda.cpp b/src/USER-CUDA/pair_lj_smooth_cuda.cpp deleted file mode 100644 index 3a51e94fef..0000000000 --- a/src/USER-CUDA/pair_lj_smooth_cuda.cpp +++ /dev/null @@ -1,177 +0,0 @@ -/* ---------------------------------------------------------------------- - LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator - - Original Version: - 
http://lammps.sandia.gov, Sandia National Laboratories - Steve Plimpton, sjplimp@sandia.gov - - See the README file in the top-level LAMMPS directory. - - Contributing author: Paul Crozier (SNL) - ----------------------------------------------------------------------- - - USER-CUDA Package and associated modifications: - https://sourceforge.net/projects/lammpscuda/ - - Christian Trott, christian.trott@tu-ilmenau.de - Lars Winterfeld, lars.winterfeld@tu-ilmenau.de - Theoretical Physics II, University of Technology Ilmenau, Germany - - See the README file in the USER-CUDA directory. - - This software is distributed under the GNU General Public License. -------------------------------------------------------------------------- */ - -#include <cmath> -#include <cstdio> -#include <cstdlib> -#include <cstring> -#include "pair_lj_smooth_cuda.h" -#include "pair_lj_smooth_cuda_cu.h" -#include "cuda_data.h" -#include "atom.h" -#include "comm.h" -#include "force.h" -#include "kspace.h" -#include "neighbor.h" -#include "neigh_list.h" -#include "neigh_request.h" -#include "cuda_neigh_list.h" -#include "update.h" -#include "integrate.h" -#include "respa.h" -#include "memory.h" -#include "error.h" -#include "user_cuda.h" - -using namespace LAMMPS_NS; - -/* ---------------------------------------------------------------------- */ - -PairLJSmoothCuda::PairLJSmoothCuda(LAMMPS *lmp) : PairLJSmooth(lmp) -{ - cuda = lmp->cuda; - if(cuda == NULL) - error->all(FLERR,"You cannot use a /cuda class, without activating 'cuda' acceleration. Provide '-c on' as command-line argument to LAMMPS.."); - - allocated2 = false; - cuda->shared_data.pair.cudable_force = 1; - cuda->shared_data.pair.use_block_per_atom = 0; - cuda->setSystemParams(); -} - -/* ---------------------------------------------------------------------- - remember pointer to arrays in cuda shared data -------------------------------------------------------------------------- */ - -void PairLJSmoothCuda::allocate() -{ - if(! 
allocated) PairLJSmooth::allocate(); - if(! allocated2) - { - allocated2 = true; - cuda->shared_data.pair.cut = cut; - cuda->shared_data.pair.cut_inner = cut_inner; - cuda->shared_data.pair.coeff1 = lj1; - cuda->shared_data.pair.coeff2 = lj2; - cuda->shared_data.pair.coeff3 = lj3; - cuda->shared_data.pair.coeff4 = lj4; - cuda->shared_data.pair.coeff5 = ljsw1; - cuda->shared_data.pair.coeff6 = ljsw2; - cuda->shared_data.pair.coeff7 = ljsw3; - cuda->shared_data.pair.coeff8 = ljsw4; - cuda->shared_data.pair.coeff9 = ljsw0; - cuda->shared_data.pair.special_lj = force->special_lj; - cu_lj1_gm = new cCudaData<double, F_CFLOAT, x> ((double*)lj1, &cuda->shared_data.pair.coeff1_gm, (atom->ntypes+1)*(atom->ntypes+1)); - cu_lj2_gm = new cCudaData<double, F_CFLOAT, x> ((double*)lj2, &cuda->shared_data.pair.coeff2_gm, (atom->ntypes+1)*(atom->ntypes+1)); - cu_lj3_gm = new cCudaData<double, F_CFLOAT, x> ((double*)lj3, &cuda->shared_data.pair.coeff3_gm, (atom->ntypes+1)*(atom->ntypes+1)); - cu_lj4_gm = new cCudaData<double, F_CFLOAT, x> ((double*)lj4, &cuda->shared_data.pair.coeff4_gm, (atom->ntypes+1)*(atom->ntypes+1)); - cu_ljsw0_gm = new cCudaData<double, F_CFLOAT, x> ((double*)ljsw0, &cuda->shared_data.pair.coeff9_gm, (atom->ntypes+1)*(atom->ntypes+1)); - cu_ljsw1_gm = new cCudaData<double, F_CFLOAT, x> ((double*)ljsw1, &cuda->shared_data.pair.coeff5_gm, (atom->ntypes+1)*(atom->ntypes+1)); - cu_ljsw2_gm = new cCudaData<double, F_CFLOAT, x> ((double*)ljsw2, &cuda->shared_data.pair.coeff6_gm, (atom->ntypes+1)*(atom->ntypes+1)); - cu_ljsw3_gm = new cCudaData<double, F_CFLOAT, x> ((double*)ljsw3, &cuda->shared_data.pair.coeff7_gm, (atom->ntypes+1)*(atom->ntypes+1)); - cu_ljsw4_gm = new cCudaData<double, F_CFLOAT, x> ((double*)ljsw4, &cuda->shared_data.pair.coeff8_gm, (atom->ntypes+1)*(atom->ntypes+1)); - } -} - -/* ---------------------------------------------------------------------- */ - -void PairLJSmoothCuda::compute(int eflag, int vflag) -{ - if (eflag || vflag) 
ev_setup(eflag,vflag); - if(not cuda->shared_data.pair.collect_forces_later) - { - if(eflag) cuda->cu_eng_vdwl->upload(); - if(vflag) cuda->cu_virial->upload(); - } - - Cuda_PairLJSmoothCuda(& cuda->shared_data, & cuda_neigh_list->sneighlist, eflag, vflag, eflag_atom, vflag_atom); - - if(not cuda->shared_data.pair.collect_forces_later) - { - if(eflag) cuda->cu_eng_vdwl->download(); - if(vflag) cuda->cu_virial->download(); - } -} - -/* ---------------------------------------------------------------------- */ - -void PairLJSmoothCuda::settings(int narg, char **arg) -{ - PairLJSmooth::settings(narg, arg); - cuda->shared_data.pair.cut_global = (F_CFLOAT) cut_global; - cuda->shared_data.pair.cut_inner_global = (F_CFLOAT) cut_inner_global; -} - -/* ---------------------------------------------------------------------- */ - -void PairLJSmoothCuda::coeff(int narg, char **arg) -{ - PairLJSmooth::coeff(narg, arg); - allocate(); -} - -void PairLJSmoothCuda::init_style() -{ - // request regular or rRESPA neighbor lists - - if(atom->molecular) - { - cuda->shared_data.pair.collect_forces_later = 1; - } - - int irequest; - - irequest = neighbor->request(this,instance_me); - neighbor->requests[irequest]->full = 1; - neighbor->requests[irequest]->half = 0; - neighbor->requests[irequest]->cudable = 1; - - - -} - -void PairLJSmoothCuda::init_list(int id, NeighList *ptr) -{ - MYDBG(printf("# CUDA PairLJSmoothCuda::init_list\n");) - PairLJSmooth::init_list(id, ptr); - #ifndef CUDA_USE_BINNING - // right now we can only handle verlet (id 0), not respa - if(id == 0) cuda_neigh_list = cuda->registerNeighborList(ptr); - // see Neighbor::init() for details on lammps lists' logic - #endif - MYDBG(printf("# CUDA PairLJSmoothCuda::init_list end\n");) -} - -void PairLJSmoothCuda::ev_setup(int eflag, int vflag) -{ - int maxeatomold=maxeatom; - PairLJSmooth::ev_setup(eflag,vflag); - - if (eflag_atom && atom->nmax > maxeatomold) - {delete cuda->cu_eatom; cuda->cu_eatom = new cCudaData<double, 
ENERGY_CFLOAT, x > ((double*)eatom, & cuda->shared_data.atom.eatom , atom->nmax );} - - if (vflag_atom && atom->nmax > maxeatomold) - {delete cuda->cu_vatom; cuda->cu_vatom = new cCudaData<double, ENERGY_CFLOAT, yx > ((double*)vatom, & cuda->shared_data.atom.vatom , atom->nmax, 6 );} - -} diff --git a/src/USER-CUDA/pair_lj_smooth_cuda.h b/src/USER-CUDA/pair_lj_smooth_cuda.h deleted file mode 100644 index 0a57e6f663..0000000000 --- a/src/USER-CUDA/pair_lj_smooth_cuda.h +++ /dev/null @@ -1,68 +0,0 @@ -/* -*- c++ -*- ---------------------------------------------------------- - LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator - - Original Version: - http://lammps.sandia.gov, Sandia National Laboratories - Steve Plimpton, sjplimp@sandia.gov - - See the README file in the top-level LAMMPS directory. - - ----------------------------------------------------------------------- - - USER-CUDA Package and associated modifications: - https://sourceforge.net/projects/lammpscuda/ - - Christian Trott, christian.trott@tu-ilmenau.de - Lars Winterfeld, lars.winterfeld@tu-ilmenau.de - Theoretical Physics II, University of Technology Ilmenau, Germany - - See the README file in the USER-CUDA directory. - - This software is distributed under the GNU General Public License. 
-------------------------------------------------------------------------- */ - -#ifdef PAIR_CLASS - -PairStyle(lj/smooth/cuda,PairLJSmoothCuda) - -#else - -#ifndef LMP_PAIR_LJ_SMOOTH_CUDA_H -#define LMP_PAIR_LJ_SMOOTH_CUDA_H - -#include "pair_lj_smooth.h" -#include "cuda_data.h" - -namespace LAMMPS_NS { - -class PairLJSmoothCuda : public PairLJSmooth -{ - public: - PairLJSmoothCuda(class LAMMPS *); - void compute(int, int); - void settings(int, char **); - void coeff(int, char **); - void init_list(int, class NeighList *); - void init_style(); - void ev_setup(int eflag, int vflag); - protected: - class Cuda *cuda; - void allocate(); - bool allocated2; - class CudaNeighList* cuda_neigh_list; - cCudaData<double , F_CFLOAT , x >* cu_lj1_gm; - cCudaData<double , F_CFLOAT , x >* cu_lj2_gm; - cCudaData<double , F_CFLOAT , x >* cu_lj3_gm; - cCudaData<double , F_CFLOAT , x >* cu_lj4_gm; - cCudaData<double , F_CFLOAT , x >* cu_ljsw0_gm; - cCudaData<double , F_CFLOAT , x >* cu_ljsw1_gm; - cCudaData<double , F_CFLOAT , x >* cu_ljsw2_gm; - cCudaData<double , F_CFLOAT , x >* cu_ljsw3_gm; - cCudaData<double , F_CFLOAT , x >* cu_ljsw4_gm; - -}; - -} - -#endif -#endif diff --git a/src/USER-CUDA/pair_morse_cuda.cpp b/src/USER-CUDA/pair_morse_cuda.cpp deleted file mode 100644 index a38712aabe..0000000000 --- a/src/USER-CUDA/pair_morse_cuda.cpp +++ /dev/null @@ -1,177 +0,0 @@ -/* ---------------------------------------------------------------------- - LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator - - Original Version: - http://lammps.sandia.gov, Sandia National Laboratories - Steve Plimpton, sjplimp@sandia.gov - - See the README file in the top-level LAMMPS directory. 
- - ----------------------------------------------------------------------- - - USER-CUDA Package and associated modifications: - https://sourceforge.net/projects/lammpscuda/ - - Christian Trott, christian.trott@tu-ilmenau.de - Lars Winterfeld, lars.winterfeld@tu-ilmenau.de - Theoretical Physics II, University of Technology Ilmenau, Germany - - See the README file in the USER-CUDA directory. - - This software is distributed under the GNU General Public License. -------------------------------------------------------------------------- */ - -/* ---------------------------------------------------------------------- - LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator - http://lammps.sandia.gov, Sandia National Laboratories - Steve Plimpton, sjplimp@sandia.gov - - Copyright (2003) Sandia Corporation. Under the terms of Contract - DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains - certain rights in this software. This software is distributed under - the GNU General Public License. - - See the README file in the top-level LAMMPS directory. 
-------------------------------------------------------------------------- */ - -/* ---------------------------------------------------------------------- - Contributing author: Paul Crozier (SNL) -------------------------------------------------------------------------- */ - -#include <cmath> -#include <cstdio> -#include <cstdlib> -#include <cstring> -#include "pair_morse_cuda.h" -#include "pair_morse_cuda_cu.h" -#include "cuda_data.h" -#include "atom.h" -#include "comm.h" -#include "force.h" -#include "neighbor.h" -#include "neigh_list.h" -#include "neigh_request.h" -#include "cuda_neigh_list.h" -#include "update.h" -#include "integrate.h" -#include "respa.h" -#include "memory.h" -#include "error.h" -#include "user_cuda.h" - -using namespace LAMMPS_NS; - -/* ---------------------------------------------------------------------- */ - -PairMorseCuda::PairMorseCuda(LAMMPS *lmp) : PairMorse(lmp) -{ - cuda = lmp->cuda; - if(cuda == NULL) - error->all(FLERR,"You cannot use a /cuda class, without activating 'cuda' acceleration. Provide '-c on' as command-line argument to LAMMPS.."); - - allocated2 = false; - cuda->shared_data.pair.cudable_force = 1; - cuda->setSystemParams(); -} - -/* ---------------------------------------------------------------------- - remember pointer to arrays in cuda shared data -------------------------------------------------------------------------- */ - -void PairMorseCuda::allocate() -{ - if(! allocated) PairMorse::allocate(); - if(! 
allocated2) - { - allocated2 = true; - cuda->shared_data.pair.cut = cut; - cuda->shared_data.pair.coeff1 = r0; - cuda->shared_data.pair.coeff2 = alpha; - cuda->shared_data.pair.coeff3 = morse1; - cuda->shared_data.pair.coeff4 = d0; - cuda->shared_data.pair.offset = offset; - cuda->shared_data.pair.special_lj = force->special_lj; - } -} - -/* ---------------------------------------------------------------------- */ - -void PairMorseCuda::compute(int eflag, int vflag) -{ - if (eflag || vflag) ev_setup(eflag,vflag); - if(eflag) cuda->cu_eng_vdwl->upload(); - if(vflag) cuda->cu_virial->upload(); - - Cuda_PairMorseCuda(& cuda->shared_data, & cuda_neigh_list->sneighlist, eflag, vflag, eflag_atom, vflag_atom); - - if(not cuda->shared_data.pair.collect_forces_later) - { - if(eflag) cuda->cu_eng_vdwl->download(); - if(vflag) cuda->cu_virial->download(); - } - -} - -/* ---------------------------------------------------------------------- */ - -void PairMorseCuda::settings(int narg, char **arg) -{ - PairMorse::settings(narg, arg); - cuda->shared_data.pair.cut_global = (F_CFLOAT) cut_global; -} - -/* ---------------------------------------------------------------------- */ - -void PairMorseCuda::coeff(int narg, char **arg) -{ - PairMorse::coeff(narg, arg); - allocate(); -} - -void PairMorseCuda::init_style() -{ - MYDBG(printf("# CUDA PairMorseCuda::init_style start\n"); ) - // request regular or rRESPA neighbor lists - - int irequest; - - if (update->whichflag == 0 && strstr(update->integrate_style,"respa")) { - - } - else - { - irequest = neighbor->request(this,instance_me); - neighbor->requests[irequest]->full = 1; - neighbor->requests[irequest]->half = 0; - neighbor->requests[irequest]->cudable = 1; - //neighbor->style=0; //0=NSQ neighboring - } - - - MYDBG(printf("# CUDA PairMorseCuda::init_style end\n"); ) -} - -void PairMorseCuda::init_list(int id, NeighList *ptr) -{ - MYDBG(printf("# CUDA PairMorseCuda::init_list\n");) - PairMorse::init_list(id, ptr); - #ifndef 
CUDA_USE_BINNING - // right now we can only handle verlet (id 0), not respa - if(id == 0) cuda_neigh_list = cuda->registerNeighborList(ptr); - // see Neighbor::init() for details on lammps lists' logic - #endif - MYDBG(printf("# CUDA PairMorseCuda::init_list end\n");) -} - -void PairMorseCuda::ev_setup(int eflag, int vflag) -{ - int maxeatomold=maxeatom; - PairMorse::ev_setup(eflag,vflag); - - if (eflag_atom && atom->nmax > maxeatomold) - {delete cuda->cu_eatom; cuda->cu_eatom = new cCudaData<double, ENERGY_CFLOAT, x > ((double*)eatom, & cuda->shared_data.atom.eatom , atom->nmax );} - - if (vflag_atom && atom->nmax > maxeatomold) - {delete cuda->cu_vatom; cuda->cu_vatom = new cCudaData<double, ENERGY_CFLOAT, yx > ((double*)vatom, & cuda->shared_data.atom.vatom , atom->nmax, 6 );} - -} diff --git a/src/USER-CUDA/pair_morse_cuda.h b/src/USER-CUDA/pair_morse_cuda.h deleted file mode 100644 index f76e687527..0000000000 --- a/src/USER-CUDA/pair_morse_cuda.h +++ /dev/null @@ -1,57 +0,0 @@ -/* -*- c++ -*- ---------------------------------------------------------- - LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator - - Original Version: - http://lammps.sandia.gov, Sandia National Laboratories - Steve Plimpton, sjplimp@sandia.gov - - See the README file in the top-level LAMMPS directory. - - ----------------------------------------------------------------------- - - USER-CUDA Package and associated modifications: - https://sourceforge.net/projects/lammpscuda/ - - Christian Trott, christian.trott@tu-ilmenau.de - Lars Winterfeld, lars.winterfeld@tu-ilmenau.de - Theoretical Physics II, University of Technology Ilmenau, Germany - - See the README file in the USER-CUDA directory. - - This software is distributed under the GNU General Public License. 
-------------------------------------------------------------------------- */ - -#ifdef PAIR_CLASS - -PairStyle(morse/cuda,PairMorseCuda) - -#else - -#ifndef PAIR_MORSE_CUDA_H -#define PAIR_MORSE_CUDA_H - -#include "pair_morse.h" - -namespace LAMMPS_NS { - -class PairMorseCuda : public PairMorse -{ - public: - PairMorseCuda(class LAMMPS *); - void compute(int, int); - void settings(int, char **); - void coeff(int, char **); - void init_list(int, class NeighList *); - void init_style(); - void ev_setup(int eflag, int vflag); - protected: - class Cuda *cuda; - void allocate(); - bool allocated2; - class CudaNeighList* cuda_neigh_list; -}; - -} - -#endif -#endif diff --git a/src/USER-CUDA/pair_sw_cuda.cpp b/src/USER-CUDA/pair_sw_cuda.cpp deleted file mode 100644 index 1dfccc4e7b..0000000000 --- a/src/USER-CUDA/pair_sw_cuda.cpp +++ /dev/null @@ -1,207 +0,0 @@ -/* ---------------------------------------------------------------------- - LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator - - Original Version: - http://lammps.sandia.gov, Sandia National Laboratories - Steve Plimpton, sjplimp@sandia.gov - - See the README file in the top-level LAMMPS directory. - - ----------------------------------------------------------------------- - - USER-CUDA Package and associated modifications: - https://sourceforge.net/projects/lammpscuda/ - - Christian Trott, christian.trott@tu-ilmenau.de - Lars Winterfeld, lars.winterfeld@tu-ilmenau.de - Theoretical Physics II, University of Technology Ilmenau, Germany - - See the README file in the USER-CUDA directory. - - This software is distributed under the GNU General Public License. 
-------------------------------------------------------------------------- */ - -/* ---------------------------------------------------------------------- - LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator - http://lammps.sandia.gov, Sandia National Laboratories - Steve Plimpton, sjplimp@sandia.gov - - Copyright (2003) Sandia Corporation. Under the terms of Contract - DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains - certain rights in this software. This software is distributed under - the GNU General Public License. - - See the README file in the top-level LAMMPS directory. -------------------------------------------------------------------------- */ - -/* ---------------------------------------------------------------------- - Contributing author: Paul Crozier (SNL) -------------------------------------------------------------------------- */ - -#include <cmath> -#include <cstdio> -#include <cstdlib> -#include <cstring> -#include "pair_sw_cuda.h" -#include "cuda_data.h" -#include "atom.h" -#include "comm.h" -#include "force.h" -#include "neighbor.h" -#include "neigh_list.h" -#include "neigh_request.h" -#include "cuda_neigh_list.h" -#include "update.h" -#include "integrate.h" -#include "respa.h" -#include "memory.h" -#include "error.h" -#include "user_cuda.h" - -using namespace LAMMPS_NS; - - - - -/* ---------------------------------------------------------------------- */ - -PairSWCuda::PairSWCuda(LAMMPS *lmp) : PairSW(lmp) -{ - cuda = lmp->cuda; - if(cuda == NULL) - error->all(FLERR,"You cannot use a /cuda class, without activating 'cuda' acceleration. 
Provide '-c on' as command-line argument to LAMMPS.."); - - allocated2 = false; - params_f = NULL; - cuda->setSystemParams(); - cuda->shared_data.pair.cudable_force = 1; - cuda->shared_data.pair.override_block_per_atom = 0; - cuda->shared_data.pair.neighall = true; - init = false; -} - -/* ---------------------------------------------------------------------- - remember pointer to arrays in cuda shared data -------------------------------------------------------------------------- */ - -void PairSWCuda::allocate() -{ - if(! allocated) PairSW::allocate(); - if(! allocated2) - { - allocated2 = true; - cuda->shared_data.pair.cutsq = cutsq; - cuda->shared_data.pair.special_lj = force->special_lj; - cuda->shared_data.pair.special_coul = force->special_coul; - } -} - -/* ---------------------------------------------------------------------- */ - -void PairSWCuda::compute(int eflag, int vflag) -{ - if(!init) {Cuda_PairSWCuda_Init(&cuda->shared_data,params_f,map, &elem2param[0][0][0],nelements); init=true;} - if (eflag || vflag) ev_setup(eflag,vflag); - if(eflag) cuda->cu_eng_vdwl->upload(); - if(vflag) cuda->cu_virial->upload(); - - Cuda_PairSWCuda(& cuda->shared_data, & cuda_neigh_list->sneighlist, eflag, vflag, eflag_atom, vflag_atom);//,&elem2param[0][0][0],map - if(not cuda->shared_data.pair.collect_forces_later) - { - if(eflag) cuda->cu_eng_vdwl->download(); - if(vflag) cuda->cu_virial->download(); - } -} - -/* ---------------------------------------------------------------------- */ - -void PairSWCuda::settings(int narg, char **arg) -{ - PairSW::settings(narg, arg); -} - -/* ---------------------------------------------------------------------- */ - -void PairSWCuda::coeff(int narg, char **arg) -{ - PairSW::coeff(narg, arg); - allocate(); - params_f = (ParamSW_Float *) memory->srealloc(params_f,maxparam*sizeof(ParamSW_Float), - "pair:params_f"); - for(int i=0;i<maxparam;i++) - { - printf("%e %e\n",params[i].cut,params[i].cutsq); - params_f[i].cut = params[i].cut; - 
params_f[i].cutsq = params[i].cutsq; - params_f[i].c1 = params[i].c1; - params_f[i].c2 = params[i].c2; - params_f[i].c3 = params[i].c3; - params_f[i].c4 = params[i].c4; - params_f[i].c5 = params[i].c5; - params_f[i].c6 = params[i].c6; - params_f[i].ielement = params[i].ielement; - params_f[i].jelement = params[i].jelement; - params_f[i].kelement = params[i].kelement; - params_f[i].epsilon = params[i].epsilon; - params_f[i].sigma = params[i].sigma; - params_f[i].littlea = params[i].littlea; - params_f[i].lambda = params[i].lambda; - params_f[i].costheta = params[i].costheta; - params_f[i].tol = params[i].tol; - params_f[i].sigma_gamma = params[i].sigma_gamma; - params_f[i].lambda_epsilon = params[i].lambda_epsilon; - params_f[i].lambda_epsilon2 = params[i].lambda_epsilon2; - params_f[i].gamma = params[i].gamma; - - params_f[i].biga = params[i].biga; - params_f[i].bigb = params[i].bigb; - params_f[i].gamma = params[i].gamma; - params_f[i].powerp = params[i].powerp; - params_f[i].powerq = params[i].powerq; - } - cuda->shared_data.pair.cut_global = cutmax; -} - -void PairSWCuda::init_style() -{ - MYDBG(printf("# CUDA PairSWCuda::init_style start\n"); ) - - int irequest; - - irequest = neighbor->request(this,instance_me); - neighbor->requests[irequest]->full = 1; - neighbor->requests[irequest]->half = 0; - neighbor->requests[irequest]->cudable = 1; - neighbor->requests[irequest]->ghost = 1; - - - MYDBG(printf("# CUDA PairSWCuda::init_style end\n"); ) -} - -void PairSWCuda::init_list(int id, NeighList *ptr) -{ - MYDBG(printf("# CUDA PairSWCuda::init_list\n");) - PairSW::init_list(id, ptr); - // right now we can only handle verlet (id 0), not respa - if(id == 0) cuda_neigh_list = cuda->registerNeighborList(ptr); - // see Neighbor::init() for details on lammps lists' logic - MYDBG(printf("# CUDA PairSWCuda::init_list end\n");) - cu_params_f = (ParamSW_Float*) CudaWrapper_AllocCudaData(sizeof(ParamSW_Float)*maxparam); - CudaWrapper_UploadCudaData((void*) params_f,(void*) 
cu_params_f,sizeof(ParamSW_Float)*maxparam); - cu_elem2param = new cCudaData<int, int, xyz > ((int*) elem2param, nelements,nelements,nelements); - cu_elem2param->upload(); - cu_map = new cCudaData<int, int, x > ( map,atom->ntypes+1 ); - cu_map->upload(); -} - -void PairSWCuda::ev_setup(int eflag, int vflag) -{ - int maxeatomold=maxeatom; - PairSW::ev_setup(eflag,vflag); - - if (eflag_atom && atom->nmax > maxeatomold) - {delete cuda->cu_eatom; cuda->cu_eatom = new cCudaData<double, ENERGY_CFLOAT, x > ((double*)eatom, & cuda->shared_data.atom.eatom , atom->nmax );} - - if (vflag_atom && atom->nmax > maxeatomold) - {delete cuda->cu_vatom; cuda->cu_vatom = new cCudaData<double, ENERGY_CFLOAT, yx > ((double*)vatom, & cuda->shared_data.atom.vatom , atom->nmax, 6 );} -} diff --git a/src/USER-CUDA/pair_sw_cuda.h b/src/USER-CUDA/pair_sw_cuda.h deleted file mode 100644 index c61f849a1f..0000000000 --- a/src/USER-CUDA/pair_sw_cuda.h +++ /dev/null @@ -1,66 +0,0 @@ -/* -*- c++ -*- ---------------------------------------------------------- - LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator - - Original Version: - http://lammps.sandia.gov, Sandia National Laboratories - Steve Plimpton, sjplimp@sandia.gov - - See the README file in the top-level LAMMPS directory. - - ----------------------------------------------------------------------- - - USER-CUDA Package and associated modifications: - https://sourceforge.net/projects/lammpscuda/ - - Christian Trott, christian.trott@tu-ilmenau.de - Lars Winterfeld, lars.winterfeld@tu-ilmenau.de - Theoretical Physics II, University of Technology Ilmenau, Germany - - See the README file in the USER-CUDA directory. - - This software is distributed under the GNU General Public License. 
-------------------------------------------------------------------------- */ - -#ifdef PAIR_CLASS - -PairStyle(sw/cuda,PairSWCuda) - -#else - -#ifndef PAIR_SW_CUDA_H -#define PAIR_SW_CUDA_H - -#include "pair_sw_cuda_cu.h" -#include "pair_sw.h" -#include "cuda_data.h" - -namespace LAMMPS_NS { - -class PairSWCuda : public PairSW -{ - public: - PairSWCuda(class LAMMPS *); - void compute(int, int); - void settings(int, char **); - void coeff(int, char **); - void init_list(int, class NeighList *); - void init_style(); - void ev_setup(int eflag, int vflag); - protected: - - class Cuda *cuda; - void allocate(); - bool allocated2; - class CudaNeighList* cuda_neigh_list; - ParamSW_Float* params_f; - ParamSW_Float* cu_params_f; - cCudaData<int, int, xyz >* cu_elem2param; - cCudaData<int, int, x >* cu_map; - bool init; - bool iszbl; -}; - -} - -#endif -#endif diff --git a/src/USER-CUDA/pair_tersoff_cuda.cpp b/src/USER-CUDA/pair_tersoff_cuda.cpp deleted file mode 100644 index f22b551284..0000000000 --- a/src/USER-CUDA/pair_tersoff_cuda.cpp +++ /dev/null @@ -1,204 +0,0 @@ -/* ---------------------------------------------------------------------- - LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator - - Original Version: - http://lammps.sandia.gov, Sandia National Laboratories - Steve Plimpton, sjplimp@sandia.gov - - See the README file in the top-level LAMMPS directory. - - ----------------------------------------------------------------------- - - USER-CUDA Package and associated modifications: - https://sourceforge.net/projects/lammpscuda/ - - Christian Trott, christian.trott@tu-ilmenau.de - Lars Winterfeld, lars.winterfeld@tu-ilmenau.de - Theoretical Physics II, University of Technology Ilmenau, Germany - - See the README file in the USER-CUDA directory. - - This software is distributed under the GNU General Public License. 
-------------------------------------------------------------------------- */ - -/* ---------------------------------------------------------------------- - LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator - http://lammps.sandia.gov, Sandia National Laboratories - Steve Plimpton, sjplimp@sandia.gov - - Copyright (2003) Sandia Corporation. Under the terms of Contract - DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains - certain rights in this software. This software is distributed under - the GNU General Public License. - - See the README file in the top-level LAMMPS directory. -------------------------------------------------------------------------- */ - -/* ---------------------------------------------------------------------- - Contributing author: Paul Crozier (SNL) -------------------------------------------------------------------------- */ - -#include <cmath> -#include <cstdio> -#include <cstdlib> -#include <cstring> -#include "pair_tersoff_cuda.h" -#include "cuda_data.h" -#include "atom.h" -#include "comm.h" -#include "force.h" -#include "neighbor.h" -#include "neigh_list.h" -#include "neigh_request.h" -#include "cuda_neigh_list.h" -#include "update.h" -#include "integrate.h" -#include "respa.h" -#include "memory.h" -#include "error.h" -#include "user_cuda.h" - -using namespace LAMMPS_NS; - - - - -/* ---------------------------------------------------------------------- */ - -PairTersoffCuda::PairTersoffCuda(LAMMPS *lmp) : PairTersoff(lmp) -{ - cuda = lmp->cuda; - if(cuda == NULL) - error->all(FLERR,"You cannot use a /cuda class, without activating 'cuda' acceleration. 
Provide '-c on' as command-line argument to LAMMPS.."); - - allocated2 = false; - params_f = NULL; - cuda->setSystemParams(); - cuda->shared_data.pair.cudable_force = 1; - cuda->shared_data.pair.override_block_per_atom = 0; - cuda->shared_data.pair.neighall = true; - init = false; - iszbl = false; -} - -/* ---------------------------------------------------------------------- - remember pointer to arrays in cuda shared data -------------------------------------------------------------------------- */ - -void PairTersoffCuda::allocate() -{ - if(! allocated) PairTersoff::allocate(); - if(! allocated2) - { - allocated2 = true; - cuda->shared_data.pair.cutsq = cutsq; - cuda->shared_data.pair.special_lj = force->special_lj; - cuda->shared_data.pair.special_coul = force->special_coul; - } -} - -/* ---------------------------------------------------------------------- */ - -void PairTersoffCuda::compute(int eflag, int vflag) -{ - if(!init) {Cuda_PairTersoffCuda_Init(&cuda->shared_data,params_f,map, &elem2param[0][0][0],nelements,iszbl); init=true;} - if (eflag || vflag) ev_setup(eflag,vflag); - if(eflag) cuda->cu_eng_vdwl->upload(); - if(vflag) cuda->cu_virial->upload(); - - Cuda_PairTersoffCuda(& cuda->shared_data, & cuda_neigh_list->sneighlist, eflag, vflag, eflag_atom, vflag_atom);//,&elem2param[0][0][0],map - if(not cuda->shared_data.pair.collect_forces_later) - { - if(eflag) cuda->cu_eng_vdwl->download(); - if(vflag) cuda->cu_virial->download(); - } -} - -/* ---------------------------------------------------------------------- */ - -void PairTersoffCuda::settings(int narg, char **arg) -{ - PairTersoff::settings(narg, arg); -} - -/* ---------------------------------------------------------------------- */ - -void PairTersoffCuda::coeff(int narg, char **arg) -{ - PairTersoff::coeff(narg, arg); - allocate(); - params_f = (Param_Float *) memory->srealloc(params_f,maxparam*sizeof(Param_Float), - "pair:params_f"); - for(int i=0;i<maxparam;i++) - { - params_f[i].lam1 = 
params[i].lam1; - params_f[i].lam2 = params[i].lam2; - params_f[i].lam3 = params[i].lam3; - params_f[i].c = params[i].c; - params_f[i].d = params[i].d; - params_f[i].h = params[i].h; - params_f[i].gamma = params[i].gamma; - params_f[i].powerm = params[i].powerm; - params_f[i].powern = params[i].powern; - params_f[i].beta = params[i].beta; - params_f[i].biga = params[i].biga; - params_f[i].bigb = params[i].bigb; - params_f[i].bigd = params[i].bigd; - params_f[i].bigr = params[i].bigr; - params_f[i].cut = params[i].cut; - params_f[i].cutsq = params[i].cutsq; - params_f[i].c1 = params[i].c1; - params_f[i].c2 = params[i].c2; - params_f[i].c3 = params[i].c3; - params_f[i].c4 = params[i].c4; - params_f[i].ielement = params[i].ielement; - params_f[i].jelement = params[i].jelement; - params_f[i].kelement = params[i].kelement; - params_f[i].powermint = params[i].powermint; - } - cuda->shared_data.pair.cut_global = cutmax; -} - -void PairTersoffCuda::init_style() -{ - MYDBG(printf("# CUDA PairTersoffCuda::init_style start\n"); ) - - int irequest; - - irequest = neighbor->request(this,instance_me); - neighbor->requests[irequest]->full = 1; - neighbor->requests[irequest]->half = 0; - neighbor->requests[irequest]->cudable = 1; - neighbor->requests[irequest]->ghost = 1; - - - MYDBG(printf("# CUDA PairTersoffCuda::init_style end\n"); ) -} - -void PairTersoffCuda::init_list(int id, NeighList *ptr) -{ - MYDBG(printf("# CUDA PairTersoffCuda::init_list\n");) - PairTersoff::init_list(id, ptr); - // right now we can only handle verlet (id 0), not respa - if(id == 0) cuda_neigh_list = cuda->registerNeighborList(ptr); - // see Neighbor::init() for details on lammps lists' logic - MYDBG(printf("# CUDA PairTersoffCuda::init_list end\n");) - cu_params_f = (Param_Float*) CudaWrapper_AllocCudaData(sizeof(Param_Float)*maxparam); - CudaWrapper_UploadCudaData((void*) params_f,(void*) cu_params_f,sizeof(Param_Float)*maxparam); - cu_elem2param = new cCudaData<int, int, xyz > ((int*) elem2param, 
nelements,nelements,nelements); - cu_elem2param->upload(); - cu_map = new cCudaData<int, int, x > ( map,atom->ntypes+1 ); - cu_map->upload(); -} - -void PairTersoffCuda::ev_setup(int eflag, int vflag) -{ - int maxeatomold=maxeatom; - PairTersoff::ev_setup(eflag,vflag); - - if (eflag_atom && atom->nmax > maxeatomold) - {delete cuda->cu_eatom; cuda->cu_eatom = new cCudaData<double, ENERGY_CFLOAT, x > ((double*)eatom, & cuda->shared_data.atom.eatom , atom->nmax );} - - if (vflag_atom && atom->nmax > maxeatomold) - {delete cuda->cu_vatom; cuda->cu_vatom = new cCudaData<double, ENERGY_CFLOAT, yx > ((double*)vatom, & cuda->shared_data.atom.vatom , atom->nmax, 6 );} -} diff --git a/src/USER-CUDA/pair_tersoff_cuda.h b/src/USER-CUDA/pair_tersoff_cuda.h deleted file mode 100644 index 5b829114f2..0000000000 --- a/src/USER-CUDA/pair_tersoff_cuda.h +++ /dev/null @@ -1,66 +0,0 @@ -/* -*- c++ -*- ---------------------------------------------------------- - LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator - - Original Version: - http://lammps.sandia.gov, Sandia National Laboratories - Steve Plimpton, sjplimp@sandia.gov - - See the README file in the top-level LAMMPS directory. - - ----------------------------------------------------------------------- - - USER-CUDA Package and associated modifications: - https://sourceforge.net/projects/lammpscuda/ - - Christian Trott, christian.trott@tu-ilmenau.de - Lars Winterfeld, lars.winterfeld@tu-ilmenau.de - Theoretical Physics II, University of Technology Ilmenau, Germany - - See the README file in the USER-CUDA directory. - - This software is distributed under the GNU General Public License. 
-------------------------------------------------------------------------- */ - -#ifdef PAIR_CLASS - -PairStyle(tersoff/cuda,PairTersoffCuda) - -#else - -#ifndef PAIR_TERSOFF_CUDA_H -#define PAIR_TERSOFF_CUDA_H - -#include "pair_tersoff_cuda_cu.h" -#include "pair_tersoff.h" -#include "cuda_data.h" - -namespace LAMMPS_NS { - -class PairTersoffCuda : public PairTersoff -{ - public: - PairTersoffCuda(class LAMMPS *); - void compute(int, int); - void settings(int, char **); - void coeff(int, char **); - void init_list(int, class NeighList *); - void init_style(); - void ev_setup(int eflag, int vflag); - protected: - - class Cuda *cuda; - void allocate(); - bool allocated2; - class CudaNeighList* cuda_neigh_list; - Param_Float* params_f; - Param_Float* cu_params_f; - cCudaData<int, int, xyz >* cu_elem2param; - cCudaData<int, int, x >* cu_map; - bool init; - bool iszbl; -}; - -} - -#endif -#endif diff --git a/src/USER-CUDA/pair_tersoff_zbl_cuda.cpp b/src/USER-CUDA/pair_tersoff_zbl_cuda.cpp deleted file mode 100644 index 91dcf6189e..0000000000 --- a/src/USER-CUDA/pair_tersoff_zbl_cuda.cpp +++ /dev/null @@ -1,220 +0,0 @@ -/* ---------------------------------------------------------------------- - LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator - http://lammps.sandia.gov, Sandia National Laboratories - Steve Plimpton, sjplimp@sandia.gov - - Copyright (2003) Sandia Corporation. Under the terms of Contract - DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains - certain rights in this software. This software is distributed under - the GNU General Public License. - - See the README file in the top-level LAMMPS directory. 
-------------------------------------------------------------------------- */ - -/* ---------------------------------------------------------------------- - Contributing author: Aidan Thompson (SNL) - original Tersoff implementation - David Farrell (NWU) - ZBL addition -------------------------------------------------------------------------- */ - -#include <math.h> -#include <stdio.h> -#include <stdlib.h> -#include <string.h> -#include "pair_tersoff_zbl_cuda.h" -#include "atom.h" -#include "update.h" -#include "neighbor.h" -#include "neigh_list.h" -#include "neigh_request.h" -#include "force.h" -#include "comm.h" -#include "memory.h" -#include "error.h" -#include "math_const.h" - -using namespace LAMMPS_NS; -using namespace MathConst; - -#define MAXLINE 1024 -#define DELTA 4 - -/* ---------------------------------------------------------------------- */ - -PairTersoffZBLCuda::PairTersoffZBLCuda(LAMMPS *lmp) : PairTersoffCuda(lmp) -{ - // hard-wired constants in metal or real units - // a0 = Bohr radius - // epsilon0 = permittivity of vacuum = q / energy-distance units - // e = unit charge - // 1 Kcal/mole = 0.043365121 eV - - if (strcmp(update->unit_style,"metal") == 0) { - global_a_0 = 0.529; - global_epsilon_0 = 0.00552635; - global_e = 1.0; - } else if (strcmp(update->unit_style,"real") == 0) { - global_a_0 = 0.529; - global_epsilon_0 = 0.00552635 * 0.043365121; - global_e = 1.0; - } else error->all(FLERR,"Pair tersoff/zbl requires metal or real units"); - iszbl = true; -} - -/* ---------------------------------------------------------------------- */ - -void PairTersoffZBLCuda::read_file(char *file) -{ - int params_per_line = 21; - char **words = new char*[params_per_line+1]; - - delete [] params; - params = NULL; - nparams = 0; - - // open file on proc 0 - - FILE *fp; - if (comm->me == 0) { - fp = fopen(file,"r"); - if (fp == NULL) { - char str[128]; - sprintf(str,"Cannot open Tersoff potential file %s",file); - error->one(FLERR,str); - } - } - - // read each 
line out of file, skipping blank lines or leading '#' - // store line of params if all 3 element tags are in element list - - int n,nwords,ielement,jelement,kelement; - char line[MAXLINE],*ptr; - int eof = 0; - - while (1) { - if (comm->me == 0) { - ptr = fgets(line,MAXLINE,fp); - if (ptr == NULL) { - eof = 1; - fclose(fp); - } else n = strlen(line) + 1; - } - MPI_Bcast(&eof,1,MPI_INT,0,world); - if (eof) break; - MPI_Bcast(&n,1,MPI_INT,0,world); - MPI_Bcast(line,n,MPI_CHAR,0,world); - - // strip comment, skip line if blank - - if ((ptr = strchr(line,'#'))) *ptr = '\0'; - nwords = atom->count_words(line); - if (nwords == 0) continue; - - // concatenate additional lines until have params_per_line words - - while (nwords < params_per_line) { - n = strlen(line); - if (comm->me == 0) { - ptr = fgets(&line[n],MAXLINE-n,fp); - if (ptr == NULL) { - eof = 1; - fclose(fp); - } else n = strlen(line) + 1; - } - MPI_Bcast(&eof,1,MPI_INT,0,world); - if (eof) break; - MPI_Bcast(&n,1,MPI_INT,0,world); - MPI_Bcast(line,n,MPI_CHAR,0,world); - if ((ptr = strchr(line,'#'))) *ptr = '\0'; - nwords = atom->count_words(line); - } - - if (nwords != params_per_line) - error->all(FLERR,"Incorrect format in Tersoff potential file"); - - // words = ptrs to all words in line - - nwords = 0; - words[nwords++] = strtok(line," \t\n\r\f"); - while ((words[nwords++] = strtok(NULL," \t\n\r\f"))) continue; - - // ielement,jelement,kelement = 1st args - // if all 3 args are in element list, then parse this line - // else skip to next line - - for (ielement = 0; ielement < nelements; ielement++) - if (strcmp(words[0],elements[ielement]) == 0) break; - if (ielement == nelements) continue; - for (jelement = 0; jelement < nelements; jelement++) - if (strcmp(words[1],elements[jelement]) == 0) break; - if (jelement == nelements) continue; - for (kelement = 0; kelement < nelements; kelement++) - if (strcmp(words[2],elements[kelement]) == 0) break; - if (kelement == nelements) continue; - - // load up 
parameter settings and error check their values - - if (nparams == maxparam) { - maxparam += DELTA; - params = (Param *) memory->srealloc(params,maxparam*sizeof(Param), - "pair:params"); - } - - params[nparams].ielement = ielement; - params[nparams].jelement = jelement; - params[nparams].kelement = kelement; - params[nparams].powerm = atof(words[3]); - params[nparams].gamma = atof(words[4]); - params[nparams].lam3 = atof(words[5]); - params[nparams].c = atof(words[6]); - params[nparams].d = atof(words[7]); - params[nparams].h = atof(words[8]); - params[nparams].powern = atof(words[9]); - params[nparams].beta = atof(words[10]); - params[nparams].lam2 = atof(words[11]); - params[nparams].bigb = atof(words[12]); - params[nparams].bigr = atof(words[13]); - params[nparams].bigd = atof(words[14]); - params[nparams].lam1 = atof(words[15]); - params[nparams].biga = atof(words[16]); - params[nparams].Z_i = atof(words[17]); - params[nparams].Z_j = atof(words[18]); - params[nparams].ZBLcut = atof(words[19]); - params[nparams].ZBLexpscale = atof(words[20]); - - // currently only allow m exponent of 1 or 3 - - params[nparams].powermint = int(params[nparams].powerm); - - if ( - params[nparams].lam3 < 0.0 || params[nparams].c < 0.0 || - params[nparams].d < 0.0 || params[nparams].powern < 0.0 || - params[nparams].beta < 0.0 || params[nparams].lam2 < 0.0 || - params[nparams].bigb < 0.0 || params[nparams].bigr < 0.0 || - params[nparams].bigd < 0.0 || - params[nparams].bigd > params[nparams].bigr || - params[nparams].lam3 < 0.0 || params[nparams].biga < 0.0 || - params[nparams].powerm - params[nparams].powermint != 0.0 || - (params[nparams].powermint != 3 && params[nparams].powermint != 1) || - params[nparams].gamma < 0.0 || - params[nparams].Z_i < 1.0 || params[nparams].Z_j < 1.0 || - params[nparams].ZBLcut < 0.0 || params[nparams].ZBLexpscale < 0.0) - error->all(FLERR,"Illegal Tersoff parameter"); - - nparams++; - } - - delete [] words; -} - -void PairTersoffZBLCuda::coeff(int 
narg, char **arg) -{ - PairTersoffCuda::coeff(narg, arg); - for(int i=0;i<maxparam;i++) - { - params_f[i].a_ij = (0.8854*global_a_0) / - (pow(params[i].Z_i,0.23) + pow(params[i].Z_j,0.23)); - params_f[i].premult = (params[i].Z_i * params[i].Z_j * pow(global_e,2.0))/(4.0*MY_PI*global_epsilon_0); - params_f[i].ZBLcut = params[i].ZBLcut; - params_f[i].ZBLexpscale = params[i].ZBLexpscale; - } -} diff --git a/src/USER-CUDA/pair_tersoff_zbl_cuda.h b/src/USER-CUDA/pair_tersoff_zbl_cuda.h deleted file mode 100644 index 4b935b86b2..0000000000 --- a/src/USER-CUDA/pair_tersoff_zbl_cuda.h +++ /dev/null @@ -1,53 +0,0 @@ -/* -*- c++ -*- ---------------------------------------------------------- - LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator - - Original Version: - http://lammps.sandia.gov, Sandia National Laboratories - Steve Plimpton, sjplimp@sandia.gov - - See the README file in the top-level LAMMPS directory. - - ----------------------------------------------------------------------- - - USER-CUDA Package and associated modifications: - https://sourceforge.net/projects/lammpscuda/ - - Christian Trott, christian.trott@tu-ilmenau.de - Lars Winterfeld, lars.winterfeld@tu-ilmenau.de - Theoretical Physics II, University of Technology Ilmenau, Germany - - See the README file in the USER-CUDA directory. - - This software is distributed under the GNU General Public License. 
-------------------------------------------------------------------------- */ - -#ifdef PAIR_CLASS - -PairStyle(tersoff/zbl/cuda,PairTersoffZBLCuda) - -#else - -#ifndef PAIR_TERSOFF_ZBL_CUDA_H -#define PAIR_TERSOFF_ZBL_CUDA_H - -#include "pair_tersoff_cuda.h" - -namespace LAMMPS_NS { - -class PairTersoffZBLCuda : public PairTersoffCuda -{ - public: - PairTersoffZBLCuda(class LAMMPS *); - private: - double global_a_0; // Bohr radius for Coulomb repulsion - double global_epsilon_0; // permittivity of vacuum for Coulomb repulsion - double global_e; // proton charge (negative of electron charge) - - void read_file(char *); - void coeff(int narg, char **arg); -}; - -} - -#endif -#endif diff --git a/src/USER-CUDA/pppm_cuda.cpp b/src/USER-CUDA/pppm_cuda.cpp deleted file mode 100644 index b322c9dd17..0000000000 --- a/src/USER-CUDA/pppm_cuda.cpp +++ /dev/null @@ -1,1420 +0,0 @@ -/* ---------------------------------------------------------------------- - LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator - - Original Version: - http://lammps.sandia.gov, Sandia National Laboratories - Steve Plimpton, sjplimp@sandia.gov - - See the README file in the top-level LAMMPS directory. - - ----------------------------------------------------------------------- - - USER-CUDA Package and associated modifications: - https://sourceforge.net/projects/lammpscuda/ - - Christian Trott, christian.trott@tu-ilmenau.de - Lars Winterfeld, lars.winterfeld@tu-ilmenau.de - Theoretical Physics II, University of Technology Ilmenau, Germany - - See the README file in the USER-CUDA directory. - - This software is distributed under the GNU General Public License. 
-------------------------------------------------------------------------- */ - -/* ---------------------------------------------------------------------- - LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator - http://lammps.sandia.gov, Sandia National Laboratories - Steve Plimpton, sjplimp@sandia.gov - - Copyright (2003) Sandia Corporation. Under the terms of Contract - DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains - certain rights in this software. This software is distributed under - the GNU General Public License. - - See the README file in the top-level LAMMPS directory. -------------------------------------------------------------------------- */ - -/* ---------------------------------------------------------------------- - Contributing authors: Roy Pollock (LLNL), Paul Crozier (SNL) -------------------------------------------------------------------------- */ - -#include <mpi.h> -#include <cstring> -#include <cstdio> -#include <cstdlib> -#include <cmath> -#include "pppm_cuda.h" -#include "atom.h" -#include "comm.h" -#include "neighbor.h" -#include "force.h" -#include "fft3d_wrap_cuda.h" // has to come before pair.h to avoid clash with kokkos -#include "pair.h" -#include "bond.h" -#include "angle.h" -#include "domain.h" -#include "remap_wrap.h" -#include "memory.h" -#include "error.h" -#include "update.h" -#include <ctime> //crmadd -#include "cuda_wrapper_cu.h" -#include "pppm_cuda_cu.h" -#include "user_cuda.h" -#include "math_const.h" - -using namespace LAMMPS_NS; -using namespace MathConst; - -#define MAXORDER 7 -#define OFFSET 4096 -#define SMALL 0.00001 -#define LARGE 10000.0 -#define EPS_HOC 1.0e-7 - -void printArray(double* data,int nx, int ny, int nz) -{ - for(int i=0;i<nz;i++) - for(int j=0;j<ny;j++) - { - printf("%i %i\n",i,j); - for(int k=0;k<nx;k++) - printf("%e ",data[2*(i*ny*nx+j*nx+k)]); - printf("\n\n"); - } -} -void printArray(double*** data,int nx, int ny, int nz) -{ - for(int i=0;i<nx;i++) - for(int 
j=0;j<ny;j++) - { - printf("%i %i\n",i,j); - for(int k=0;k<nz;k++) - printf("%e ",data[i][j][k]); - printf("\n\n"); - } -} -/* ---------------------------------------------------------------------- */ - -PPPMCuda::PPPMCuda(LAMMPS *lmp, int narg, char **arg) : - PPPMOld(lmp, (narg==2?1:narg), arg) -{ - cuda = lmp->cuda; - if(cuda == NULL) - error->all(FLERR,"You cannot use a /cuda class, without activating 'cuda' acceleration. Provide '-c on' as command-line argument to LAMMPS.."); - - if ((narg > 3)||(narg<1)) error->all(FLERR,"Illegal kspace_style pppm/cuda command"); - #ifndef FFT_CUFFT - error->all(FLERR,"Using kspace_style pppm/cuda without cufft is not possible. Compile with cufft=1 to include cufft. Aborting."); - #endif - - triclinic_support = 0; - accuracy_relative = fabs(force->numeric(FLERR,arg[0])); - - nfactors = 3; - factors = new int[nfactors]; - factors[0] = 2; - factors[1] = 3; - factors[2] = 5; - - MPI_Comm_rank(world,&me); - MPI_Comm_size(world,&nprocs); - - density_brick = vdx_brick = vdy_brick = vdz_brick = vdx_brick_tmp = NULL; - density_fft = NULL; - greensfn = NULL; - work1 = work2 = NULL; - vg = NULL; - fkx = fky = fkz = NULL; - buf1 = buf2 = NULL; - - gf_b = NULL; - rho1d = rho_coeff = NULL; - - fft1c = fft2c = NULL; - remap = NULL; - - density_brick_int=NULL; - density_intScale=1000000; - cu_vdx_brick = cu_vdy_brick = cu_vdz_brick = NULL; - cu_density_brick = NULL; - cu_density_brick_int = NULL; - cu_density_fft = NULL; - cu_energy=NULL; - cu_greensfn = NULL; - cu_work1 = cu_work2 = cu_work3 = NULL; - cu_vg = NULL; - cu_fkx = cu_fky = cu_fkz = NULL; - - cu_flag = NULL; - cu_debugdata = NULL; - cu_rho_coeff = NULL; - cu_virial = NULL; - - cu_gf_b = NULL; - - cu_slabbuf = NULL; - slabbuf = NULL; - - nmax = 0; - part2grid = NULL; - cu_part2grid = NULL; - adev_data_array=NULL; - poissontime=0; - old_nmax=0; - cu_pppm_grid_n=NULL; - cu_pppm_grid_ids=NULL; - - pppm_grid_nmax=0; - pppm2partgrid=new int[3]; - pppm_grid=new int[3]; - 
firstpass=true; - scale = 1.0; -} - - -/* ---------------------------------------------------------------------- - free all memory -------------------------------------------------------------------------- */ - -PPPMCuda::~PPPMCuda() -{ - delete [] slabbuf; - delete cu_slabbuf; - - delete [] factors; - factors=NULL; - deallocate(); - delete cu_part2grid; - cu_part2grid=NULL; - memory->destroy(part2grid); - part2grid = NULL; -} - -/* ---------------------------------------------------------------------- - called once before run -------------------------------------------------------------------------- */ - -void PPPMCuda::init() -{ - cuda->shared_data.pppm.cudable_force=1; - - //if(cuda->finished_run) {PPPM::init(); return;} - - if (me == 0) { - if (screen) fprintf(screen,"PPPMCuda initialization ...\n"); - if (logfile) fprintf(logfile,"PPPMCuda initialization ...\n"); - } - - // error check - - if (domain->dimension == 2) error->all(FLERR,"Cannot use PPPMCuda with 2d simulation"); - if (comm->style != 0) - error->universe_all(FLERR,"PPPMCuda can only currently be used with " - "comm_style brick"); - - if (!atom->q_flag) error->all(FLERR,"Kspace style requires atom attribute q"); - - if (slabflag == 0 && domain->nonperiodic > 0) - error->all(FLERR,"Cannot use nonperiodic boundaries with PPPMCuda"); - if (slabflag == 1) { - if (domain->xperiodic != 1 || domain->yperiodic != 1 || - domain->boundary[2][0] != 1 || domain->boundary[2][1] != 1) - error->all(FLERR,"Incorrect boundaries with slab PPPMCuda"); - } - - if (order < 2 || order > MAXORDER) { - char str[128]; - sprintf(str,"PPPMCuda order cannot be smaller than 2 or greater than %d",MAXORDER); - error->all(FLERR,str); - } - // free all arrays previously allocated - - deallocate(); - - // extract short-range Coulombic cutoff from pair style - - triclinic_check(); - - if (force->pair == NULL) - error->all(FLERR,"KSpace style is incompatible with Pair style"); - int itmp=0; - double *p_cutoff = (double *) 
force->pair->extract("cut_coul",itmp); - if (p_cutoff == NULL) - error->all(FLERR,"KSpace style is incompatible with Pair style"); - cutoff = *p_cutoff; - - // if kspace is TIP4P, extract TIP4P params from pair style - - qdist = 0.0; - - if (strcmp(force->kspace_style,"pppm/tip4p") == 0) { - if (force->pair == NULL) - error->all(FLERR,"KSpace style is incompatible with Pair style"); - double *p_qdist = (double *) force->pair->extract("qdist",itmp); - int *p_typeO = (int *) force->pair->extract("typeO",itmp); - int *p_typeH = (int *) force->pair->extract("typeH",itmp); - int *p_typeA = (int *) force->pair->extract("typeA",itmp); - int *p_typeB = (int *) force->pair->extract("typeB",itmp); - if (!p_qdist || !p_typeO || !p_typeH || !p_typeA || !p_typeB) - error->all(FLERR,"KSpace style is incompatible with Pair style"); - qdist = *p_qdist; - typeO = *p_typeO; - typeH = *p_typeH; - int typeA = *p_typeA; - int typeB = *p_typeB; - - if (force->angle == NULL || force->bond == NULL) - error->all(FLERR,"Bond and angle potentials must be defined for TIP4P"); - double theta = force->angle->equilibrium_angle(typeA); - double blen = force->bond->equilibrium_distance(typeB); - alpha = qdist / (2.0 * cos(0.5*theta) * blen); - } - - // compute qsum & qsqsum and warn if not charge-neutral - - scale = 1.0; - qqrd2e = force->qqrd2e; - qsum_qsq(); - natoms_original = atom->natoms; - - // set accuracy (force units) from accuracy_relative or accuracy_absolute - - if (accuracy_absolute >= 0.0) accuracy = accuracy_absolute; - else accuracy = accuracy_relative * two_charge_force; - - // setup FFT grid resolution and g_ewald - // normally one iteration thru while loop is all that is required - // if grid stencil extends beyond neighbor proc, reduce order and try again - - int iteration = 0; - - while (order > 1) { - if (iteration && me == 0) - error->warning(FLERR,"Reducing PPPMCuda order b/c stencil extends " - "beyond neighbor processor"); - iteration++; - - set_grid(); - - if (nx_pppm >= 
OFFSET || ny_pppm >= OFFSET || nz_pppm >= OFFSET) - error->all(FLERR,"PPPMCuda grid is too large"); - - // global indices of PPPMCuda grid range from 0 to N-1 - // nlo_in,nhi_in = lower/upper limits of the 3d sub-brick of - // global PPPMCuda grid that I own without ghost cells - // for slab PPPMCuda, assign z grid as if it were not extended - - nxlo_in = comm->myloc[0]*nx_pppm / comm->procgrid[0]; - nxhi_in = (comm->myloc[0]+1)*nx_pppm / comm->procgrid[0] - 1; - nylo_in = comm->myloc[1]*ny_pppm / comm->procgrid[1]; - nyhi_in = (comm->myloc[1]+1)*ny_pppm / comm->procgrid[1] - 1; - nzlo_in = comm->myloc[2] * - (static_cast<int> (nz_pppm/slab_volfactor)) / comm->procgrid[2]; - nzhi_in = (comm->myloc[2]+1) * - (static_cast<int> (nz_pppm/slab_volfactor)) / comm->procgrid[2] - 1; - - // nlower,nupper = stencil size for mapping particles to PPPMCuda grid - - nlower = -(order-1)/2; - nupper = order/2; - - // shift values for particle <-> grid mapping - // add/subtract OFFSET to avoid int(-0.75) = 0 when want it to be -1 - - if (order % 2) shift = OFFSET + 0.5; - else shift = OFFSET; - if (order % 2) shiftone = 0.0; - else shiftone = 0.5; - - // nlo_out,nhi_out = lower/upper limits of the 3d sub-brick of - // global PPPMCuda grid that my particles can contribute charge to - // effectively nlo_in,nhi_in + ghost cells - // nlo,nhi = global coords of grid pt to "lower left" of smallest/largest - // position a particle in my box can be at - // dist[3] = particle position bound = subbox + skin/2.0 + qdist - // qdist = offset due to TIP4P fictitious charge - // convert to triclinic if necessary - // nlo_out,nhi_out = nlo,nhi + stencil size for particle mapping - // for slab PPPMCuda, assign z grid as if it were not extended - - - triclinic = domain->triclinic; - double *prd,*sublo,*subhi; - - if (triclinic == 0) { - prd = domain->prd; - boxlo = domain->boxlo; - sublo = domain->sublo; - subhi = domain->subhi; - } else { - prd = domain->prd_lamda; - boxlo = domain->boxlo_lamda; - 
sublo = domain->sublo_lamda; - subhi = domain->subhi_lamda; - } - - double xprd = prd[0]; - double yprd = prd[1]; - double zprd = prd[2]; - double zprd_slab = zprd*slab_volfactor; - - double dist[3]; - double cuthalf = 0.5*neighbor->skin + qdist; - if (triclinic == 0) dist[0] = dist[1] = dist[2] = cuthalf; - else { - dist[0] = cuthalf/domain->prd[0]; - dist[1] = cuthalf/domain->prd[1]; - dist[2] = cuthalf/domain->prd[2]; - } - - int nlo,nhi; - - nlo = static_cast<int> ((sublo[0]-dist[0]-boxlo[0]) * - nx_pppm/xprd + shift) - OFFSET; - nhi = static_cast<int> ((subhi[0]+dist[0]-boxlo[0]) * - nx_pppm/xprd + shift) - OFFSET; - nxlo_out = nlo + nlower; - nxhi_out = nhi + nupper; - - nlo = static_cast<int> ((sublo[1]-dist[1]-boxlo[1]) * - ny_pppm/yprd + shift) - OFFSET; - nhi = static_cast<int> ((subhi[1]+dist[1]-boxlo[1]) * - ny_pppm/yprd + shift) - OFFSET; - nylo_out = nlo + nlower; - nyhi_out = nhi + nupper; - - nlo = static_cast<int> ((sublo[2]-dist[2]-boxlo[2]) * - nz_pppm/zprd_slab + shift) - OFFSET; - nhi = static_cast<int> ((subhi[2]+dist[2]-boxlo[2]) * - nz_pppm/zprd_slab + shift) - OFFSET; - nzlo_out = nlo + nlower; - nzhi_out = nhi + nupper; - - // for slab PPPMCuda, change the grid boundary for processors at +z end - // to include the empty volume between periodically repeating slabs - // for slab PPPMCuda, want charge data communicated from -z proc to +z proc, - // but not vice versa, also want field data communicated from +z proc to - // -z proc, but not vice versa - // this is accomplished by nzhi_in = nzhi_out on +z end (no ghost cells) - - if (slabflag && ((comm->myloc[2]+1) == (comm->procgrid[2]))) { - nzhi_in = nz_pppm - 1; - nzhi_out = nz_pppm - 1; - } - - // nlo_ghost,nhi_ghost = # of planes I will recv from 6 directions - // that overlay domain I own - // proc in that direction tells me via sendrecv() - // if no neighbor proc, value is from self since I have ghosts regardless - - int nplanes; - - nplanes = nxlo_in - nxlo_out; - if 
(comm->procneigh[0][0] != me) - MPI_Sendrecv(&nplanes,1,MPI_INT,comm->procneigh[0][0],0, - &nxhi_ghost,1,MPI_INT,comm->procneigh[0][1],0, - world,MPI_STATUS_IGNORE); - else nxhi_ghost = nplanes; - - nplanes = nxhi_out - nxhi_in; - if (comm->procneigh[0][1] != me) - MPI_Sendrecv(&nplanes,1,MPI_INT,comm->procneigh[0][1],0, - &nxlo_ghost,1,MPI_INT,comm->procneigh[0][0], - 0,world,MPI_STATUS_IGNORE); - else nxlo_ghost = nplanes; - - nplanes = nylo_in - nylo_out; - if (comm->procneigh[1][0] != me) - MPI_Sendrecv(&nplanes,1,MPI_INT,comm->procneigh[1][0],0, - &nyhi_ghost,1,MPI_INT,comm->procneigh[1][1],0, - world,MPI_STATUS_IGNORE); - else nyhi_ghost = nplanes; - - nplanes = nyhi_out - nyhi_in; - if (comm->procneigh[1][1] != me) - MPI_Sendrecv(&nplanes,1,MPI_INT,comm->procneigh[1][1],0, - &nylo_ghost,1,MPI_INT,comm->procneigh[1][0],0, - world,MPI_STATUS_IGNORE); - else nylo_ghost = nplanes; - - nplanes = nzlo_in - nzlo_out; - if (comm->procneigh[2][0] != me) - MPI_Sendrecv(&nplanes,1,MPI_INT,comm->procneigh[2][0],0, - &nzhi_ghost,1,MPI_INT,comm->procneigh[2][1],0, - world,MPI_STATUS_IGNORE); - else nzhi_ghost = nplanes; - - nplanes = nzhi_out - nzhi_in; - if (comm->procneigh[2][1] != me) - MPI_Sendrecv(&nplanes,1,MPI_INT,comm->procneigh[2][1],0, - &nzlo_ghost,1,MPI_INT,comm->procneigh[2][0],0, - world,MPI_STATUS_IGNORE); - else nzlo_ghost = nplanes; - - // test that ghost overlap is not bigger than my sub-domain - - int flag = 0; - if (nxlo_ghost > nxhi_in-nxlo_in+1) flag = 1; - if (nxhi_ghost > nxhi_in-nxlo_in+1) flag = 1; - if (nylo_ghost > nyhi_in-nylo_in+1) flag = 1; - if (nyhi_ghost > nyhi_in-nylo_in+1) flag = 1; - if (nzlo_ghost > nzhi_in-nzlo_in+1) flag = 1; - if (nzhi_ghost > nzhi_in-nzlo_in+1) flag = 1; - - int flag_all; - MPI_Allreduce(&flag,&flag_all,1,MPI_INT,MPI_SUM,world); - - if (flag_all == 0) break; - order--; - } - - if (order == 0) error->all(FLERR,"PPPMCuda order has been reduced to 0"); - - // decomposition of FFT mesh - // global indices range from 0 
to N-1 - // proc owns entire x-dimension, clump of columns in y,z dimensions - // npey_fft,npez_fft = # of procs in y,z dims - // if nprocs is small enough, proc can own 1 or more entire xy planes, - // else proc owns 2d sub-blocks of yz plane - // me_y,me_z = which proc (0-npe_fft-1) I am in y,z dimensions - // nlo_fft,nhi_fft = lower/upper limit of the section - // of the global FFT mesh that I own - - int npey_fft,npez_fft; - if (nz_pppm >= nprocs) { - npey_fft = 1; - npez_fft = nprocs; - } else procs2grid2d(nprocs,ny_pppm,nz_pppm,&npey_fft,&npez_fft); - - int me_y = me % npey_fft; - int me_z = me / npey_fft; - - nxlo_fft = 0; - nxhi_fft = nx_pppm - 1; - nylo_fft = me_y*ny_pppm/npey_fft; - nyhi_fft = (me_y+1)*ny_pppm/npey_fft - 1; - nzlo_fft = me_z*nz_pppm/npez_fft; - nzhi_fft = (me_z+1)*nz_pppm/npez_fft - 1; - - // PPPMCuda grid for this proc, including ghosts - - ngrid = (nxhi_out-nxlo_out+1) * (nyhi_out-nylo_out+1) * - (nzhi_out-nzlo_out+1); - - // FFT arrays on this proc, without ghosts - // nfft = FFT points in FFT decomposition on this proc - // nfft_brick = FFT points in 3d brick-decomposition on this proc - // nfft_both = greater of 2 values - - nfft = (nxhi_fft-nxlo_fft+1) * (nyhi_fft-nylo_fft+1) * - (nzhi_fft-nzlo_fft+1); - int nfft_brick = (nxhi_in-nxlo_in+1) * (nyhi_in-nylo_in+1) * - (nzhi_in-nzlo_in+1); - nfft_both = MAX(nfft,nfft_brick); - - // buffer space for use in brick2fft and fillbrick - // idel = max # of ghost planes to send or recv in +/- dir of each dim - // nx,ny,nz = owned planes (including ghosts) in each dim - // nxx,nyy,nzz = max # of grid cells to send in each dim - // nbuf = max in any dim, augment by 3x for components of vd_xyz in fillbrick - - int idelx,idely,idelz,nx,ny,nz,nxx,nyy,nzz; - - idelx = MAX(nxlo_ghost,nxhi_ghost); - idelx = MAX(idelx,nxhi_out-nxhi_in); - idelx = MAX(idelx,nxlo_in-nxlo_out); - - idely = MAX(nylo_ghost,nyhi_ghost); - idely = MAX(idely,nyhi_out-nyhi_in); - idely = MAX(idely,nylo_in-nylo_out); - - idelz = 
MAX(nzlo_ghost,nzhi_ghost); - idelz = MAX(idelz,nzhi_out-nzhi_in); - idelz = MAX(idelz,nzlo_in-nzlo_out); - - nx = nxhi_out - nxlo_out + 1; - ny = nyhi_out - nylo_out + 1; - nz = nzhi_out - nzlo_out + 1; - - nxx = idelx * ny * nz; - nyy = idely * nx * nz; - nzz = idelz * nx * ny; - - nbuf = MAX(nxx,nyy); - nbuf = MAX(nbuf,nzz); - nbuf *= 3; - - // print stats - - int ngrid_max,nfft_both_max,nbuf_max; - MPI_Allreduce(&ngrid,&ngrid_max,1,MPI_INT,MPI_MAX,world); - MPI_Allreduce(&nfft_both,&nfft_both_max,1,MPI_INT,MPI_MAX,world); - MPI_Allreduce(&nbuf,&nbuf_max,1,MPI_INT,MPI_MAX,world); - - if (me == 0) { - if (screen) fprintf(screen," brick FFT buffer size/proc = %d %d %d\n", - ngrid_max,nfft_both_max,nbuf_max); - if (logfile) fprintf(logfile," brick FFT buffer size/proc = %d %d %d\n", - ngrid_max,nfft_both_max,nbuf_max); - } - cuda_shared_pppm* ap=&(cuda->shared_data.pppm); - - ap->density_intScale=density_intScale; - ap->nxlo_in=nxlo_in; - ap->nxhi_in=nxhi_in; - ap->nxlo_out=nxlo_out; - ap->nxhi_out=nxhi_out; - ap->nylo_in=nylo_in; - ap->nyhi_in=nyhi_in; - ap->nylo_out=nylo_out; - ap->nyhi_out=nyhi_out; - ap->nzlo_in=nzlo_in; - ap->nzhi_in=nzhi_in; - ap->nzlo_out=nzlo_out; - ap->nzhi_out=nzhi_out; - ap->nxlo_in=nxlo_fft; - ap->nxhi_in=nxhi_fft; - ap->nylo_in=nylo_fft; - ap->nyhi_in=nyhi_fft; - ap->nzlo_in=nzlo_fft; - ap->nzhi_in=nzhi_fft; - ap->nx_pppm=nx_pppm; - ap->ny_pppm=ny_pppm; - ap->nz_pppm=nz_pppm; - ap->qqrd2e=qqrd2e; - ap->order=order; - ap->nmax=nmax; - ap->nlocal=atom->nlocal; - ap->delxinv=delxinv; - ap->delyinv=delyinv; - ap->delzinv=delzinv; - ap->nlower=nlower; - ap->nupper=nupper; - ap->shiftone=shiftone; - - // allocate K-space dependent memory - - - allocate(); - - // pre-compute Green's function denomiator expansion - // pre-compute 1d charge distribution coefficients - - compute_gf_denom(); - compute_rho_coeff(); -} - -/* ---------------------------------------------------------------------- - adjust PPPMCuda coeffs, called initially and 
whenever volume has changed -------------------------------------------------------------------------- */ - -void PPPMCuda::setup() -{ - double *prd; - cu_gf_b->upload(); - // volume-dependent factors - // adjust z dimension for 2d slab PPPMCuda - // z dimension for 3d PPPMCuda is zprd since slab_volfactor = 1.0 - - if (triclinic == 0) prd = domain->prd; - else prd = domain->prd_lamda; - - double xprd = prd[0]; - double yprd = prd[1]; - double zprd = prd[2]; - double zprd_slab = zprd*slab_volfactor; - volume = xprd * yprd * zprd_slab; - - delxinv = nx_pppm/xprd; - delyinv = ny_pppm/yprd; - delzinv = nz_pppm/zprd_slab; - - delvolinv = delxinv*delyinv*delzinv; - - double unitkx = (2.0*MY_PI/xprd); - double unitky = (2.0*MY_PI/yprd); - double unitkz = (2.0*MY_PI/zprd_slab); - - // fkx,fky,fkz for my FFT grid pts - Cuda_PPPM_Setup_fkxyz_vg(nx_pppm, ny_pppm,nz_pppm,unitkx,unitky,unitkz,g_ewald); - - - - // modified (Hockney-Eastwood) Coulomb Green's function - - int nbx = static_cast<int> ((g_ewald*xprd/(MY_PI*nx_pppm)) * - pow(-log(EPS_HOC),0.25)); - int nby = static_cast<int> ((g_ewald*yprd/(MY_PI*ny_pppm)) * - pow(-log(EPS_HOC),0.25)); - int nbz = static_cast<int> ((g_ewald*zprd_slab/(MY_PI*nz_pppm)) * - pow(-log(EPS_HOC),0.25)); - Cuda_PPPM_setup_greensfn(nx_pppm,ny_pppm,nz_pppm,unitkx,unitky,unitkz,g_ewald, -nbx,nby,nbz,xprd,yprd,zprd_slab); - - -#ifdef FFT_CUFFT - cu_vdx_brick->upload(); - cu_vdy_brick->upload(); - cu_vdz_brick->upload(); -#endif - cu_rho_coeff->upload(); - cu_density_brick->memset_device(0); - pppm_device_init_setup(&cuda->shared_data,shiftone,delxinv,delyinv,delzinv,nlower,nupper); -} - -/* ---------------------------------------------------------------------- - compute the PPPMCuda long-range force, energy, virial -------------------------------------------------------------------------- */ - -void PPPMCuda::compute(int eflag, int vflag) -{ - cuda_shared_atom* cu_atom = & cuda->shared_data.atom; - - int i; - my_times starttime; - my_times 
endtime; - my_times starttotal; - my_times endtotal; - // convert atoms from box to lamda coords - - if (triclinic == 0) boxlo = domain->boxlo; - else { - boxlo = domain->boxlo_lamda; - domain->x2lamda(atom->nlocal); - } - - // extend size of PPPM per-atom arrays if necessary - // force update of device data, if arrays resized - - - if (cu_atom->update_nmax || old_nmax == 0) { - memory->destroy(part2grid); - nmax = atom->nmax; - memory->create(part2grid,nmax,3,"pppm:part2grid"); - delete cu_part2grid; - delete [] adev_data_array; - adev_data_array=new dev_array[1]; - cu_part2grid = new cCudaData<int , int , yx > - ((int*)part2grid,adev_data_array, nmax,3); - - pppm_device_update(&cuda->shared_data,cu_part2grid->dev_data(), - atom->nlocal,atom->nmax); - old_nmax=nmax; - } - if(cu_atom->update_nlocal) {pppm_update_nlocal(cu_atom->nlocal);} - - energy = 0.0; - if (vflag) - { - for (i = 0; i < 6; i++) virial[i] = 0.0; - cu_virial->memset_device(0); - } - if(eflag) cu_energy->memset_device(0); - my_gettime(CLOCK_REALTIME,&starttotal); - - // find grid points for all my particles - // map my particle charge onto my local 3d density grid - - my_gettime(CLOCK_REALTIME,&starttime); - - particle_map(); - - my_gettime(CLOCK_REALTIME,&endtime); - cuda->shared_data.cuda_timings.pppm_particle_map+=(endtime.tv_sec-starttime.tv_sec+1.0*(endtime.tv_nsec-starttime.tv_nsec)/1000000000); - - //cu_part2grid->download(); - my_gettime(CLOCK_REALTIME,&starttime); - make_rho(); - my_gettime(CLOCK_REALTIME,&endtime); - cuda->shared_data.cuda_timings.pppm_make_rho+=(endtime.tv_sec-starttime.tv_sec+1.0*(endtime.tv_nsec-starttime.tv_nsec)/1000000000); - - // all procs communicate density values from their ghost cells - // to fully sum contribution in their 3d bricks - // remap from 3d decomposition to FFT decomposition - - int nprocs=comm->nprocs; - - my_gettime(CLOCK_REALTIME,&starttime); - - if(nprocs>1) - { - cu_density_brick->download(); - brick2fft(); - } - else - { - #ifdef FFT_CUFFT - 
pppm_initfftdata(&cuda->shared_data,(PPPM_CFLOAT*)cu_density_brick->dev_data(),(FFT_CFLOAT*)cu_work2->dev_data()); - #endif - } - - my_gettime(CLOCK_REALTIME,&endtime); - cuda->shared_data.cuda_timings.pppm_brick2fft+=(endtime.tv_sec-starttime.tv_sec+1.0*(endtime.tv_nsec-starttime.tv_nsec)/1000000000); - - // compute potential gradient on my FFT grid and - // portion of e_long on this proc's FFT grid - // return gradients (electric fields) in 3d brick decomposition - - my_gettime(CLOCK_REALTIME,&starttime); - poisson(eflag,vflag); - my_gettime(CLOCK_REALTIME,&endtime); - cuda->shared_data.cuda_timings.pppm_poisson+=(endtime.tv_sec-starttime.tv_sec+1.0*(endtime.tv_nsec-starttime.tv_nsec)/1000000000); - - // all procs communicate E-field values to fill ghost cells - // surrounding their 3d bricks - - // not necessary since all the calculations are done on one proc - - // calculate the force on my particles - - my_gettime(CLOCK_REALTIME,&starttime); - fieldforce(); - my_gettime(CLOCK_REALTIME,&endtime); - cuda->shared_data.cuda_timings.pppm_fieldforce+=(endtime.tv_sec-starttime.tv_sec+1.0*(endtime.tv_nsec-starttime.tv_nsec)/1000000000); - - // sum energy across procs and add in volume-dependent term - // reset qsum and qsqsum if atom count has changed - - my_gettime(CLOCK_REALTIME,&endtotal); - cuda->shared_data.cuda_timings.pppm_compute+=(endtotal.tv_sec-starttotal.tv_sec+1.0*(endtotal.tv_nsec-starttotal.tv_nsec)/1000000000); - - if (eflag) { - double energy_all; - MPI_Allreduce(&energy,&energy_all,1,MPI_DOUBLE,MPI_SUM,world); - energy = energy_all; - - if (atom->natoms != natoms_original) { - qsum_qsq(); - natoms_original = atom->natoms; - } - - energy *= 0.5*volume; - energy -= g_ewald*qsqsum/1.772453851 + - MY_PI2*qsum*qsum / (g_ewald*g_ewald*volume); - energy *= qqrd2e; - } - - // sum virial across procs - - if (vflag) { - double virial_all[6]; - MPI_Allreduce(virial,virial_all,6,MPI_DOUBLE,MPI_SUM,world); - for (i = 0; i < 6; i++) virial[i] = 
0.5*qqrd2e*volume*virial_all[i]; - } - - // 2d slab correction - - if (slabflag) slabcorr(eflag); - - // convert atoms back from lamda to box coords - - if (triclinic) domain->lamda2x(atom->nlocal); - - if(firstpass) firstpass=false; -} - - -/* ---------------------------------------------------------------------- - allocate memory that depends on # of K-vectors and order -------------------------------------------------------------------------- */ - - -void PPPMCuda::allocate() -{ - - struct dev_array* dev_tmp=new struct dev_array[20]; - int n_cudata=0; - - - memory->create3d_offset(density_brick,nzlo_out,nzhi_out,nylo_out,nyhi_out, - nxlo_out,nxhi_out,"pppm:density_brick"); - memory->create3d_offset(density_brick_int,nzlo_out,nzhi_out,nylo_out,nyhi_out, - nxlo_out,nxhi_out,"pppm:density_brick_int"); - - - cu_density_brick = new cCudaData<double, PPPM_CFLOAT, x> ((double*) &(density_brick[nzlo_out][nylo_out][nxlo_out]), & (dev_tmp[n_cudata++]), - (nzhi_out-nzlo_out+1)*(nyhi_out-nylo_out+1)*(nxhi_out-nxlo_out+1)); - - cu_density_brick_int = new cCudaData<int, int, x> ((int*) &(density_brick_int[nzlo_out][nylo_out][nxlo_out]), & (dev_tmp[n_cudata++]), - (nzhi_out-nzlo_out+1)*(nyhi_out-nylo_out+1)*(nxhi_out-nxlo_out+1)); - - memory->create3d_offset(vdx_brick,nzlo_out,nzhi_out,nylo_out,nyhi_out, - nxlo_out,nxhi_out,"pppm:vdx_brick"); - memory->create3d_offset(vdx_brick_tmp,nzlo_out,nzhi_out,nylo_out,nyhi_out, - nxlo_out,nxhi_out,"pppm:vdx_brick_tmp"); - - cu_vdx_brick = new cCudaData<double, PPPM_CFLOAT, x> ((double*) &(vdx_brick[nzlo_out][nylo_out][nxlo_out]), & (dev_tmp[n_cudata++]), - (nzhi_out-nzlo_out+1)*(nyhi_out-nylo_out+1)*(nxhi_out-nxlo_out+1)); - - memory->create3d_offset(vdy_brick,nzlo_out,nzhi_out,nylo_out,nyhi_out, - nxlo_out,nxhi_out,"pppm:vdy_brick"); - cu_vdy_brick = new cCudaData<double, PPPM_CFLOAT, x> ((double*) &(vdy_brick[nzlo_out][nylo_out][nxlo_out]), & (dev_tmp[n_cudata++]), - (nzhi_out-nzlo_out+1)*(nyhi_out-nylo_out+1)*(nxhi_out-nxlo_out+1)); 
- - memory->create3d_offset(vdz_brick,nzlo_out,nzhi_out,nylo_out,nyhi_out, - nxlo_out,nxhi_out,"pppm:vdz_brick"); - cu_vdz_brick = new cCudaData<double, PPPM_CFLOAT, x> ((double*) &(vdz_brick[nzlo_out][nylo_out][nxlo_out]), & (dev_tmp[n_cudata++]), - (nzhi_out-nzlo_out+1)*(nyhi_out-nylo_out+1)*(nxhi_out-nxlo_out+1)); - - memory->create(density_fft,nfft_both,"pppm:density_fft"); - - cu_density_fft = new cCudaData<double, PPPM_CFLOAT, x> (density_fft, & (dev_tmp[n_cudata++]),nfft_both); - - cu_energy = new cCudaData<double, ENERGY_CFLOAT, x> (NULL, &(dev_tmp[n_cudata++]),ny_pppm*nz_pppm); - cu_virial = new cCudaData<double, ENERGY_CFLOAT, x> (NULL, &(dev_tmp[n_cudata++]),ny_pppm*nz_pppm*6); - - memory->create(greensfn,nfft_both,"pppm:greensfn"); - cu_greensfn = new cCudaData<double, PPPM_CFLOAT, x> (greensfn, & (dev_tmp[n_cudata++]) , nx_pppm*ny_pppm*nz_pppm); - - memory->create(work1,2*nx_pppm*ny_pppm*nz_pppm,"pppm:work1"); - memory->create(work2,2*nx_pppm*ny_pppm*nz_pppm,"pppm:work2"); - memory->create(work3,2*nx_pppm*ny_pppm*nz_pppm,"pppm:work3"); - - cu_work1 = new cCudaData<double, FFT_CFLOAT, x> (work1, & (dev_tmp[n_cudata++]) , 2*nx_pppm*ny_pppm*nz_pppm); - cu_work2 = new cCudaData<double, FFT_CFLOAT, x> (work2, & (dev_tmp[n_cudata++]) , 2*nx_pppm*ny_pppm*nz_pppm); - cu_work3 = new cCudaData<double, FFT_CFLOAT, x> (work3, & (dev_tmp[n_cudata++]) , 2*nx_pppm*ny_pppm*nz_pppm); - - - memory->create(fkx,nx_pppm,"pppmcuda:fkx"); - cu_fkx = new cCudaData<double, PPPM_CFLOAT, x> (fkx, & (dev_tmp[n_cudata++]) , nx_pppm); - memory->create(fky,ny_pppm,"pppmcuda:fky"); - cu_fky = new cCudaData<double, PPPM_CFLOAT, x> (fky, & (dev_tmp[n_cudata++]) , ny_pppm); - memory->create(fkz,nz_pppm,"pppmcuda:fkz"); - cu_fkz = new cCudaData<double, PPPM_CFLOAT, x> (fkz, & (dev_tmp[n_cudata++]) , nz_pppm); - - memory->create(vg,nfft_both,6,"pppm:vg"); - - cu_vg = new cCudaData<double, PPPM_CFLOAT, xy> ((double*)vg, & (dev_tmp[n_cudata++]) , nfft_both,6); - - 
memory->create(buf1,nbuf,"pppm:buf1"); - memory->create(buf2,nbuf,"pppm:buf2"); - - - // summation coeffs - - - gf_b = new double[order]; - cu_gf_b = new cCudaData<double,PPPM_CFLOAT,x> (gf_b, &(dev_tmp[n_cudata++]) , order); - memory->create2d_offset(rho1d,3,-order/2,order/2,"pppm:rho1d"); - memory->create2d_offset(rho_coeff,order,(1-order)/2,order/2,"pppm:rho_coeff"); - - cu_rho_coeff = new cCudaData<double, PPPM_CFLOAT, x> ((double*) &(rho_coeff[0][(1-order)/2]), & (dev_tmp[n_cudata++]) , order*(order/2-(1-order)/2+1)); - - debugdata=new PPPM_CFLOAT[100]; - cu_debugdata = new cCudaData<PPPM_CFLOAT, PPPM_CFLOAT, x> (debugdata,& (dev_tmp[n_cudata++]),100); - cu_flag = new cCudaData<int, int, x> (&global_flag,& (dev_tmp[n_cudata++]),3); - - // create 2 FFTs and a Remap - // 1st FFT keeps data in FFT decompostion - // 2nd FFT returns data in 3d brick decomposition - // remap takes data from 3d brick to FFT decomposition - - int tmp; - - - - - fft1c = new FFT3dCuda(lmp,world,nx_pppm,ny_pppm,nz_pppm, - nxlo_fft,nxhi_fft,nylo_fft,nyhi_fft,nzlo_fft,nzhi_fft, - nxlo_fft,nxhi_fft,nylo_fft,nyhi_fft,nzlo_fft,nzhi_fft, - 0,0,&tmp,true); - - fft2c = new FFT3dCuda(lmp,world,nx_pppm,ny_pppm,nz_pppm, - nxlo_fft,nxhi_fft,nylo_fft,nyhi_fft,nzlo_fft,nzhi_fft, - nxlo_in,nxhi_in,nylo_in,nyhi_in,nzlo_in,nzhi_in, - 0,0,&tmp,false); - - -#ifdef FFT_CUFFT - fft1c->set_cudata(cu_work2->dev_data(),cu_work1->dev_data()); - fft2c->set_cudata(cu_work2->dev_data(),cu_work3->dev_data()); -#endif - - remap = new Remap(lmp,world, - nxlo_in,nxhi_in,nylo_in,nyhi_in,nzlo_in,nzhi_in, - nxlo_fft,nxhi_fft,nylo_fft,nyhi_fft,nzlo_fft,nzhi_fft, - 1,0,0,2,0); - - -pppm_device_init(cu_density_brick->dev_data(), cu_vdx_brick->dev_data(), cu_vdy_brick->dev_data(), cu_vdz_brick->dev_data(), cu_density_fft->dev_data(),cu_energy->dev_data(),cu_virial->dev_data() - , cu_work1->dev_data(), cu_work2->dev_data(), cu_work3->dev_data(), cu_greensfn->dev_data(), cu_fkx->dev_data(), cu_fky->dev_data(), 
cu_fkz->dev_data(), cu_vg->dev_data() - ,nxlo_in,nxhi_in,nylo_in,nyhi_in,nzlo_in,nzhi_in,nxlo_out,nxhi_out,nylo_out,nyhi_out,nzlo_out,nzhi_out,nx_pppm,ny_pppm,nz_pppm - ,nxlo_fft,nxhi_fft,nylo_fft,nyhi_fft,nzlo_fft,nzhi_fft,cu_gf_b->dev_data() - ,qqrd2e,order,cu_rho_coeff->dev_data(),cu_debugdata->dev_data(),cu_density_brick_int->dev_data(),slabflag - ); -} - - - -/* ---------------------------------------------------------------------- - deallocate memory that depends on # of K-vectors and order - ---------------------------------------------------------------------- */ - -void PPPMCuda::deallocate() -{ - memory->destroy3d_offset(density_brick,nzlo_out,nylo_out,nxlo_out); - memory->destroy3d_offset(vdx_brick,nzlo_out,nylo_out,nxlo_out); - memory->destroy3d_offset(vdy_brick,nzlo_out,nylo_out,nxlo_out); - memory->destroy3d_offset(vdz_brick,nzlo_out,nylo_out,nxlo_out); - - density_brick = vdx_brick = vdy_brick = vdz_brick = NULL; - - memory->destroy(density_fft); - memory->destroy(greensfn); - memory->destroy(work1); - memory->destroy(work2); - memory->destroy(vg); - - density_fft = NULL; - greensfn = NULL; - work1 = NULL; - work2 = NULL; - vg = NULL; - - memory->destroy(fkx); - memory->destroy(fky); - memory->destroy(fkz); - - fkx = NULL; - fky = NULL; - fkz = NULL; - - delete cu_density_brick; - delete cu_density_brick_int; - delete cu_vdx_brick; - delete cu_vdy_brick; - delete cu_vdz_brick; - delete cu_density_fft; - delete cu_energy; - delete cu_virial; -#ifdef FFT_CUFFT - delete cu_greensfn; - delete cu_gf_b; - delete cu_vg; - delete cu_work1; - delete cu_work2; - delete cu_work3; - delete cu_fkx; - delete cu_fky; - delete cu_fkz; -#endif - - delete cu_flag; - delete cu_debugdata; - delete cu_rho_coeff; - - - cu_vdx_brick = cu_vdy_brick = cu_vdz_brick = NULL; - cu_density_brick = NULL; - cu_density_brick_int = NULL; - cu_density_fft = NULL; - cu_energy=NULL; - cu_virial=NULL; -#ifdef FFT_CUFFT - cu_greensfn = NULL; - cu_gf_b = NULL; - cu_work1 = cu_work2 = 
cu_work3 = NULL; - cu_vg = NULL; - cu_fkx = cu_fky = cu_fkz = NULL; -#endif - - cu_flag = NULL; - cu_debugdata = NULL; - cu_rho_coeff = NULL; - cu_part2grid = NULL; - - memory->destroy(buf1); - memory->destroy(buf2); - - delete [] gf_b; - gf_b = NULL; - memory->destroy2d_offset(rho1d,-order/2); rho1d = NULL; - memory->destroy2d_offset(rho_coeff,(1-order)/2); rho_coeff = NULL; - - delete fft1c; - fft1c = NULL; - - delete fft2c; - fft2c = NULL; - delete remap; - remap = NULL; - buf1 = NULL; - buf2 = NULL; -} - -/* ---------------------------------------------------------------------- - set size of FFT grid (nx,ny,nz_pppm) and g_ewald --------------------------------------------------------------------------*/ - -void PPPMCuda::set_grid() -{ - // see JCP 109, pg 7698 for derivation of coefficients - // higher order coefficients may be computed if needed - - double **acons; - memory->create(acons,8,7,"pppm:acons"); - - acons[1][0] = 2.0 / 3.0; - acons[2][0] = 1.0 / 50.0; - acons[2][1] = 5.0 / 294.0; - acons[3][0] = 1.0 / 588.0; - acons[3][1] = 7.0 / 1440.0; - acons[3][2] = 21.0 / 3872.0; - acons[4][0] = 1.0 / 4320.0; - acons[4][1] = 3.0 / 1936.0; - acons[4][2] = 7601.0 / 2271360.0; - acons[4][3] = 143.0 / 28800.0; - acons[5][0] = 1.0 / 23232.0; - acons[5][1] = 7601.0 / 13628160.0; - acons[5][2] = 143.0 / 69120.0; - acons[5][3] = 517231.0 / 106536960.0; - acons[5][4] = 106640677.0 / 11737571328.0; - acons[6][0] = 691.0 / 68140800.0; - acons[6][1] = 13.0 / 57600.0; - acons[6][2] = 47021.0 / 35512320.0; - acons[6][3] = 9694607.0 / 2095994880.0; - acons[6][4] = 733191589.0 / 59609088000.0; - acons[6][5] = 326190917.0 / 11700633600.0; - acons[7][0] = 1.0 / 345600.0; - acons[7][1] = 3617.0 / 35512320.0; - acons[7][2] = 745739.0 / 838397952.0; - acons[7][3] = 56399353.0 / 12773376000.0; - acons[7][4] = 25091609.0 / 1560084480.0; - acons[7][5] = 1755948832039.0 / 36229939200000.0; - acons[7][6] = 4887769399.0 / 37838389248.0; - - bigint natoms = atom->natoms; - - // use 
xprd,yprd,zprd even if triclinic so grid size is the same - // adjust z dimension for 2d slab PPPMCuda - // 3d PPPMCuda just uses zprd since slab_volfactor = 1.0 - - double xprd = domain->xprd; - double yprd = domain->yprd; - double zprd = domain->zprd; - double zprd_slab = zprd*slab_volfactor; - - // make initial g_ewald estimate - // based on desired error and real space cutoff - // fluid-occupied volume used to estimate real-space error - // zprd used rather than zprd_slab - - double h_x,h_y,h_z; - - if (!gewaldflag) - g_ewald = sqrt(-log(accuracy*sqrt(natoms*cutoff*xprd*yprd*zprd) / - (2.0*q2))) / cutoff; - - // set optimal nx_pppm,ny_pppm,nz_pppm based on order and precision - // nz_pppm uses extended zprd_slab instead of zprd - // h = 1/g_ewald is upper bound on h such that h*g_ewald <= 1 - // reduce it until precision target is met - - if (!gridflag) { - double err; - h_x = h_y = h_z = 1/g_ewald; - - nx_pppm = static_cast<int> (xprd/h_x + 1); - ny_pppm = static_cast<int> (yprd/h_y + 1); - nz_pppm = static_cast<int> (zprd_slab/h_z + 1); - - err = rms(h_x,xprd,natoms,q2,acons); - while (err > accuracy) { - err = rms(h_x,xprd,natoms,q2,acons); - nx_pppm++; - h_x = xprd/nx_pppm; - } - - err = rms(h_y,yprd,natoms,q2,acons); - while (err > accuracy) { - err = rms(h_y,yprd,natoms,q2,acons); - ny_pppm++; - h_y = yprd/ny_pppm; - } - - err = rms(h_z,zprd_slab,natoms,q2,acons); - while (err > accuracy) { - err = rms(h_z,zprd_slab,natoms,q2,acons); - nz_pppm++; - h_z = zprd_slab/nz_pppm; - } - } - - // boost grid size until it is factorable - - while (!factorable(nx_pppm)) nx_pppm++; - while (!factorable(ny_pppm)) ny_pppm++; - while (!factorable(nz_pppm)) nz_pppm++; - - - // adjust g_ewald for new grid size - - h_x = xprd/nx_pppm; - h_y = yprd/ny_pppm; - h_z = zprd_slab/nz_pppm; - - if (!gewaldflag) { - double gew1,gew2,dgew,f,fmid,hmin,rtb; - int ncount; - - gew1 = 0.0; - g_ewald = gew1; - f = diffpr(h_x,h_y,h_z,q2,acons); - - hmin = MIN(h_x,MIN(h_y,h_z)); - gew2 = 
10/hmin; - g_ewald = gew2; - fmid = diffpr(h_x,h_y,h_z,q2,acons); - - if (f*fmid >= 0.0) error->all(FLERR,"Cannot compute PPPMCuda G"); - rtb = f < 0.0 ? (dgew=gew2-gew1,gew1) : (dgew=gew1-gew2,gew2); - ncount = 0; - while (fabs(dgew) > SMALL && fmid != 0.0) { - dgew *= 0.5; - g_ewald = rtb + dgew; - fmid = diffpr(h_x,h_y,h_z,q2,acons); - if (fmid <= 0.0) rtb = g_ewald; - ncount++; - if (ncount > LARGE) error->all(FLERR,"Cannot compute PPPMCuda G"); - } - } - - // final RMS precision - - double lprx = rms(h_x,xprd,natoms,q2,acons); - double lpry = rms(h_y,yprd,natoms,q2,acons); - double lprz = rms(h_z,zprd_slab,natoms,q2,acons); - double lpr = sqrt(lprx*lprx + lpry*lpry + lprz*lprz) / sqrt(3.0); - double spr = 2.0*q2 * exp(-g_ewald*g_ewald*cutoff*cutoff) / - sqrt(natoms*cutoff*xprd*yprd*zprd_slab); - - // free local memory - - memory->destroy(acons); - - // print info - - if (me == 0) { - if (screen) { - fprintf(screen," G vector = %g\n",g_ewald); - fprintf(screen," grid = %d %d %d\n",nx_pppm,ny_pppm,nz_pppm); - fprintf(screen," stencil order = %d\n",order); - fprintf(screen," absolute RMS force accuracy = %g\n",MAX(lpr,spr)); - fprintf(screen," relative force accuracy = %g\n", - MAX(lpr,spr)/two_charge_force); - } - if (logfile) { - fprintf(logfile," G vector = %g\n",g_ewald); - fprintf(logfile," grid = %d %d %d\n",nx_pppm,ny_pppm,nz_pppm); - fprintf(logfile," stencil order = %d\n",order); - fprintf(logfile," absolute RMS force accuracy = %g\n",MAX(lpr,spr)); - fprintf(logfile," relative force accuracy = %g\n", - MAX(lpr,spr)/two_charge_force); - } - } -} - - -/* ---------------------------------------------------------------------- - find center grid pt for each of my particles - check that full stencil for the particle will fit in my 3d brick - store central grid pt indices in part2grid array -------------------------------------------------------------------------- */ - - -void PPPMCuda::particle_map() -{ - MYDBG(printf("# CUDA PPPMCuda::particle_map() ... 
start\n");) - int flag = 0; - - cu_flag->memset_device(0); - flag=cuda_particle_map(&cuda->shared_data,cu_flag->dev_data()); - if(flag) - { - cu_debugdata->download(); - printf("Out of range atom: "); - printf("ID: %i ",atom->tag[int(debugdata[0])]); - printf("x: %e ",debugdata[7]); - printf("y: %e ",debugdata[8]); - printf("z: %e ",debugdata[9]); - printf("nx: %e ",debugdata[4]); - printf("ny: %e ",debugdata[5]); - - printf("\n"); - //printf("debugdata: cpu: %e %e %e %i\n",boxlo[0],boxlo[1],boxlo[2],atom->nlocal); - cuda->cu_x->download(); - int nx,ny,nz; - - double **x = atom->x; - int nlocal = atom->nlocal; - for (int i = 0; i < nlocal; i++) { - nx = static_cast<int> ((x[i][0]-boxlo[0])*delxinv+shift) - OFFSET; - ny = static_cast<int> ((x[i][1]-boxlo[1])*delyinv+shift) - OFFSET; - nz = static_cast<int> ((x[i][2]-boxlo[2])*delzinv+shift) - OFFSET; - - if(i==1203)printf("Outside Atom: %i %e %e %e (%i %i %i)\n",i,x[i][0],x[i][1],x[i][2],nx,ny,nz); - if (nx+nlower < nxlo_out || nx+nupper > nxhi_out || - ny+nlower < nylo_out || ny+nupper > nyhi_out || - nz+nlower < nzlo_out || nz+nupper > nzhi_out || i==1203) {printf("Outside Atom: %i %e %e %e (%i %i %i)\n",i,x[i][0],x[i][1],x[i][2],nx,ny,nz); } - } - - } - - int flag_all; - MPI_Allreduce(&flag,&flag_all,1,MPI_INT,MPI_SUM,world); - if (flag_all) error->all(FLERR,"Out of range atoms - cannot compute PPPMCuda!"); -} - -/* ---------------------------------------------------------------------- - create discretized "density" on section of global grid due to my particles - density(x,y,z) = charge "density" at grid points of my 3d brick - (nxlo:nxhi,nylo:nyhi,nzlo:nzhi) is extent of my brick (including ghosts) - in global grid -------------------------------------------------------------------------- */ - - -void PPPMCuda::make_rho() -{ - cuda_make_rho(&cuda->shared_data,cu_flag->dev_data(),&density_intScale,nxhi_out,nxlo_out,nyhi_out,nylo_out,nzhi_out,nzlo_out,cu_density_brick->dev_data(),cu_density_brick_int->dev_data()); 
-} - - -/* ---------------------------------------------------------------------- - FFT-based Poisson solver -------------------------------------------------------------------------- */ -void PPPMCuda::poisson(int eflag, int vflag) -{ - -#ifndef FFT_CUFFT - PPPMOld::poisson(eflag,vflag); - return; -#endif -#ifdef FFT_CUFFT - my_times starttime; - my_times endtime; - - - my_gettime(CLOCK_REALTIME,&starttime); - fft1c->compute(density_fft,work1,1); - - my_gettime(CLOCK_REALTIME,&endtime); - poissontime+=(endtime.tv_sec-starttime.tv_sec+1.0*(endtime.tv_nsec-starttime.tv_nsec)/1000000000); - - - - if (eflag || vflag) { - poisson_energy(nxlo_fft,nxhi_fft,nylo_fft,nyhi_fft,nzlo_fft,nzhi_fft,vflag); - ENERGY_CFLOAT gpuvirial[6]; - energy+=sum_energy(cu_virial->dev_data(),cu_energy->dev_data(),nx_pppm,ny_pppm,nz_pppm,vflag,gpuvirial); - if(vflag) - { - for(int j=0;j<6;j++) virial[j]+=gpuvirial[j]; - } - } - - - // scale by 1/total-grid-pts to get rho(k) - // multiply by Green's function to get V(k) - - poisson_scale(nx_pppm,ny_pppm,nz_pppm); - - // compute gradients of V(r) in each of 3 dims by transformimg -ik*V(k) - // FFT leaves data in 3d brick decomposition - // copy it into inner portion of vdx,vdy,vdz arrays - - // x direction gradient - - - poisson_xgrad(nx_pppm,ny_pppm,nz_pppm); - - - my_gettime(CLOCK_REALTIME,&starttime); - fft2c->compute(work2,work2,-1); - my_gettime(CLOCK_REALTIME,&endtime); - poissontime+=(endtime.tv_sec-starttime.tv_sec+1.0*(endtime.tv_nsec-starttime.tv_nsec)/1000000000); - - poisson_vdx_brick(nxhi_out,nxlo_out,nyhi_out,nylo_out,nzhi_out,nzlo_out,nx_pppm,ny_pppm,nz_pppm); - - - // y direction gradient - - poisson_ygrad(nx_pppm,ny_pppm,nz_pppm); - - my_gettime(CLOCK_REALTIME,&starttime); - fft2c->compute(work2,work2,-1); - my_gettime(CLOCK_REALTIME,&endtime); - poissontime+=(endtime.tv_sec-starttime.tv_sec+1.0*(endtime.tv_nsec-starttime.tv_nsec)/1000000000); - - 
poisson_vdy_brick(nxhi_out,nxlo_out,nyhi_out,nylo_out,nzhi_out,nzlo_out,nx_pppm,ny_pppm,nz_pppm); - - // z direction gradient - - poisson_zgrad(nx_pppm,ny_pppm,nz_pppm); - - my_gettime(CLOCK_REALTIME,&starttime); - fft2c->compute(work2,work2,-1); - my_gettime(CLOCK_REALTIME,&endtime); - poissontime+=(endtime.tv_sec-starttime.tv_sec+1.0*(endtime.tv_nsec-starttime.tv_nsec)/1000000000); - - poisson_vdz_brick(nxhi_out,nxlo_out,nyhi_out,nylo_out,nzhi_out,nzlo_out,nx_pppm,ny_pppm,nz_pppm); - #endif -} - -/*---------------------------------------------------------------------- - interpolate from grid to get electric field & force on my particles --------------------------------------------------------------------------*/ - -void PPPMCuda::fieldforce() -{ - cuda_fieldforce(& cuda->shared_data,cu_flag); - return; -} - -/* ---------------------------------------------------------------------- - perform and time the 4 FFTs required for N timesteps -------------------------------------------------------------------------- */ - -int PPPMCuda::timing_1d(int n, double &time1d) -{ - time1d = cuda->shared_data.cuda_timings.pppm_poisson/update->nsteps/4*n; - return 4; -} - -int PPPMCuda::timing_3d(int n, double &time3d) -{ - time3d = cuda->shared_data.cuda_timings.pppm_poisson/update->nsteps*n; - return 4; -} - -void PPPMCuda::slabcorr(int eflag) -{ - // compute local contribution to global dipole moment - if(slabbuf==NULL) - { - slabbuf=new ENERGY_CFLOAT[(atom->nmax+31)/32]; - cu_slabbuf = new cCudaData<ENERGY_CFLOAT,ENERGY_CFLOAT, x> (slabbuf, (atom->nmax+31)/32); - } - if(unsigned((atom->nlocal+31)/32)*sizeof(ENERGY_CFLOAT)>=unsigned(cu_slabbuf->dev_size())) - { - delete [] slabbuf; - delete cu_slabbuf; - slabbuf=new ENERGY_CFLOAT[(atom->nmax+31)/32]; - cu_slabbuf = new cCudaData<ENERGY_CFLOAT,ENERGY_CFLOAT, x> (slabbuf, (atom->nmax+31)/32); - } - - - double dipole = cuda_slabcorr_energy(&cuda->shared_data,slabbuf,(ENERGY_CFLOAT*) cu_slabbuf->dev_data()); - - double dipole_all; - 
MPI_Allreduce(&dipole,&dipole_all,1,MPI_DOUBLE,MPI_SUM,world); - - //if (eflag) energy += qqrd2e*scale * e_slabcorr; - // need to add a correction to make non-neutral systems and per-atom energy translationally invariant - if (eflag || fabs(qsum) > SMALL) - error->all(FLERR,"Cannot (yet) use slab correction with kspace_style pppm/cuda for non-neutral systems or to get per-atom energy. Aborting."); - - double ffact = -4.0*MY_PI*dipole_all/volume; - - cuda_slabcorr_force(&cuda->shared_data,ffact); -} diff --git a/src/USER-CUDA/pppm_cuda.h b/src/USER-CUDA/pppm_cuda.h deleted file mode 100644 index cd22aa1d5d..0000000000 --- a/src/USER-CUDA/pppm_cuda.h +++ /dev/null @@ -1,113 +0,0 @@ -/* -*- c++ -*- ---------------------------------------------------------- - LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator - - Original Version: - http://lammps.sandia.gov, Sandia National Laboratories - Steve Plimpton, sjplimp@sandia.gov - - See the README file in the top-level LAMMPS directory. - - ----------------------------------------------------------------------- - - USER-CUDA Package and associated modifications: - https://sourceforge.net/projects/lammpscuda/ - - Christian Trott, christian.trott@tu-ilmenau.de - Lars Winterfeld, lars.winterfeld@tu-ilmenau.de - Theoretical Physics II, University of Technology Ilmenau, Germany - - See the README file in the USER-CUDA directory. - - This software is distributed under the GNU General Public License. 
-------------------------------------------------------------------------- */ - -#ifdef KSPACE_CLASS - -KSpaceStyle(pppm/cuda,PPPMCuda) - -#else - -#ifndef LMP_PPPM_CUDA_H -#define LMP_PPPM_CUDA_H - -#include "pppm_old.h" -#include "cuda_data.h" -#include "cuda_precision.h" - -namespace LAMMPS_NS { - -class PPPMCuda : public PPPMOld { - public: - PPPMCuda(class LAMMPS *, int, char **); - ~PPPMCuda(); - void init(); - void setup(); - void compute(int, int); - int timing_1d(int, double &); - int timing_3d(int, double &); - - double poissontime; - - protected: - class Cuda *cuda; - class FFT3dCuda *fft1c,*fft2c; - double* work3; - - cCudaData<double , FFT_CFLOAT , x >* cu_work1; - cCudaData<double , FFT_CFLOAT , x >* cu_work2; - cCudaData<double , FFT_CFLOAT , x >* cu_work3; - cCudaData<double , PPPM_CFLOAT , x >* cu_greensfn; - cCudaData<double , PPPM_CFLOAT , x >* cu_gf_b; - cCudaData<double , PPPM_CFLOAT , x >* cu_fkx; - cCudaData<double , PPPM_CFLOAT , x >* cu_fky; - cCudaData<double , PPPM_CFLOAT , x >* cu_fkz; - cCudaData<double , PPPM_CFLOAT , xy>* cu_vg; - cCudaData<double , PPPM_CFLOAT , x >* cu_density_brick; - cCudaData<int , int , x >* cu_density_brick_int; - cCudaData<double , PPPM_CFLOAT , x >* cu_vdx_brick; - cCudaData<double , PPPM_CFLOAT , x >* cu_vdy_brick; - cCudaData<double , PPPM_CFLOAT , x >* cu_vdz_brick; - cCudaData<double , PPPM_CFLOAT , x >* cu_density_fft; - cCudaData<double , ENERGY_CFLOAT , x >* cu_energy; - cCudaData<double , ENERGY_CFLOAT , x >* cu_virial; - cCudaData<double , X_CFLOAT , yx>* cu_x; - cCudaData<double , V_CFLOAT , yx>* cu_v; - cCudaData<double , F_CFLOAT , yx>* cu_f; - cCudaData<double , F_CFLOAT , yx>* cu_q; - cCudaData<int , int , yx>* cu_part2grid; - cCudaData<double , PPPM_CFLOAT , x >* cu_rho_coeff; - cCudaData<PPPM_CFLOAT , PPPM_CFLOAT , x >* cu_debugdata; - cCudaData<int , int , x >* cu_flag; - cCudaData<int , int , x >* cu_pppm_grid_n; - cCudaData<int , int , x >* cu_pppm_grid_ids; - - ENERGY_CFLOAT* slabbuf; - 
cCudaData<ENERGY_CFLOAT, ENERGY_CFLOAT, x >* cu_slabbuf; - - int*** density_brick_int; - PPPM_CFLOAT density_intScale; - int pppm_grid_nmax; - int* pppm2partgrid; - int* pppm_grid; - PPPM_CFLOAT* debugdata; - bool firstpass; - - void set_grid(); - void allocate(); - void deallocate(); - - virtual void particle_map(); - virtual void make_rho(); - virtual void poisson(int, int); - virtual void fieldforce(); - virtual void slabcorr(int); - double*** vdx_brick_tmp; - int old_nmax; - int global_flag; - dev_array* adev_data_array; -}; - -} - -#endif -#endif diff --git a/src/USER-CUDA/pppm_old.cpp b/src/USER-CUDA/pppm_old.cpp deleted file mode 100755 index 2cc4c18626..0000000000 --- a/src/USER-CUDA/pppm_old.cpp +++ /dev/null @@ -1,2839 +0,0 @@ -/* ---------------------------------------------------------------------- - LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator - http://lammps.sandia.gov, Sandia National Laboratories - Steve Plimpton, sjplimp@sandia.gov - - Copyright (2003) Sandia Corporation. Under the terms of Contract - DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains - certain rights in this software. This software is distributed under - the GNU General Public License. - - See the README file in the top-level LAMMPS directory. 
-------------------------------------------------------------------------- */ - -/* ---------------------------------------------------------------------- - Contributing authors: Roy Pollock (LLNL), Paul Crozier (SNL) - per-atom energy/virial & group/group energy/force added by Stan Moore (BYU) -------------------------------------------------------------------------- */ - -#include <mpi.h> -#include <string.h> -#include <stdio.h> -#include <stdlib.h> -#include <math.h> -#include "pppm_old.h" -#include "math_const.h" -#include "atom.h" -#include "comm.h" -#include "neighbor.h" -#include "force.h" -#include "pair.h" -#include "bond.h" -#include "angle.h" -#include "domain.h" -#include "fft3d_wrap.h" -#include "remap_wrap.h" -#include "memory.h" -#include "error.h" - -using namespace LAMMPS_NS; -using namespace MathConst; - -#define MAXORDER 7 -#define OFFSET 16384 -#define SMALL 0.00001 -#define LARGE 10000.0 -#define EPS_HOC 1.0e-7 - -#ifdef FFT_SINGLE -#define ZEROF 0.0f -#define ONEF 1.0f -#else -#define ZEROF 0.0 -#define ONEF 1.0 -#endif - -/* ---------------------------------------------------------------------- */ - -PPPMOld::PPPMOld(LAMMPS *lmp, int narg, char **arg) : KSpace(lmp, narg, arg) -{ - if (narg < 1) error->all(FLERR,"Illegal kspace_style pppm command"); - - triclinic_support = 0; - pppmflag = 1; - group_group_enable = 0; - - accuracy_relative = fabs(force->numeric(FLERR,arg[0])); - - nfactors = 3; - factors = new int[nfactors]; - factors[0] = 2; - factors[1] = 3; - factors[2] = 5; - - MPI_Comm_rank(world,&me); - MPI_Comm_size(world,&nprocs); - - density_brick = vdx_brick = vdy_brick = vdz_brick = NULL; - density_fft = NULL; - u_brick = NULL; - v0_brick = v1_brick = v2_brick = v3_brick = v4_brick = v5_brick = NULL; - greensfn = NULL; - work1 = work2 = NULL; - vg = NULL; - fkx = fky = fkz = NULL; - buf1 = buf2 = buf3 = buf4 = NULL; - - density_A_brick = density_B_brick = NULL; - density_A_fft = density_B_fft = NULL; - - gf_b = NULL; - rho1d = 
rho_coeff = NULL; - - fft1 = fft2 = NULL; - remap = NULL; - - nmax = 0; - part2grid = NULL; -} - -/* ---------------------------------------------------------------------- - free all memory -------------------------------------------------------------------------- */ - -PPPMOld::~PPPMOld() -{ - delete [] factors; - deallocate(); - deallocate_peratom(); - deallocate_groups(); - memory->destroy(part2grid); -} - -/* ---------------------------------------------------------------------- - called once before run -------------------------------------------------------------------------- */ - -void PPPMOld::init() -{ - if (me == 0) { - if (screen) fprintf(screen,"PPPM initialization ...\n"); - if (logfile) fprintf(logfile,"PPPM initialization ...\n"); - } - - // error check - - triclinic_check(); - if (domain->dimension == 2) error->all(FLERR, - "Cannot use PPPM with 2d simulation"); - - if (!atom->q_flag) error->all(FLERR,"Kspace style requires atom attribute q"); - - if (slabflag == 0 && domain->nonperiodic > 0) - error->all(FLERR,"Cannot use nonperiodic boundaries with PPPM"); - if (slabflag) { - if (domain->xperiodic != 1 || domain->yperiodic != 1 || - domain->boundary[2][0] != 1 || domain->boundary[2][1] != 1) - error->all(FLERR,"Incorrect boundaries with slab PPPM"); - } - - if (order < 2 || order > MAXORDER) { - char str[128]; - sprintf(str,"PPPM order cannot be < 2 or > than %d",MAXORDER); - error->all(FLERR,str); - } - - // free all arrays previously allocated - - deallocate(); - deallocate_peratom(); - peratom_allocate_flag = 0; - deallocate_groups(); - group_allocate_flag = 0; - - // extract short-range Coulombic cutoff from pair style - - pair_check(); - - int itmp=0; - double *p_cutoff = (double *) force->pair->extract("cut_coul",itmp); - if (p_cutoff == NULL) - error->all(FLERR,"KSpace style is incompatible with Pair style"); - cutoff = *p_cutoff; - - // if kspace is TIP4P, extract TIP4P params from pair style - // bond/angle are not yet init(), so insure 
equilibrium request is valid - - qdist = 0.0; - - if (tip4pflag) { - double *p_qdist = (double *) force->pair->extract("qdist",itmp); - int *p_typeO = (int *) force->pair->extract("typeO",itmp); - int *p_typeH = (int *) force->pair->extract("typeH",itmp); - int *p_typeA = (int *) force->pair->extract("typeA",itmp); - int *p_typeB = (int *) force->pair->extract("typeB",itmp); - if (!p_qdist || !p_typeO || !p_typeH || !p_typeA || !p_typeB) - error->all(FLERR,"KSpace style is incompatible with Pair style"); - qdist = *p_qdist; - typeO = *p_typeO; - typeH = *p_typeH; - int typeA = *p_typeA; - int typeB = *p_typeB; - - if (force->angle == NULL || force->bond == NULL) - error->all(FLERR,"Bond and angle potentials must be defined for TIP4P"); - if (typeA < 1 || typeA > atom->nangletypes || - force->angle->setflag[typeA] == 0) - error->all(FLERR,"Bad TIP4P angle type for PPPM/TIP4P"); - if (typeB < 1 || typeB > atom->nbondtypes || - force->bond->setflag[typeB] == 0) - error->all(FLERR,"Bad TIP4P bond type for PPPM/TIP4P"); - double theta = force->angle->equilibrium_angle(typeA); - double blen = force->bond->equilibrium_distance(typeB); - alpha = qdist / (cos(0.5*theta) * blen); - } - - // compute qsum & qsqsum and warn if not charge-neutral - - scale = 1.0; - qqrd2e = force->qqrd2e; - qsum_qsq(); - natoms_original = atom->natoms; - - // set accuracy (force units) from accuracy_relative or accuracy_absolute - - if (accuracy_absolute >= 0.0) accuracy = accuracy_absolute; - else accuracy = accuracy_relative * two_charge_force; - - // setup FFT grid resolution and g_ewald - // normally one iteration thru while loop is all that is required - // if grid stencil extends beyond neighbor proc, reduce order and try again - - int iteration = 0; - - while (order > 1) { - if (iteration && me == 0) - error->warning(FLERR,"Reducing PPPM order b/c stencil extends " - "beyond neighbor processor"); - iteration++; - - set_grid(); - - if (nx_pppm >= OFFSET || ny_pppm >= OFFSET || nz_pppm >= 
OFFSET) - error->all(FLERR,"PPPM grid is too large"); - - // global indices of PPPM grid range from 0 to N-1 - // nlo_in,nhi_in = lower/upper limits of the 3d sub-brick of - // global PPPM grid that I own without ghost cells - // for slab PPPM, assign z grid as if it were not extended - - nxlo_in = static_cast<int> (comm->xsplit[comm->myloc[0]] * nx_pppm); - nxhi_in = static_cast<int> (comm->xsplit[comm->myloc[0]+1] * nx_pppm) - 1; - - nylo_in = static_cast<int> (comm->ysplit[comm->myloc[1]] * ny_pppm); - nyhi_in = static_cast<int> (comm->ysplit[comm->myloc[1]+1] * ny_pppm) - 1; - - nzlo_in = static_cast<int> - (comm->zsplit[comm->myloc[2]] * nz_pppm/slab_volfactor); - nzhi_in = static_cast<int> - (comm->zsplit[comm->myloc[2]+1] * nz_pppm/slab_volfactor) - 1; - - // nlower,nupper = stencil size for mapping particles to PPPM grid - - nlower = -(order-1)/2; - nupper = order/2; - - // shift values for particle <-> grid mapping - // add/subtract OFFSET to avoid int(-0.75) = 0 when want it to be -1 - - if (order % 2) shift = OFFSET + 0.5; - else shift = OFFSET; - if (order % 2) shiftone = 0.0; - else shiftone = 0.5; - - // nlo_out,nhi_out = lower/upper limits of the 3d sub-brick of - // global PPPM grid that my particles can contribute charge to - // effectively nlo_in,nhi_in + ghost cells - // nlo,nhi = global coords of grid pt to "lower left" of smallest/largest - // position a particle in my box can be at - // dist[3] = particle position bound = subbox + skin/2.0 + qdist - // qdist = offset due to TIP4P fictitious charge - // convert to triclinic if necessary - // nlo_out,nhi_out = nlo,nhi + stencil size for particle mapping - // for slab PPPM, assign z grid as if it were not extended - - triclinic = domain->triclinic; - double *prd,*sublo,*subhi; - - if (triclinic == 0) { - prd = domain->prd; - boxlo = domain->boxlo; - sublo = domain->sublo; - subhi = domain->subhi; - } else { - prd = domain->prd_lamda; - boxlo = domain->boxlo_lamda; - sublo = domain->sublo_lamda; - 
subhi = domain->subhi_lamda; - } - - double xprd = prd[0]; - double yprd = prd[1]; - double zprd = prd[2]; - double zprd_slab = zprd*slab_volfactor; - - double dist[3]; - double cuthalf = 0.5*neighbor->skin + qdist; - if (triclinic == 0) dist[0] = dist[1] = dist[2] = cuthalf; - else { - dist[0] = cuthalf/domain->prd[0]; - dist[1] = cuthalf/domain->prd[1]; - dist[2] = cuthalf/domain->prd[2]; - } - - int nlo,nhi; - - nlo = static_cast<int> ((sublo[0]-dist[0]-boxlo[0]) * - nx_pppm/xprd + shift) - OFFSET; - nhi = static_cast<int> ((subhi[0]+dist[0]-boxlo[0]) * - nx_pppm/xprd + shift) - OFFSET; - nxlo_out = nlo + nlower; - nxhi_out = nhi + nupper; - - nlo = static_cast<int> ((sublo[1]-dist[1]-boxlo[1]) * - ny_pppm/yprd + shift) - OFFSET; - nhi = static_cast<int> ((subhi[1]+dist[1]-boxlo[1]) * - ny_pppm/yprd + shift) - OFFSET; - nylo_out = nlo + nlower; - nyhi_out = nhi + nupper; - - nlo = static_cast<int> ((sublo[2]-dist[2]-boxlo[2]) * - nz_pppm/zprd_slab + shift) - OFFSET; - nhi = static_cast<int> ((subhi[2]+dist[2]-boxlo[2]) * - nz_pppm/zprd_slab + shift) - OFFSET; - nzlo_out = nlo + nlower; - nzhi_out = nhi + nupper; - - // for slab PPPM, change the grid boundary for processors at +z end - // to include the empty volume between periodically repeating slabs - // for slab PPPM, want charge data communicated from -z proc to +z proc, - // but not vice versa, also want field data communicated from +z proc to - // -z proc, but not vice versa - // this is accomplished by nzhi_in = nzhi_out on +z end (no ghost cells) - - if (slabflag == 1 && (comm->myloc[2] == comm->procgrid[2]-1)) { - nzhi_in = nz_pppm - 1; - nzhi_out = nz_pppm - 1; - } - - // nlo_ghost,nhi_ghost = # of planes I will recv from 6 directions - // that overlay domain I own - // proc in that direction tells me via sendrecv() - // if no neighbor proc, value is from self since I have ghosts regardless - - int nplanes; - - nplanes = nxlo_in - nxlo_out; - if (comm->procneigh[0][0] != me) - 
MPI_Sendrecv(&nplanes,1,MPI_INT,comm->procneigh[0][0],0, - &nxhi_ghost,1,MPI_INT,comm->procneigh[0][1],0, - world,MPI_STATUS_IGNORE); - else nxhi_ghost = nplanes; - - nplanes = nxhi_out - nxhi_in; - if (comm->procneigh[0][1] != me) - MPI_Sendrecv(&nplanes,1,MPI_INT,comm->procneigh[0][1],0, - &nxlo_ghost,1,MPI_INT,comm->procneigh[0][0], - 0,world,MPI_STATUS_IGNORE); - else nxlo_ghost = nplanes; - - nplanes = nylo_in - nylo_out; - if (comm->procneigh[1][0] != me) - MPI_Sendrecv(&nplanes,1,MPI_INT,comm->procneigh[1][0],0, - &nyhi_ghost,1,MPI_INT,comm->procneigh[1][1],0, - world,MPI_STATUS_IGNORE); - else nyhi_ghost = nplanes; - - nplanes = nyhi_out - nyhi_in; - if (comm->procneigh[1][1] != me) - MPI_Sendrecv(&nplanes,1,MPI_INT,comm->procneigh[1][1],0, - &nylo_ghost,1,MPI_INT,comm->procneigh[1][0],0, - world,MPI_STATUS_IGNORE); - else nylo_ghost = nplanes; - - nplanes = nzlo_in - nzlo_out; - if (comm->procneigh[2][0] != me) - MPI_Sendrecv(&nplanes,1,MPI_INT,comm->procneigh[2][0],0, - &nzhi_ghost,1,MPI_INT,comm->procneigh[2][1],0, - world,MPI_STATUS_IGNORE); - else nzhi_ghost = nplanes; - - nplanes = nzhi_out - nzhi_in; - if (comm->procneigh[2][1] != me) - MPI_Sendrecv(&nplanes,1,MPI_INT,comm->procneigh[2][1],0, - &nzlo_ghost,1,MPI_INT,comm->procneigh[2][0],0, - world,MPI_STATUS_IGNORE); - else nzlo_ghost = nplanes; - - // test that ghost overlap is not bigger than my sub-domain - - int flag = 0; - if (nxlo_ghost > nxhi_in-nxlo_in+1) flag = 1; - if (nxhi_ghost > nxhi_in-nxlo_in+1) flag = 1; - if (nylo_ghost > nyhi_in-nylo_in+1) flag = 1; - if (nyhi_ghost > nyhi_in-nylo_in+1) flag = 1; - if (nzlo_ghost > nzhi_in-nzlo_in+1) flag = 1; - if (nzhi_ghost > nzhi_in-nzlo_in+1) flag = 1; - - int flag_all; - MPI_Allreduce(&flag,&flag_all,1,MPI_INT,MPI_SUM,world); - - if (flag_all == 0) break; - order--; - } - - if (order == 0) error->all(FLERR,"PPPM order has been reduced to 0"); - - // decomposition of FFT mesh - // global indices range from 0 to N-1 - // proc owns entire 
x-dimension, clump of columns in y,z dimensions - // npey_fft,npez_fft = # of procs in y,z dims - // if nprocs is small enough, proc can own 1 or more entire xy planes, - // else proc owns 2d sub-blocks of yz plane - // me_y,me_z = which proc (0-npe_fft-1) I am in y,z dimensions - // nlo_fft,nhi_fft = lower/upper limit of the section - // of the global FFT mesh that I own - - int npey_fft,npez_fft; - if (nz_pppm >= nprocs) { - npey_fft = 1; - npez_fft = nprocs; - } else procs2grid2d(nprocs,ny_pppm,nz_pppm,&npey_fft,&npez_fft); - - int me_y = me % npey_fft; - int me_z = me / npey_fft; - - nxlo_fft = 0; - nxhi_fft = nx_pppm - 1; - nylo_fft = me_y*ny_pppm/npey_fft; - nyhi_fft = (me_y+1)*ny_pppm/npey_fft - 1; - nzlo_fft = me_z*nz_pppm/npez_fft; - nzhi_fft = (me_z+1)*nz_pppm/npez_fft - 1; - - // PPPM grid for this proc, including ghosts - - ngrid = (nxhi_out-nxlo_out+1) * (nyhi_out-nylo_out+1) * - (nzhi_out-nzlo_out+1); - - // FFT arrays on this proc, without ghosts - // nfft = FFT points in FFT decomposition on this proc - // nfft_brick = FFT points in 3d brick-decomposition on this proc - // nfft_both = greater of 2 values - - nfft = (nxhi_fft-nxlo_fft+1) * (nyhi_fft-nylo_fft+1) * - (nzhi_fft-nzlo_fft+1); - int nfft_brick = (nxhi_in-nxlo_in+1) * (nyhi_in-nylo_in+1) * - (nzhi_in-nzlo_in+1); - nfft_both = MAX(nfft,nfft_brick); - - // buffer space for use in brick2fft and fillbrick - // idel = max # of ghost planes to send or recv in +/- dir of each dim - // nx,ny,nz = owned planes (including ghosts) in each dim - // nxx,nyy,nzz = max # of grid cells to send in each dim - // nbuf = max in any dim, augment by 3x for components of vd_xyz in fillbrick - - int idelx,idely,idelz,nx,ny,nz,nxx,nyy,nzz; - - idelx = MAX(nxlo_ghost,nxhi_ghost); - idelx = MAX(idelx,nxhi_out-nxhi_in); - idelx = MAX(idelx,nxlo_in-nxlo_out); - - idely = MAX(nylo_ghost,nyhi_ghost); - idely = MAX(idely,nyhi_out-nyhi_in); - idely = MAX(idely,nylo_in-nylo_out); - - idelz = MAX(nzlo_ghost,nzhi_ghost); - 
idelz = MAX(idelz,nzhi_out-nzhi_in); - idelz = MAX(idelz,nzlo_in-nzlo_out); - - nx = nxhi_out - nxlo_out + 1; - ny = nyhi_out - nylo_out + 1; - nz = nzhi_out - nzlo_out + 1; - - nxx = idelx * ny * nz; - nyy = idely * nx * nz; - nzz = idelz * nx * ny; - - nbuf = MAX(nxx,nyy); - nbuf = MAX(nbuf,nzz); - - nbuf_peratom = 7*nbuf; - nbuf *= 3; - - // print stats - - int ngrid_max,nfft_both_max,nbuf_max; - MPI_Allreduce(&ngrid,&ngrid_max,1,MPI_INT,MPI_MAX,world); - MPI_Allreduce(&nfft_both,&nfft_both_max,1,MPI_INT,MPI_MAX,world); - MPI_Allreduce(&nbuf,&nbuf_max,1,MPI_INT,MPI_MAX,world); - - if (me == 0) { - if (screen) fprintf(screen," brick FFT buffer size/proc = %d %d %d\n", - ngrid_max,nfft_both_max,nbuf_max); - if (logfile) fprintf(logfile," brick FFT buffer size/proc = %d %d %d\n", - ngrid_max,nfft_both_max,nbuf_max); - } - - // allocate K-space dependent memory - // don't invoke allocate_peratom() here, wait to see if needed - - allocate(); - - // pre-compute Green's function denomiator expansion - // pre-compute 1d charge distribution coefficients - - compute_gf_denom(); - compute_rho_coeff(); -} - -/* ---------------------------------------------------------------------- - adjust PPPM coeffs, called initially and whenever volume has changed -------------------------------------------------------------------------- */ - -void PPPMOld::setup() -{ - int i,j,k,l,m,n; - double *prd; - - // volume-dependent factors - // adjust z dimension for 2d slab PPPM - // z dimension for 3d PPPM is zprd since slab_volfactor = 1.0 - - if (triclinic == 0) prd = domain->prd; - else prd = domain->prd_lamda; - - double xprd = prd[0]; - double yprd = prd[1]; - double zprd = prd[2]; - double zprd_slab = zprd*slab_volfactor; - volume = xprd * yprd * zprd_slab; - - delxinv = nx_pppm/xprd; - delyinv = ny_pppm/yprd; - delzinv = nz_pppm/zprd_slab; - - delvolinv = delxinv*delyinv*delzinv; - - double unitkx = (2.0*MY_PI/xprd); - double unitky = (2.0*MY_PI/yprd); - double unitkz = 
(2.0*MY_PI/zprd_slab); - - // fkx,fky,fkz for my FFT grid pts - - double per; - - for (i = nxlo_fft; i <= nxhi_fft; i++) { - per = i - nx_pppm*(2*i/nx_pppm); - fkx[i] = unitkx*per; - } - - for (i = nylo_fft; i <= nyhi_fft; i++) { - per = i - ny_pppm*(2*i/ny_pppm); - fky[i] = unitky*per; - } - - for (i = nzlo_fft; i <= nzhi_fft; i++) { - per = i - nz_pppm*(2*i/nz_pppm); - fkz[i] = unitkz*per; - } - - // virial coefficients - - double sqk,vterm; - - n = 0; - for (k = nzlo_fft; k <= nzhi_fft; k++) { - for (j = nylo_fft; j <= nyhi_fft; j++) { - for (i = nxlo_fft; i <= nxhi_fft; i++) { - sqk = fkx[i]*fkx[i] + fky[j]*fky[j] + fkz[k]*fkz[k]; - if (sqk == 0.0) { - vg[n][0] = 0.0; - vg[n][1] = 0.0; - vg[n][2] = 0.0; - vg[n][3] = 0.0; - vg[n][4] = 0.0; - vg[n][5] = 0.0; - } else { - vterm = -2.0 * (1.0/sqk + 0.25/(g_ewald*g_ewald)); - vg[n][0] = 1.0 + vterm*fkx[i]*fkx[i]; - vg[n][1] = 1.0 + vterm*fky[j]*fky[j]; - vg[n][2] = 1.0 + vterm*fkz[k]*fkz[k]; - vg[n][3] = vterm*fkx[i]*fky[j]; - vg[n][4] = vterm*fkx[i]*fkz[k]; - vg[n][5] = vterm*fky[j]*fkz[k]; - } - n++; - } - } - } - - // modified (Hockney-Eastwood) Coulomb Green's function - - int nx,ny,nz,kper,lper,mper; - double snx,sny,snz,snx2,sny2,snz2; - double argx,argy,argz,wx,wy,wz,sx,sy,sz,qx,qy,qz; - double sum1,dot1,dot2; - double numerator,denominator; - - int nbx = static_cast<int> ((g_ewald*xprd/(MY_PI*nx_pppm)) * - pow(-log(EPS_HOC),0.25)); - int nby = static_cast<int> ((g_ewald*yprd/(MY_PI*ny_pppm)) * - pow(-log(EPS_HOC),0.25)); - int nbz = static_cast<int> ((g_ewald*zprd_slab/(MY_PI*nz_pppm)) * - pow(-log(EPS_HOC),0.25)); - - double form = 1.0; - - n = 0; - for (m = nzlo_fft; m <= nzhi_fft; m++) { - mper = m - nz_pppm*(2*m/nz_pppm); - snz = sin(0.5*unitkz*mper*zprd_slab/nz_pppm); - snz2 = snz*snz; - - for (l = nylo_fft; l <= nyhi_fft; l++) { - lper = l - ny_pppm*(2*l/ny_pppm); - sny = sin(0.5*unitky*lper*yprd/ny_pppm); - sny2 = sny*sny; - - for (k = nxlo_fft; k <= nxhi_fft; k++) { - kper = k - 
nx_pppm*(2*k/nx_pppm); - snx = sin(0.5*unitkx*kper*xprd/nx_pppm); - snx2 = snx*snx; - - sqk = pow(unitkx*kper,2.0) + pow(unitky*lper,2.0) + - pow(unitkz*mper,2.0); - - if (sqk != 0.0) { - numerator = form*12.5663706/sqk; - denominator = gf_denom(snx2,sny2,snz2); - sum1 = 0.0; - const double dorder = static_cast<double>(order); - for (nx = -nbx; nx <= nbx; nx++) { - qx = unitkx*(kper+nx_pppm*nx); - sx = exp(-0.25*pow(qx/g_ewald,2.0)); - wx = 1.0; - argx = 0.5*qx*xprd/nx_pppm; - if (argx != 0.0) wx = pow(sin(argx)/argx,dorder); - for (ny = -nby; ny <= nby; ny++) { - qy = unitky*(lper+ny_pppm*ny); - sy = exp(-0.25*pow(qy/g_ewald,2.0)); - wy = 1.0; - argy = 0.5*qy*yprd/ny_pppm; - if (argy != 0.0) wy = pow(sin(argy)/argy,dorder); - for (nz = -nbz; nz <= nbz; nz++) { - qz = unitkz*(mper+nz_pppm*nz); - sz = exp(-0.25*pow(qz/g_ewald,2.0)); - wz = 1.0; - argz = 0.5*qz*zprd_slab/nz_pppm; - if (argz != 0.0) wz = pow(sin(argz)/argz,dorder); - - dot1 = unitkx*kper*qx + unitky*lper*qy + unitkz*mper*qz; - dot2 = qx*qx+qy*qy+qz*qz; - sum1 += (dot1/dot2) * sx*sy*sz * pow(wx*wy*wz,2.0); - } - } - } - greensfn[n++] = numerator*sum1/denominator; - } else greensfn[n++] = 0.0; - } - } - } -} - -/* ---------------------------------------------------------------------- - compute the PPPM long-range force, energy, virial -------------------------------------------------------------------------- */ - -void PPPMOld::compute(int eflag, int vflag) -{ - int i,j; - - // set energy/virial flags - // invoke allocate_peratom() if needed for first time - - if (eflag || vflag) ev_setup(eflag,vflag); - else evflag = evflag_atom = eflag_global = vflag_global = - eflag_atom = vflag_atom = 0; - - if (evflag_atom && !peratom_allocate_flag) { - allocate_peratom(); - peratom_allocate_flag = 1; - } - - // convert atoms from box to lamda coords - - if (triclinic == 0) boxlo = domain->boxlo; - else { - boxlo = domain->boxlo_lamda; - domain->x2lamda(atom->nlocal); - } - - // extend size of per-atom arrays if 
necessary - - if (atom->nlocal > nmax) { - memory->destroy(part2grid); - nmax = atom->nmax; - memory->create(part2grid,nmax,3,"pppm:part2grid"); - } - - // find grid points for all my particles - // map my particle charge onto my local 3d density grid - - particle_map(); - make_rho(); - - // all procs communicate density values from their ghost cells - // to fully sum contribution in their 3d bricks - // remap from 3d decomposition to FFT decomposition - - brick2fft(); - - // compute potential gradient on my FFT grid and - // portion of e_long on this proc's FFT grid - // return gradients (electric fields) in 3d brick decomposition - // also performs per-atom calculations via poisson_peratom() - - poisson(eflag,vflag); - - // all procs communicate E-field values - // to fill ghost cells surrounding their 3d bricks - - fillbrick(); - - // extra per-atom energy/virial communication - - if (evflag_atom) fillbrick_peratom(); - - // calculate the force on my particles - - fieldforce(); - - // extra per-atom energy/virial communication - - if (evflag_atom) fieldforce_peratom(); - - // update qsum and qsqsum, if atom count has changed and energy needed - - if ((eflag_global || eflag_atom) && atom->natoms != natoms_original) { - qsum_qsq(); - natoms_original = atom->natoms; - } - - // sum global energy across procs and add in volume-dependent term - - const double qscale = qqrd2e * scale; - - if (eflag_global) { - double energy_all; - MPI_Allreduce(&energy,&energy_all,1,MPI_DOUBLE,MPI_SUM,world); - energy = energy_all; - - energy *= 0.5*volume; - energy -= g_ewald*qsqsum/MY_PIS + - MY_PI2*qsum*qsum / (g_ewald*g_ewald*volume); - energy *= qscale; - } - - // sum global virial across procs - - if (vflag_global) { - double virial_all[6]; - MPI_Allreduce(virial,virial_all,6,MPI_DOUBLE,MPI_SUM,world); - for (i = 0; i < 6; i++) virial[i] = 0.5*qscale*volume*virial_all[i]; - } - - // per-atom energy/virial - // energy includes self-energy correction - - if (evflag_atom) { - double 
*q = atom->q; - int nlocal = atom->nlocal; - - if (eflag_atom) { - for (i = 0; i < nlocal; i++) { - eatom[i] *= 0.5; - eatom[i] -= g_ewald*q[i]*q[i]/MY_PIS + MY_PI2*q[i]*qsum / - (g_ewald*g_ewald*volume); - eatom[i] *= qscale; - } - } - - if (vflag_atom) { - for (i = 0; i < nlocal; i++) - for (j = 0; j < 6; j++) vatom[i][j] *= 0.5*q[i]*qscale; - } - } - - // 2d slab correction - - if (slabflag == 1) slabcorr(); - - // convert atoms back from lamda to box coords - - if (triclinic) domain->lamda2x(atom->nlocal); -} - -/* ---------------------------------------------------------------------- - allocate memory that depends on # of K-vectors and order -------------------------------------------------------------------------- */ - -void PPPMOld::allocate() -{ - memory->create3d_offset(density_brick,nzlo_out,nzhi_out,nylo_out,nyhi_out, - nxlo_out,nxhi_out,"pppm:density_brick"); - memory->create3d_offset(vdx_brick,nzlo_out,nzhi_out,nylo_out,nyhi_out, - nxlo_out,nxhi_out,"pppm:vdx_brick"); - memory->create3d_offset(vdy_brick,nzlo_out,nzhi_out,nylo_out,nyhi_out, - nxlo_out,nxhi_out,"pppm:vdy_brick"); - memory->create3d_offset(vdz_brick,nzlo_out,nzhi_out,nylo_out,nyhi_out, - nxlo_out,nxhi_out,"pppm:vdz_brick"); - - memory->create(density_fft,nfft_both,"pppm:density_fft"); - memory->create(greensfn,nfft_both,"pppm:greensfn"); - memory->create(work1,2*nfft_both,"pppm:work1"); - memory->create(work2,2*nfft_both,"pppm:work2"); - memory->create(vg,nfft_both,6,"pppm:vg"); - - memory->create1d_offset(fkx,nxlo_fft,nxhi_fft,"pppm:fkx"); - memory->create1d_offset(fky,nylo_fft,nyhi_fft,"pppm:fky"); - memory->create1d_offset(fkz,nzlo_fft,nzhi_fft,"pppm:fkz"); - - memory->create(buf1,nbuf,"pppm:buf1"); - memory->create(buf2,nbuf,"pppm:buf2"); - - // summation coeffs - - memory->create(gf_b,order,"pppm:gf_b"); - memory->create2d_offset(rho1d,3,-order/2,order/2,"pppm:rho1d"); - memory->create2d_offset(rho_coeff,order,(1-order)/2,order/2,"pppm:rho_coeff"); - - // create 2 FFTs and a Remap - 
// 1st FFT keeps data in FFT decompostion - // 2nd FFT returns data in 3d brick decomposition - // remap takes data from 3d brick to FFT decomposition - - int tmp; - - fft1 = new FFT3d(lmp,world,nx_pppm,ny_pppm,nz_pppm, - nxlo_fft,nxhi_fft,nylo_fft,nyhi_fft,nzlo_fft,nzhi_fft, - nxlo_fft,nxhi_fft,nylo_fft,nyhi_fft,nzlo_fft,nzhi_fft, - 0,0,&tmp,collective_flag); - - fft2 = new FFT3d(lmp,world,nx_pppm,ny_pppm,nz_pppm, - nxlo_fft,nxhi_fft,nylo_fft,nyhi_fft,nzlo_fft,nzhi_fft, - nxlo_in,nxhi_in,nylo_in,nyhi_in,nzlo_in,nzhi_in, - 0,0,&tmp,collective_flag); - - remap = new Remap(lmp,world, - nxlo_in,nxhi_in,nylo_in,nyhi_in,nzlo_in,nzhi_in, - nxlo_fft,nxhi_fft,nylo_fft,nyhi_fft,nzlo_fft,nzhi_fft, - 1,0,0,FFT_PRECISION,collective_flag); -} - -/* ---------------------------------------------------------------------- - allocate per-atom memory that depends on # of K-vectors and order -------------------------------------------------------------------------- */ - -void PPPMOld::allocate_peratom() -{ - memory->create3d_offset(u_brick,nzlo_out,nzhi_out,nylo_out,nyhi_out, - nxlo_out,nxhi_out,"pppm:u_brick"); - - memory->create3d_offset(v0_brick,nzlo_out,nzhi_out,nylo_out,nyhi_out, - nxlo_out,nxhi_out,"pppm:v0_brick"); - memory->create3d_offset(v1_brick,nzlo_out,nzhi_out,nylo_out,nyhi_out, - nxlo_out,nxhi_out,"pppm:v1_brick"); - memory->create3d_offset(v2_brick,nzlo_out,nzhi_out,nylo_out,nyhi_out, - nxlo_out,nxhi_out,"pppm:v2_brick"); - memory->create3d_offset(v3_brick,nzlo_out,nzhi_out,nylo_out,nyhi_out, - nxlo_out,nxhi_out,"pppm:v3_brick"); - memory->create3d_offset(v4_brick,nzlo_out,nzhi_out,nylo_out,nyhi_out, - nxlo_out,nxhi_out,"pppm:v4_brick"); - memory->create3d_offset(v5_brick,nzlo_out,nzhi_out,nylo_out,nyhi_out, - nxlo_out,nxhi_out,"pppm:v5_brick"); - - memory->create(buf3,nbuf_peratom,"pppm:buf3"); - memory->create(buf4,nbuf_peratom,"pppm:buf4"); -} - -/* ---------------------------------------------------------------------- - deallocate memory that depends on # of 
K-vectors and order -------------------------------------------------------------------------- */ - -void PPPMOld::deallocate() -{ - memory->destroy3d_offset(density_brick,nzlo_out,nylo_out,nxlo_out); - memory->destroy3d_offset(vdx_brick,nzlo_out,nylo_out,nxlo_out); - memory->destroy3d_offset(vdy_brick,nzlo_out,nylo_out,nxlo_out); - memory->destroy3d_offset(vdz_brick,nzlo_out,nylo_out,nxlo_out); - - memory->destroy(density_fft); - memory->destroy(greensfn); - memory->destroy(work1); - memory->destroy(work2); - memory->destroy(vg); - - memory->destroy1d_offset(fkx,nxlo_fft); - memory->destroy1d_offset(fky,nylo_fft); - memory->destroy1d_offset(fkz,nzlo_fft); - - memory->destroy(buf1); - memory->destroy(buf2); - - memory->destroy(gf_b); - memory->destroy2d_offset(rho1d,-order/2); - memory->destroy2d_offset(rho_coeff,(1-order)/2); - - delete fft1; - delete fft2; - delete remap; -} - -/* ---------------------------------------------------------------------- - deallocate per-atom memory that depends on # of K-vectors and order -------------------------------------------------------------------------- */ - -void PPPMOld::deallocate_peratom() -{ - memory->destroy3d_offset(u_brick,nzlo_out,nylo_out,nxlo_out); - - memory->destroy3d_offset(v0_brick,nzlo_out,nylo_out,nxlo_out); - memory->destroy3d_offset(v1_brick,nzlo_out,nylo_out,nxlo_out); - memory->destroy3d_offset(v2_brick,nzlo_out,nylo_out,nxlo_out); - memory->destroy3d_offset(v3_brick,nzlo_out,nylo_out,nxlo_out); - memory->destroy3d_offset(v4_brick,nzlo_out,nylo_out,nxlo_out); - memory->destroy3d_offset(v5_brick,nzlo_out,nylo_out,nxlo_out); - - memory->destroy(buf3); - memory->destroy(buf4); -} - -/* ---------------------------------------------------------------------- - set size of FFT grid (nx,ny,nz_pppm) and g_ewald -------------------------------------------------------------------------- */ - -void PPPMOld::set_grid() -{ - // see JCP 109, pg 7698 for derivation of coefficients - // higher order coefficients may be 
computed if needed - - double **acons; - memory->create(acons,8,7,"pppm:acons"); - - acons[1][0] = 2.0 / 3.0; - acons[2][0] = 1.0 / 50.0; - acons[2][1] = 5.0 / 294.0; - acons[3][0] = 1.0 / 588.0; - acons[3][1] = 7.0 / 1440.0; - acons[3][2] = 21.0 / 3872.0; - acons[4][0] = 1.0 / 4320.0; - acons[4][1] = 3.0 / 1936.0; - acons[4][2] = 7601.0 / 2271360.0; - acons[4][3] = 143.0 / 28800.0; - acons[5][0] = 1.0 / 23232.0; - acons[5][1] = 7601.0 / 13628160.0; - acons[5][2] = 143.0 / 69120.0; - acons[5][3] = 517231.0 / 106536960.0; - acons[5][4] = 106640677.0 / 11737571328.0; - acons[6][0] = 691.0 / 68140800.0; - acons[6][1] = 13.0 / 57600.0; - acons[6][2] = 47021.0 / 35512320.0; - acons[6][3] = 9694607.0 / 2095994880.0; - acons[6][4] = 733191589.0 / 59609088000.0; - acons[6][5] = 326190917.0 / 11700633600.0; - acons[7][0] = 1.0 / 345600.0; - acons[7][1] = 3617.0 / 35512320.0; - acons[7][2] = 745739.0 / 838397952.0; - acons[7][3] = 56399353.0 / 12773376000.0; - acons[7][4] = 25091609.0 / 1560084480.0; - acons[7][5] = 1755948832039.0 / 36229939200000.0; - acons[7][6] = 4887769399.0 / 37838389248.0; - - // use xprd,yprd,zprd even if triclinic so grid size is the same - // adjust z dimension for 2d slab PPPM - // 3d PPPM just uses zprd since slab_volfactor = 1.0 - - double xprd = domain->xprd; - double yprd = domain->yprd; - double zprd = domain->zprd; - double zprd_slab = zprd*slab_volfactor; - - // make initial g_ewald estimate - // based on desired accuracy and real space cutoff - // fluid-occupied volume used to estimate real-space error - // zprd used rather than zprd_slab - - double h_x,h_y,h_z; - bigint natoms = atom->natoms; - - if (!gewaldflag) { - if (accuracy <= 0.0) - error->all(FLERR,"KSpace accuracy must be > 0"); - g_ewald = accuracy*sqrt(natoms*cutoff*xprd*yprd*zprd) / (2.0*q2); - if (g_ewald >= 1.0) g_ewald = (1.35 - 0.15*log(accuracy))/cutoff; - else g_ewald = sqrt(-log(g_ewald)) / cutoff; - } - - // set optimal nx_pppm,ny_pppm,nz_pppm based on order and 
accuracy - // nz_pppm uses extended zprd_slab instead of zprd - // h = 1/g_ewald is upper bound on h such that h*g_ewald <= 1 - // reduce it until accuracy target is met - - if (!gridflag) { - double err; - h_x = h_y = h_z = 1.0/g_ewald; - - nx_pppm = static_cast<int> (xprd/h_x) + 1; - ny_pppm = static_cast<int> (yprd/h_y) + 1; - nz_pppm = static_cast<int> (zprd_slab/h_z) + 1; - - err = rms(h_x,xprd,natoms,q2,acons); - while (err > accuracy) { - err = rms(h_x,xprd,natoms,q2,acons); - nx_pppm++; - h_x = xprd/nx_pppm; - } - - err = rms(h_y,yprd,natoms,q2,acons); - while (err > accuracy) { - err = rms(h_y,yprd,natoms,q2,acons); - ny_pppm++; - h_y = yprd/ny_pppm; - } - - err = rms(h_z,zprd_slab,natoms,q2,acons); - while (err > accuracy) { - err = rms(h_z,zprd_slab,natoms,q2,acons); - nz_pppm++; - h_z = zprd_slab/nz_pppm; - } - } - - // boost grid size until it is factorable - - while (!factorable(nx_pppm)) nx_pppm++; - while (!factorable(ny_pppm)) ny_pppm++; - while (!factorable(nz_pppm)) nz_pppm++; - - // adjust g_ewald for new grid size - - h_x = xprd/static_cast<double>(nx_pppm); - h_y = yprd/static_cast<double>(ny_pppm); - h_z = zprd_slab/static_cast<double>(nz_pppm); - - if (!gewaldflag) { - double gew1,gew2,dgew,f,fmid,hmin,rtb; - int ncount; - - gew1 = 0.0; - g_ewald = gew1; - f = diffpr(h_x,h_y,h_z,q2,acons); - - hmin = MIN(h_x,MIN(h_y,h_z)); - gew2 = 10.0/hmin; - g_ewald = gew2; - fmid = diffpr(h_x,h_y,h_z,q2,acons); - - if (f*fmid >= 0.0) error->all(FLERR,"Cannot compute PPPM G"); - rtb = f < 0.0 ? 
(dgew=gew2-gew1,gew1) : (dgew=gew1-gew2,gew2); - ncount = 0; - while (fabs(dgew) > SMALL && fmid != 0.0) { - dgew *= 0.5; - g_ewald = rtb + dgew; - fmid = diffpr(h_x,h_y,h_z,q2,acons); - if (fmid <= 0.0) rtb = g_ewald; - ncount++; - if (ncount > LARGE) error->all(FLERR,"Cannot compute PPPM G"); - } - } - - // final RMS accuracy - - double lprx = rms(h_x,xprd,natoms,q2,acons); - double lpry = rms(h_y,yprd,natoms,q2,acons); - double lprz = rms(h_z,zprd_slab,natoms,q2,acons); - double lpr = sqrt(lprx*lprx + lpry*lpry + lprz*lprz) / sqrt(3.0); - double q2_over_sqrt = q2 / sqrt(natoms*cutoff*xprd*yprd*zprd_slab); - double spr = 2.0 *q2_over_sqrt * exp(-g_ewald*g_ewald*cutoff*cutoff); - double tpr = estimate_table_accuracy(q2_over_sqrt,spr); - double accuracy = sqrt(lpr*lpr + spr*spr + tpr*tpr); - - // free local memory - - memory->destroy(acons); - - // print info - - if (me == 0) { -#ifdef FFT_SINGLE - const char fft_prec[] = "single"; -#else - const char fft_prec[] = "double"; -#endif - if (screen) { - fprintf(screen," G vector (1/distance)= %g\n",g_ewald); - fprintf(screen," grid = %d %d %d\n",nx_pppm,ny_pppm,nz_pppm); - fprintf(screen," stencil order = %d\n",order); - fprintf(screen," estimated absolute RMS force accuracy = %g\n", - accuracy); - fprintf(screen," estimated relative force accuracy = %g\n", - accuracy/two_charge_force); - fprintf(screen," using %s precision FFTs\n",fft_prec); - } - if (logfile) { - fprintf(logfile," G vector (1/distance) = %g\n",g_ewald); - fprintf(logfile," grid = %d %d %d\n",nx_pppm,ny_pppm,nz_pppm); - fprintf(logfile," stencil order = %d\n",order); - fprintf(logfile," estimated absolute RMS force accuracy = %g\n", - accuracy); - fprintf(logfile," estimated relative force accuracy = %g\n", - accuracy/two_charge_force); - fprintf(logfile," using %s precision FFTs\n",fft_prec); - } - } -} - -/* ---------------------------------------------------------------------- - check if all factors of n are in list of factors - return 1 if yes, 0 
if no -------------------------------------------------------------------------- */ - -int PPPMOld::factorable(int n) -{ - int i; - - while (n > 1) { - for (i = 0; i < nfactors; i++) { - if (n % factors[i] == 0) { - n /= factors[i]; - break; - } - } - if (i == nfactors) return 0; - } - - return 1; -} - -/* ---------------------------------------------------------------------- - compute RMS accuracy for a dimension -------------------------------------------------------------------------- */ - -double PPPMOld::rms(double h, double prd, bigint natoms, - double q2, double **acons) -{ - double sum = 0.0; - for (int m = 0; m < order; m++) - sum += acons[order][m] * pow(h*g_ewald,2.0*m); - double value = q2 * pow(h*g_ewald,(double)order) * - sqrt(g_ewald*prd*sqrt(2.0*MY_PI)*sum/natoms) / (prd*prd); - return value; -} - -/* ---------------------------------------------------------------------- - compute difference in real-space and KSpace RMS accuracy -------------------------------------------------------------------------- */ - -double PPPMOld::diffpr(double h_x, double h_y, double h_z, double q2, - double **acons) -{ - double lprx,lpry,lprz,kspace_prec,real_prec; - double xprd = domain->xprd; - double yprd = domain->yprd; - double zprd = domain->zprd; - bigint natoms = atom->natoms; - - lprx = rms(h_x,xprd,natoms,q2,acons); - lpry = rms(h_y,yprd,natoms,q2,acons); - lprz = rms(h_z,zprd*slab_volfactor,natoms,q2,acons); - kspace_prec = sqrt(lprx*lprx + lpry*lpry + lprz*lprz) / sqrt(3.0); - real_prec = 2.0*q2 * exp(-g_ewald*g_ewald*cutoff*cutoff) / - sqrt(static_cast<double>(natoms)*cutoff*xprd*yprd*zprd); - double value = kspace_prec - real_prec; - return value; -} - -/* ---------------------------------------------------------------------- - pre-compute Green's function denominator expansion coeffs, Gamma(2n) -------------------------------------------------------------------------- */ - -void PPPMOld::compute_gf_denom() -{ - int k,l,m; - - for (l = 1; l < order; l++) 
gf_b[l] = 0.0; - gf_b[0] = 1.0; - - for (m = 1; m < order; m++) { - for (l = m; l > 0; l--) - gf_b[l] = 4.0 * (gf_b[l]*(l-m)*(l-m-0.5)-gf_b[l-1]*(l-m-1)*(l-m-1)); - gf_b[0] = 4.0 * (gf_b[0]*(l-m)*(l-m-0.5)); - } - - bigint ifact = 1; - for (k = 1; k < 2*order; k++) ifact *= k; - double gaminv = 1.0/ifact; - for (l = 0; l < order; l++) gf_b[l] *= gaminv; -} - -/* ---------------------------------------------------------------------- - ghost-swap to accumulate full density in brick decomposition - remap density from 3d brick decomposition to FFT decomposition -------------------------------------------------------------------------- */ - -void PPPMOld::brick2fft() -{ - int i,n,ix,iy,iz; - MPI_Request request; - - // pack my ghosts for +x processor - // pass data to self or +x processor - // unpack and sum recv data into my real cells - - n = 0; - for (iz = nzlo_out; iz <= nzhi_out; iz++) - for (iy = nylo_out; iy <= nyhi_out; iy++) - for (ix = nxhi_in+1; ix <= nxhi_out; ix++) - buf1[n++] = density_brick[iz][iy][ix]; - - if (comm->procneigh[0][1] == me) - for (i = 0; i < n; i++) buf2[i] = buf1[i]; - else { - MPI_Irecv(buf2,nbuf,MPI_FFT_SCALAR,comm->procneigh[0][0],0,world,&request); - MPI_Send(buf1,n,MPI_FFT_SCALAR,comm->procneigh[0][1],0,world); - MPI_Wait(&request,MPI_STATUS_IGNORE); - } - - n = 0; - for (iz = nzlo_out; iz <= nzhi_out; iz++) - for (iy = nylo_out; iy <= nyhi_out; iy++) - for (ix = nxlo_in; ix < nxlo_in+nxlo_ghost; ix++) - density_brick[iz][iy][ix] += buf2[n++]; - - // pack my ghosts for -x processor - // pass data to self or -x processor - // unpack and sum recv data into my real cells - - n = 0; - for (iz = nzlo_out; iz <= nzhi_out; iz++) - for (iy = nylo_out; iy <= nyhi_out; iy++) - for (ix = nxlo_out; ix < nxlo_in; ix++) - buf1[n++] = density_brick[iz][iy][ix]; - - if (comm->procneigh[0][0] == me) - for (i = 0; i < n; i++) buf2[i] = buf1[i]; - else { - MPI_Irecv(buf2,nbuf,MPI_FFT_SCALAR,comm->procneigh[0][1],0,world,&request); - 
MPI_Send(buf1,n,MPI_FFT_SCALAR,comm->procneigh[0][0],0,world); - MPI_Wait(&request,MPI_STATUS_IGNORE); - } - - n = 0; - for (iz = nzlo_out; iz <= nzhi_out; iz++) - for (iy = nylo_out; iy <= nyhi_out; iy++) - for (ix = nxhi_in-nxhi_ghost+1; ix <= nxhi_in; ix++) - density_brick[iz][iy][ix] += buf2[n++]; - - // pack my ghosts for +y processor - // pass data to self or +y processor - // unpack and sum recv data into my real cells - - n = 0; - for (iz = nzlo_out; iz <= nzhi_out; iz++) - for (iy = nyhi_in+1; iy <= nyhi_out; iy++) - for (ix = nxlo_in; ix <= nxhi_in; ix++) - buf1[n++] = density_brick[iz][iy][ix]; - - if (comm->procneigh[1][1] == me) - for (i = 0; i < n; i++) buf2[i] = buf1[i]; - else { - MPI_Irecv(buf2,nbuf,MPI_FFT_SCALAR,comm->procneigh[1][0],0,world,&request); - MPI_Send(buf1,n,MPI_FFT_SCALAR,comm->procneigh[1][1],0,world); - MPI_Wait(&request,MPI_STATUS_IGNORE); - } - - n = 0; - for (iz = nzlo_out; iz <= nzhi_out; iz++) - for (iy = nylo_in; iy < nylo_in+nylo_ghost; iy++) - for (ix = nxlo_in; ix <= nxhi_in; ix++) - density_brick[iz][iy][ix] += buf2[n++]; - - // pack my ghosts for -y processor - // pass data to self or -y processor - // unpack and sum recv data into my real cells - - n = 0; - for (iz = nzlo_out; iz <= nzhi_out; iz++) - for (iy = nylo_out; iy < nylo_in; iy++) - for (ix = nxlo_in; ix <= nxhi_in; ix++) - buf1[n++] = density_brick[iz][iy][ix]; - - if (comm->procneigh[1][0] == me) - for (i = 0; i < n; i++) buf2[i] = buf1[i]; - else { - MPI_Irecv(buf2,nbuf,MPI_FFT_SCALAR,comm->procneigh[1][1],0,world,&request); - MPI_Send(buf1,n,MPI_FFT_SCALAR,comm->procneigh[1][0],0,world); - MPI_Wait(&request,MPI_STATUS_IGNORE); - } - - n = 0; - for (iz = nzlo_out; iz <= nzhi_out; iz++) - for (iy = nyhi_in-nyhi_ghost+1; iy <= nyhi_in; iy++) - for (ix = nxlo_in; ix <= nxhi_in; ix++) - density_brick[iz][iy][ix] += buf2[n++]; - - // pack my ghosts for +z processor - // pass data to self or +z processor - // unpack and sum recv data into my real cells - - n = 0; 
- for (iz = nzhi_in+1; iz <= nzhi_out; iz++) - for (iy = nylo_in; iy <= nyhi_in; iy++) - for (ix = nxlo_in; ix <= nxhi_in; ix++) - buf1[n++] = density_brick[iz][iy][ix]; - - if (comm->procneigh[2][1] == me) - for (i = 0; i < n; i++) buf2[i] = buf1[i]; - else { - MPI_Irecv(buf2,nbuf,MPI_FFT_SCALAR,comm->procneigh[2][0],0,world,&request); - MPI_Send(buf1,n,MPI_FFT_SCALAR,comm->procneigh[2][1],0,world); - MPI_Wait(&request,MPI_STATUS_IGNORE); - } - - n = 0; - for (iz = nzlo_in; iz < nzlo_in+nzlo_ghost; iz++) - for (iy = nylo_in; iy <= nyhi_in; iy++) - for (ix = nxlo_in; ix <= nxhi_in; ix++) - density_brick[iz][iy][ix] += buf2[n++]; - - // pack my ghosts for -z processor - // pass data to self or -z processor - // unpack and sum recv data into my real cells - - n = 0; - for (iz = nzlo_out; iz < nzlo_in; iz++) - for (iy = nylo_in; iy <= nyhi_in; iy++) - for (ix = nxlo_in; ix <= nxhi_in; ix++) - buf1[n++] = density_brick[iz][iy][ix]; - - if (comm->procneigh[2][0] == me) - for (i = 0; i < n; i++) buf2[i] = buf1[i]; - else { - MPI_Irecv(buf2,nbuf,MPI_FFT_SCALAR,comm->procneigh[2][1],0,world,&request); - MPI_Send(buf1,n,MPI_FFT_SCALAR,comm->procneigh[2][0],0,world); - MPI_Wait(&request,MPI_STATUS_IGNORE); - } - - n = 0; - for (iz = nzhi_in-nzhi_ghost+1; iz <= nzhi_in; iz++) - for (iy = nylo_in; iy <= nyhi_in; iy++) - for (ix = nxlo_in; ix <= nxhi_in; ix++) - density_brick[iz][iy][ix] += buf2[n++]; - - // remap from 3d brick decomposition to FFT decomposition - // copy grabs inner portion of density from 3d brick - // remap could be done as pre-stage of FFT, - // but this works optimally on only double values, not complex values - - n = 0; - for (iz = nzlo_in; iz <= nzhi_in; iz++) - for (iy = nylo_in; iy <= nyhi_in; iy++) - for (ix = nxlo_in; ix <= nxhi_in; ix++) - density_fft[n++] = density_brick[iz][iy][ix]; - - remap->perform(density_fft,density_fft,work1); -} - -/* ---------------------------------------------------------------------- - ghost-swap to fill ghost cells of 
my brick with field values -------------------------------------------------------------------------- */ - -void PPPMOld::fillbrick() -{ - int i,n,ix,iy,iz; - MPI_Request request; - - // pack my real cells for +z processor - // pass data to self or +z processor - // unpack and sum recv data into my ghost cells - - n = 0; - for (iz = nzhi_in-nzhi_ghost+1; iz <= nzhi_in; iz++) - for (iy = nylo_in; iy <= nyhi_in; iy++) - for (ix = nxlo_in; ix <= nxhi_in; ix++) { - buf1[n++] = vdx_brick[iz][iy][ix]; - buf1[n++] = vdy_brick[iz][iy][ix]; - buf1[n++] = vdz_brick[iz][iy][ix]; - } - - if (comm->procneigh[2][1] == me) - for (i = 0; i < n; i++) buf2[i] = buf1[i]; - else { - MPI_Irecv(buf2,nbuf,MPI_FFT_SCALAR,comm->procneigh[2][0],0,world,&request); - MPI_Send(buf1,n,MPI_FFT_SCALAR,comm->procneigh[2][1],0,world); - MPI_Wait(&request,MPI_STATUS_IGNORE); - } - - n = 0; - for (iz = nzlo_out; iz < nzlo_in; iz++) - for (iy = nylo_in; iy <= nyhi_in; iy++) - for (ix = nxlo_in; ix <= nxhi_in; ix++) { - vdx_brick[iz][iy][ix] = buf2[n++]; - vdy_brick[iz][iy][ix] = buf2[n++]; - vdz_brick[iz][iy][ix] = buf2[n++]; - } - - // pack my real cells for -z processor - // pass data to self or -z processor - // unpack and sum recv data into my ghost cells - - n = 0; - for (iz = nzlo_in; iz < nzlo_in+nzlo_ghost; iz++) - for (iy = nylo_in; iy <= nyhi_in; iy++) - for (ix = nxlo_in; ix <= nxhi_in; ix++) { - buf1[n++] = vdx_brick[iz][iy][ix]; - buf1[n++] = vdy_brick[iz][iy][ix]; - buf1[n++] = vdz_brick[iz][iy][ix]; - } - - if (comm->procneigh[2][0] == me) - for (i = 0; i < n; i++) buf2[i] = buf1[i]; - else { - MPI_Irecv(buf2,nbuf,MPI_FFT_SCALAR,comm->procneigh[2][1],0,world,&request); - MPI_Send(buf1,n,MPI_FFT_SCALAR,comm->procneigh[2][0],0,world); - MPI_Wait(&request,MPI_STATUS_IGNORE); - } - - n = 0; - for (iz = nzhi_in+1; iz <= nzhi_out; iz++) - for (iy = nylo_in; iy <= nyhi_in; iy++) - for (ix = nxlo_in; ix <= nxhi_in; ix++) { - vdx_brick[iz][iy][ix] = buf2[n++]; - vdy_brick[iz][iy][ix] = 
buf2[n++]; - vdz_brick[iz][iy][ix] = buf2[n++]; - } - - // pack my real cells for +y processor - // pass data to self or +y processor - // unpack and sum recv data into my ghost cells - - n = 0; - for (iz = nzlo_out; iz <= nzhi_out; iz++) - for (iy = nyhi_in-nyhi_ghost+1; iy <= nyhi_in; iy++) - for (ix = nxlo_in; ix <= nxhi_in; ix++) { - buf1[n++] = vdx_brick[iz][iy][ix]; - buf1[n++] = vdy_brick[iz][iy][ix]; - buf1[n++] = vdz_brick[iz][iy][ix]; - } - - if (comm->procneigh[1][1] == me) - for (i = 0; i < n; i++) buf2[i] = buf1[i]; - else { - MPI_Irecv(buf2,nbuf,MPI_FFT_SCALAR,comm->procneigh[1][0],0,world,&request); - MPI_Send(buf1,n,MPI_FFT_SCALAR,comm->procneigh[1][1],0,world); - MPI_Wait(&request,MPI_STATUS_IGNORE); - } - - n = 0; - for (iz = nzlo_out; iz <= nzhi_out; iz++) - for (iy = nylo_out; iy < nylo_in; iy++) - for (ix = nxlo_in; ix <= nxhi_in; ix++) { - vdx_brick[iz][iy][ix] = buf2[n++]; - vdy_brick[iz][iy][ix] = buf2[n++]; - vdz_brick[iz][iy][ix] = buf2[n++]; - } - - // pack my real cells for -y processor - // pass data to self or -y processor - // unpack and sum recv data into my ghost cells - - n = 0; - for (iz = nzlo_out; iz <= nzhi_out; iz++) - for (iy = nylo_in; iy < nylo_in+nylo_ghost; iy++) - for (ix = nxlo_in; ix <= nxhi_in; ix++) { - buf1[n++] = vdx_brick[iz][iy][ix]; - buf1[n++] = vdy_brick[iz][iy][ix]; - buf1[n++] = vdz_brick[iz][iy][ix]; - } - - if (comm->procneigh[1][0] == me) - for (i = 0; i < n; i++) buf2[i] = buf1[i]; - else { - MPI_Irecv(buf2,nbuf,MPI_FFT_SCALAR,comm->procneigh[1][1],0,world,&request); - MPI_Send(buf1,n,MPI_FFT_SCALAR,comm->procneigh[1][0],0,world); - MPI_Wait(&request,MPI_STATUS_IGNORE); - } - - n = 0; - for (iz = nzlo_out; iz <= nzhi_out; iz++) - for (iy = nyhi_in+1; iy <= nyhi_out; iy++) - for (ix = nxlo_in; ix <= nxhi_in; ix++) { - vdx_brick[iz][iy][ix] = buf2[n++]; - vdy_brick[iz][iy][ix] = buf2[n++]; - vdz_brick[iz][iy][ix] = buf2[n++]; - } - - // pack my real cells for +x processor - // pass data to self or +x 
processor - // unpack and sum recv data into my ghost cells - - n = 0; - for (iz = nzlo_out; iz <= nzhi_out; iz++) - for (iy = nylo_out; iy <= nyhi_out; iy++) - for (ix = nxhi_in-nxhi_ghost+1; ix <= nxhi_in; ix++) { - buf1[n++] = vdx_brick[iz][iy][ix]; - buf1[n++] = vdy_brick[iz][iy][ix]; - buf1[n++] = vdz_brick[iz][iy][ix]; - } - - if (comm->procneigh[0][1] == me) - for (i = 0; i < n; i++) buf2[i] = buf1[i]; - else { - MPI_Irecv(buf2,nbuf,MPI_FFT_SCALAR,comm->procneigh[0][0],0,world,&request); - MPI_Send(buf1,n,MPI_FFT_SCALAR,comm->procneigh[0][1],0,world); - MPI_Wait(&request,MPI_STATUS_IGNORE); - } - - n = 0; - for (iz = nzlo_out; iz <= nzhi_out; iz++) - for (iy = nylo_out; iy <= nyhi_out; iy++) - for (ix = nxlo_out; ix < nxlo_in; ix++) { - vdx_brick[iz][iy][ix] = buf2[n++]; - vdy_brick[iz][iy][ix] = buf2[n++]; - vdz_brick[iz][iy][ix] = buf2[n++]; - } - - // pack my real cells for -x processor - // pass data to self or -x processor - // unpack and sum recv data into my ghost cells - - n = 0; - for (iz = nzlo_out; iz <= nzhi_out; iz++) - for (iy = nylo_out; iy <= nyhi_out; iy++) - for (ix = nxlo_in; ix < nxlo_in+nxlo_ghost; ix++) { - buf1[n++] = vdx_brick[iz][iy][ix]; - buf1[n++] = vdy_brick[iz][iy][ix]; - buf1[n++] = vdz_brick[iz][iy][ix]; - } - - if (comm->procneigh[0][0] == me) - for (i = 0; i < n; i++) buf2[i] = buf1[i]; - else { - MPI_Irecv(buf2,nbuf,MPI_FFT_SCALAR,comm->procneigh[0][1],0,world,&request); - MPI_Send(buf1,n,MPI_FFT_SCALAR,comm->procneigh[0][0],0,world); - MPI_Wait(&request,MPI_STATUS_IGNORE); - } - - n = 0; - for (iz = nzlo_out; iz <= nzhi_out; iz++) - for (iy = nylo_out; iy <= nyhi_out; iy++) - for (ix = nxhi_in+1; ix <= nxhi_out; ix++) { - vdx_brick[iz][iy][ix] = buf2[n++]; - vdy_brick[iz][iy][ix] = buf2[n++]; - vdz_brick[iz][iy][ix] = buf2[n++]; - } -} - -/* ---------------------------------------------------------------------- - ghost-swap to fill ghost cells of my brick with per-atom field values 
-------------------------------------------------------------------------- */ - -void PPPMOld::fillbrick_peratom() -{ - int i,n,ix,iy,iz; - MPI_Request request; - - // pack my real cells for +z processor - // pass data to self or +z processor - // unpack and sum recv data into my ghost cells - - n = 0; - for (iz = nzhi_in-nzhi_ghost+1; iz <= nzhi_in; iz++) - for (iy = nylo_in; iy <= nyhi_in; iy++) - for (ix = nxlo_in; ix <= nxhi_in; ix++) { - if (eflag_atom) buf3[n++] = u_brick[iz][iy][ix]; - if (vflag_atom) { - buf3[n++] = v0_brick[iz][iy][ix]; - buf3[n++] = v1_brick[iz][iy][ix]; - buf3[n++] = v2_brick[iz][iy][ix]; - buf3[n++] = v3_brick[iz][iy][ix]; - buf3[n++] = v4_brick[iz][iy][ix]; - buf3[n++] = v5_brick[iz][iy][ix]; - } - } - - if (comm->procneigh[2][1] == me) - for (i = 0; i < n; i++) buf4[i] = buf3[i]; - else { - MPI_Irecv(buf4,nbuf_peratom,MPI_FFT_SCALAR, - comm->procneigh[2][0],0,world,&request); - MPI_Send(buf3,n,MPI_FFT_SCALAR,comm->procneigh[2][1],0,world); - MPI_Wait(&request,MPI_STATUS_IGNORE); - } - - n = 0; - for (iz = nzlo_out; iz < nzlo_in; iz++) - for (iy = nylo_in; iy <= nyhi_in; iy++) - for (ix = nxlo_in; ix <= nxhi_in; ix++) { - if (eflag_atom) u_brick[iz][iy][ix] = buf4[n++]; - if (vflag_atom) { - v0_brick[iz][iy][ix] = buf4[n++]; - v1_brick[iz][iy][ix] = buf4[n++]; - v2_brick[iz][iy][ix] = buf4[n++]; - v3_brick[iz][iy][ix] = buf4[n++]; - v4_brick[iz][iy][ix] = buf4[n++]; - v5_brick[iz][iy][ix] = buf4[n++]; - } - } - - // pack my real cells for -z processor - // pass data to self or -z processor - // unpack and sum recv data into my ghost cells - - n = 0; - for (iz = nzlo_in; iz < nzlo_in+nzlo_ghost; iz++) - for (iy = nylo_in; iy <= nyhi_in; iy++) - for (ix = nxlo_in; ix <= nxhi_in; ix++) { - if (eflag_atom) buf3[n++] = u_brick[iz][iy][ix]; - if (vflag_atom) { - buf3[n++] = v0_brick[iz][iy][ix]; - buf3[n++] = v1_brick[iz][iy][ix]; - buf3[n++] = v2_brick[iz][iy][ix]; - buf3[n++] = v3_brick[iz][iy][ix]; - buf3[n++] = v4_brick[iz][iy][ix]; - 
buf3[n++] = v5_brick[iz][iy][ix]; - } - } - - if (comm->procneigh[2][0] == me) - for (i = 0; i < n; i++) buf4[i] = buf3[i]; - else { - MPI_Irecv(buf4,nbuf_peratom,MPI_FFT_SCALAR, - comm->procneigh[2][1],0,world,&request); - MPI_Send(buf3,n,MPI_FFT_SCALAR,comm->procneigh[2][0],0,world); - MPI_Wait(&request,MPI_STATUS_IGNORE); - } - - n = 0; - for (iz = nzhi_in+1; iz <= nzhi_out; iz++) - for (iy = nylo_in; iy <= nyhi_in; iy++) - for (ix = nxlo_in; ix <= nxhi_in; ix++) { - if (eflag_atom) u_brick[iz][iy][ix] = buf4[n++]; - if (vflag_atom) { - v0_brick[iz][iy][ix] = buf4[n++]; - v1_brick[iz][iy][ix] = buf4[n++]; - v2_brick[iz][iy][ix] = buf4[n++]; - v3_brick[iz][iy][ix] = buf4[n++]; - v4_brick[iz][iy][ix] = buf4[n++]; - v5_brick[iz][iy][ix] = buf4[n++]; - } - } - - // pack my real cells for +y processor - // pass data to self or +y processor - // unpack and sum recv data into my ghost cells - - n = 0; - for (iz = nzlo_out; iz <= nzhi_out; iz++) - for (iy = nyhi_in-nyhi_ghost+1; iy <= nyhi_in; iy++) - for (ix = nxlo_in; ix <= nxhi_in; ix++) { - if (eflag_atom) buf3[n++] = u_brick[iz][iy][ix]; - if (vflag_atom) { - buf3[n++] = v0_brick[iz][iy][ix]; - buf3[n++] = v1_brick[iz][iy][ix]; - buf3[n++] = v2_brick[iz][iy][ix]; - buf3[n++] = v3_brick[iz][iy][ix]; - buf3[n++] = v4_brick[iz][iy][ix]; - buf3[n++] = v5_brick[iz][iy][ix]; - } - } - - if (comm->procneigh[1][1] == me) - for (i = 0; i < n; i++) buf4[i] = buf3[i]; - else { - MPI_Irecv(buf4,nbuf_peratom,MPI_FFT_SCALAR, - comm->procneigh[1][0],0,world,&request); - MPI_Send(buf3,n,MPI_FFT_SCALAR,comm->procneigh[1][1],0,world); - MPI_Wait(&request,MPI_STATUS_IGNORE); - } - - n = 0; - for (iz = nzlo_out; iz <= nzhi_out; iz++) - for (iy = nylo_out; iy < nylo_in; iy++) - for (ix = nxlo_in; ix <= nxhi_in; ix++) { - if (eflag_atom) u_brick[iz][iy][ix] = buf4[n++]; - if (vflag_atom) { - v0_brick[iz][iy][ix] = buf4[n++]; - v1_brick[iz][iy][ix] = buf4[n++]; - v2_brick[iz][iy][ix] = buf4[n++]; - v3_brick[iz][iy][ix] = buf4[n++]; - 
v4_brick[iz][iy][ix] = buf4[n++]; - v5_brick[iz][iy][ix] = buf4[n++]; - } - } - - // pack my real cells for -y processor - // pass data to self or -y processor - // unpack and sum recv data into my ghost cells - - n = 0; - for (iz = nzlo_out; iz <= nzhi_out; iz++) - for (iy = nylo_in; iy < nylo_in+nylo_ghost; iy++) - for (ix = nxlo_in; ix <= nxhi_in; ix++) { - if (eflag_atom) buf3[n++] = u_brick[iz][iy][ix]; - if (vflag_atom) { - buf3[n++] = v0_brick[iz][iy][ix]; - buf3[n++] = v1_brick[iz][iy][ix]; - buf3[n++] = v2_brick[iz][iy][ix]; - buf3[n++] = v3_brick[iz][iy][ix]; - buf3[n++] = v4_brick[iz][iy][ix]; - buf3[n++] = v5_brick[iz][iy][ix]; - } - } - - if (comm->procneigh[1][0] == me) - for (i = 0; i < n; i++) buf4[i] = buf3[i]; - else { - MPI_Irecv(buf4,nbuf_peratom,MPI_FFT_SCALAR, - comm->procneigh[1][1],0,world,&request); - MPI_Send(buf3,n,MPI_FFT_SCALAR,comm->procneigh[1][0],0,world); - MPI_Wait(&request,MPI_STATUS_IGNORE); - } - - n = 0; - for (iz = nzlo_out; iz <= nzhi_out; iz++) - for (iy = nyhi_in+1; iy <= nyhi_out; iy++) - for (ix = nxlo_in; ix <= nxhi_in; ix++) { - if (eflag_atom) u_brick[iz][iy][ix] = buf4[n++]; - if (vflag_atom) { - v0_brick[iz][iy][ix] = buf4[n++]; - v1_brick[iz][iy][ix] = buf4[n++]; - v2_brick[iz][iy][ix] = buf4[n++]; - v3_brick[iz][iy][ix] = buf4[n++]; - v4_brick[iz][iy][ix] = buf4[n++]; - v5_brick[iz][iy][ix] = buf4[n++]; - } - } - - // pack my real cells for +x processor - // pass data to self or +x processor - // unpack and sum recv data into my ghost cells - - n = 0; - for (iz = nzlo_out; iz <= nzhi_out; iz++) - for (iy = nylo_out; iy <= nyhi_out; iy++) - for (ix = nxhi_in-nxhi_ghost+1; ix <= nxhi_in; ix++) { - if (eflag_atom) buf3[n++] = u_brick[iz][iy][ix]; - if (vflag_atom) { - buf3[n++] = v0_brick[iz][iy][ix]; - buf3[n++] = v1_brick[iz][iy][ix]; - buf3[n++] = v2_brick[iz][iy][ix]; - buf3[n++] = v3_brick[iz][iy][ix]; - buf3[n++] = v4_brick[iz][iy][ix]; - buf3[n++] = v5_brick[iz][iy][ix]; - } - } - - if (comm->procneigh[0][1] == 
me) - for (i = 0; i < n; i++) buf4[i] = buf3[i]; - else { - MPI_Irecv(buf4,nbuf_peratom,MPI_FFT_SCALAR, - comm->procneigh[0][0],0,world,&request); - MPI_Send(buf3,n,MPI_FFT_SCALAR,comm->procneigh[0][1],0,world); - MPI_Wait(&request,MPI_STATUS_IGNORE); - } - - n = 0; - for (iz = nzlo_out; iz <= nzhi_out; iz++) - for (iy = nylo_out; iy <= nyhi_out; iy++) - for (ix = nxlo_out; ix < nxlo_in; ix++) { - if (eflag_atom) u_brick[iz][iy][ix] = buf4[n++]; - if (vflag_atom) { - v0_brick[iz][iy][ix] = buf4[n++]; - v1_brick[iz][iy][ix] = buf4[n++]; - v2_brick[iz][iy][ix] = buf4[n++]; - v3_brick[iz][iy][ix] = buf4[n++]; - v4_brick[iz][iy][ix] = buf4[n++]; - v5_brick[iz][iy][ix] = buf4[n++]; - } - } - - // pack my real cells for -x processor - // pass data to self or -x processor - // unpack and sum recv data into my ghost cells - - n = 0; - for (iz = nzlo_out; iz <= nzhi_out; iz++) - for (iy = nylo_out; iy <= nyhi_out; iy++) - for (ix = nxlo_in; ix < nxlo_in+nxlo_ghost; ix++) { - if (eflag_atom) buf3[n++] = u_brick[iz][iy][ix]; - if (vflag_atom) { - buf3[n++] = v0_brick[iz][iy][ix]; - buf3[n++] = v1_brick[iz][iy][ix]; - buf3[n++] = v2_brick[iz][iy][ix]; - buf3[n++] = v3_brick[iz][iy][ix]; - buf3[n++] = v4_brick[iz][iy][ix]; - buf3[n++] = v5_brick[iz][iy][ix]; - } - } - - if (comm->procneigh[0][0] == me) - for (i = 0; i < n; i++) buf4[i] = buf3[i]; - else { - MPI_Irecv(buf4,nbuf_peratom,MPI_FFT_SCALAR, - comm->procneigh[0][1],0,world,&request); - MPI_Send(buf3,n,MPI_FFT_SCALAR,comm->procneigh[0][0],0,world); - MPI_Wait(&request,MPI_STATUS_IGNORE); - } - - n = 0; - for (iz = nzlo_out; iz <= nzhi_out; iz++) - for (iy = nylo_out; iy <= nyhi_out; iy++) - for (ix = nxhi_in+1; ix <= nxhi_out; ix++) { - if (eflag_atom) u_brick[iz][iy][ix] = buf4[n++]; - if (vflag_atom) { - v0_brick[iz][iy][ix] = buf4[n++]; - v1_brick[iz][iy][ix] = buf4[n++]; - v2_brick[iz][iy][ix] = buf4[n++]; - v3_brick[iz][iy][ix] = buf4[n++]; - v4_brick[iz][iy][ix] = buf4[n++]; - v5_brick[iz][iy][ix] = buf4[n++]; - } 
- } -} - -/* ---------------------------------------------------------------------- - find center grid pt for each of my particles - check that full stencil for the particle will fit in my 3d brick - store central grid pt indices in part2grid array -------------------------------------------------------------------------- */ - -void PPPMOld::particle_map() -{ - int nx,ny,nz; - - double **x = atom->x; - int nlocal = atom->nlocal; - - int flag = 0; - for (int i = 0; i < nlocal; i++) { - - // (nx,ny,nz) = global coords of grid pt to "lower left" of charge - // current particle coord can be outside global and local box - // add/subtract OFFSET to avoid int(-0.75) = 0 when want it to be -1 - - nx = static_cast<int> ((x[i][0]-boxlo[0])*delxinv+shift) - OFFSET; - ny = static_cast<int> ((x[i][1]-boxlo[1])*delyinv+shift) - OFFSET; - nz = static_cast<int> ((x[i][2]-boxlo[2])*delzinv+shift) - OFFSET; - - part2grid[i][0] = nx; - part2grid[i][1] = ny; - part2grid[i][2] = nz; - - // check that entire stencil around nx,ny,nz will fit in my 3d brick - - if (nx+nlower < nxlo_out || nx+nupper > nxhi_out || - ny+nlower < nylo_out || ny+nupper > nyhi_out || - nz+nlower < nzlo_out || nz+nupper > nzhi_out) - flag = 1; - } - - if (flag) error->one(FLERR,"Out of range atoms - cannot compute PPPM"); -} - -/* ---------------------------------------------------------------------- - create discretized "density" on section of global grid due to my particles - density(x,y,z) = charge "density" at grid points of my 3d brick - (nxlo:nxhi,nylo:nyhi,nzlo:nzhi) is extent of my brick (including ghosts) - in global grid -------------------------------------------------------------------------- */ - -void PPPMOld::make_rho() -{ - int l,m,n,nx,ny,nz,mx,my,mz; - FFT_SCALAR dx,dy,dz,x0,y0,z0; - - // clear 3d density array - - memset(&(density_brick[nzlo_out][nylo_out][nxlo_out]),0, - ngrid*sizeof(FFT_SCALAR)); - - // loop over my charges, add their contribution to nearby grid points - // (nx,ny,nz) = 
global coords of grid pt to "lower left" of charge - // (dx,dy,dz) = distance to "lower left" grid pt - // (mx,my,mz) = global coords of moving stencil pt - - double *q = atom->q; - double **x = atom->x; - int nlocal = atom->nlocal; - - for (int i = 0; i < nlocal; i++) { - - nx = part2grid[i][0]; - ny = part2grid[i][1]; - nz = part2grid[i][2]; - dx = nx+shiftone - (x[i][0]-boxlo[0])*delxinv; - dy = ny+shiftone - (x[i][1]-boxlo[1])*delyinv; - dz = nz+shiftone - (x[i][2]-boxlo[2])*delzinv; - - compute_rho1d(dx,dy,dz); - - z0 = delvolinv * q[i]; - for (n = nlower; n <= nupper; n++) { - mz = n+nz; - y0 = z0*rho1d[2][n]; - for (m = nlower; m <= nupper; m++) { - my = m+ny; - x0 = y0*rho1d[1][m]; - for (l = nlower; l <= nupper; l++) { - mx = l+nx; - density_brick[mz][my][mx] += x0*rho1d[0][l]; - } - } - } - } -} - -/* ---------------------------------------------------------------------- - FFT-based Poisson solver -------------------------------------------------------------------------- */ - -void PPPMOld::poisson(int,int) -{ - int i,j,k,n; - double eng; - - // transform charge density (r -> k) - - n = 0; - for (i = 0; i < nfft; i++) { - work1[n++] = density_fft[i]; - work1[n++] = ZEROF; - } - - fft1->compute(work1,work1,1); - - // global energy and virial contribution - - double scaleinv = 1.0/(nx_pppm*ny_pppm*nz_pppm); - double s2 = scaleinv*scaleinv; - - if (eflag_global || vflag_global) { - if (vflag_global) { - n = 0; - for (i = 0; i < nfft; i++) { - eng = s2 * greensfn[i] * (work1[n]*work1[n] + work1[n+1]*work1[n+1]); - for (j = 0; j < 6; j++) virial[j] += eng*vg[i][j]; - if (eflag_global) energy += eng; - n += 2; - } - } else { - n = 0; - for (i = 0; i < nfft; i++) { - energy += - s2 * greensfn[i] * (work1[n]*work1[n] + work1[n+1]*work1[n+1]); - n += 2; - } - } - } - - // scale by 1/total-grid-pts to get rho(k) - // multiply by Green's function to get V(k) - - n = 0; - for (i = 0; i < nfft; i++) { - work1[n++] *= scaleinv * greensfn[i]; - work1[n++] *= scaleinv * 
greensfn[i]; - } - - // extra FFTs for per-atom energy/virial - - if (evflag_atom) poisson_peratom(); - - // compute gradients of V(r) in each of 3 dims by transformimg -ik*V(k) - // FFT leaves data in 3d brick decomposition - // copy it into inner portion of vdx,vdy,vdz arrays - - // x direction gradient - - n = 0; - for (k = nzlo_fft; k <= nzhi_fft; k++) - for (j = nylo_fft; j <= nyhi_fft; j++) - for (i = nxlo_fft; i <= nxhi_fft; i++) { - work2[n] = fkx[i]*work1[n+1]; - work2[n+1] = -fkx[i]*work1[n]; - n += 2; - } - - fft2->compute(work2,work2,-1); - - n = 0; - for (k = nzlo_in; k <= nzhi_in; k++) - for (j = nylo_in; j <= nyhi_in; j++) - for (i = nxlo_in; i <= nxhi_in; i++) { - vdx_brick[k][j][i] = work2[n]; - n += 2; - } - - // y direction gradient - - n = 0; - for (k = nzlo_fft; k <= nzhi_fft; k++) - for (j = nylo_fft; j <= nyhi_fft; j++) - for (i = nxlo_fft; i <= nxhi_fft; i++) { - work2[n] = fky[j]*work1[n+1]; - work2[n+1] = -fky[j]*work1[n]; - n += 2; - } - - fft2->compute(work2,work2,-1); - - n = 0; - for (k = nzlo_in; k <= nzhi_in; k++) - for (j = nylo_in; j <= nyhi_in; j++) - for (i = nxlo_in; i <= nxhi_in; i++) { - vdy_brick[k][j][i] = work2[n]; - n += 2; - } - - // z direction gradient - - n = 0; - for (k = nzlo_fft; k <= nzhi_fft; k++) - for (j = nylo_fft; j <= nyhi_fft; j++) - for (i = nxlo_fft; i <= nxhi_fft; i++) { - work2[n] = fkz[k]*work1[n+1]; - work2[n+1] = -fkz[k]*work1[n]; - n += 2; - } - - fft2->compute(work2,work2,-1); - - n = 0; - for (k = nzlo_in; k <= nzhi_in; k++) - for (j = nylo_in; j <= nyhi_in; j++) - for (i = nxlo_in; i <= nxhi_in; i++) { - vdz_brick[k][j][i] = work2[n]; - n += 2; - } -} - -/* ---------------------------------------------------------------------- - FFT-based Poisson solver for per-atom energy/virial -------------------------------------------------------------------------- */ - -void PPPMOld::poisson_peratom() -{ - int i,j,k,n; - - // energy - - if (eflag_atom) { - n = 0; - for (i = 0; i < nfft; i++) { - work2[n] = 
work1[n]; - work2[n+1] = work1[n+1]; - n += 2; - } - - fft2->compute(work2,work2,-1); - - n = 0; - for (k = nzlo_in; k <= nzhi_in; k++) - for (j = nylo_in; j <= nyhi_in; j++) - for (i = nxlo_in; i <= nxhi_in; i++) { - u_brick[k][j][i] = work2[n]; - n += 2; - } - } - - // 6 components of virial in v0 thru v5 - - if (!vflag_atom) return; - - n = 0; - for (i = 0; i < nfft; i++) { - work2[n] = work1[n]*vg[i][0]; - work2[n+1] = work1[n+1]*vg[i][0]; - n += 2; - } - - fft2->compute(work2,work2,-1); - - n = 0; - for (k = nzlo_in; k <= nzhi_in; k++) - for (j = nylo_in; j <= nyhi_in; j++) - for (i = nxlo_in; i <= nxhi_in; i++) { - v0_brick[k][j][i] = work2[n]; - n += 2; - } - - n = 0; - for (i = 0; i < nfft; i++) { - work2[n] = work1[n]*vg[i][1]; - work2[n+1] = work1[n+1]*vg[i][1]; - n += 2; - } - - fft2->compute(work2,work2,-1); - - n = 0; - for (k = nzlo_in; k <= nzhi_in; k++) - for (j = nylo_in; j <= nyhi_in; j++) - for (i = nxlo_in; i <= nxhi_in; i++) { - v1_brick[k][j][i] = work2[n]; - n += 2; - } - - n = 0; - for (i = 0; i < nfft; i++) { - work2[n] = work1[n]*vg[i][2]; - work2[n+1] = work1[n+1]*vg[i][2]; - n += 2; - } - - fft2->compute(work2,work2,-1); - - n = 0; - for (k = nzlo_in; k <= nzhi_in; k++) - for (j = nylo_in; j <= nyhi_in; j++) - for (i = nxlo_in; i <= nxhi_in; i++) { - v2_brick[k][j][i] = work2[n]; - n += 2; - } - - n = 0; - for (i = 0; i < nfft; i++) { - work2[n] = work1[n]*vg[i][3]; - work2[n+1] = work1[n+1]*vg[i][3]; - n += 2; - } - - fft2->compute(work2,work2,-1); - - n = 0; - for (k = nzlo_in; k <= nzhi_in; k++) - for (j = nylo_in; j <= nyhi_in; j++) - for (i = nxlo_in; i <= nxhi_in; i++) { - v3_brick[k][j][i] = work2[n]; - n += 2; - } - - n = 0; - for (i = 0; i < nfft; i++) { - work2[n] = work1[n]*vg[i][4]; - work2[n+1] = work1[n+1]*vg[i][4]; - n += 2; - } - - fft2->compute(work2,work2,-1); - - n = 0; - for (k = nzlo_in; k <= nzhi_in; k++) - for (j = nylo_in; j <= nyhi_in; j++) - for (i = nxlo_in; i <= nxhi_in; i++) { - v4_brick[k][j][i] = work2[n]; 
- n += 2; - } - - n = 0; - for (i = 0; i < nfft; i++) { - work2[n] = work1[n]*vg[i][5]; - work2[n+1] = work1[n+1]*vg[i][5]; - n += 2; - } - - fft2->compute(work2,work2,-1); - - n = 0; - for (k = nzlo_in; k <= nzhi_in; k++) - for (j = nylo_in; j <= nyhi_in; j++) - for (i = nxlo_in; i <= nxhi_in; i++) { - v5_brick[k][j][i] = work2[n]; - n += 2; - } -} - -/* ---------------------------------------------------------------------- - interpolate from grid to get electric field & force on my particles -------------------------------------------------------------------------- */ - -void PPPMOld::fieldforce() -{ - int i,l,m,n,nx,ny,nz,mx,my,mz; - FFT_SCALAR dx,dy,dz,x0,y0,z0; - FFT_SCALAR ekx,eky,ekz; - - // loop over my charges, interpolate electric field from nearby grid points - // (nx,ny,nz) = global coords of grid pt to "lower left" of charge - // (dx,dy,dz) = distance to "lower left" grid pt - // (mx,my,mz) = global coords of moving stencil pt - // ek = 3 components of E-field on particle - - double *q = atom->q; - double **x = atom->x; - double **f = atom->f; - - int nlocal = atom->nlocal; - - for (i = 0; i < nlocal; i++) { - nx = part2grid[i][0]; - ny = part2grid[i][1]; - nz = part2grid[i][2]; - dx = nx+shiftone - (x[i][0]-boxlo[0])*delxinv; - dy = ny+shiftone - (x[i][1]-boxlo[1])*delyinv; - dz = nz+shiftone - (x[i][2]-boxlo[2])*delzinv; - - compute_rho1d(dx,dy,dz); - - ekx = eky = ekz = ZEROF; - for (n = nlower; n <= nupper; n++) { - mz = n+nz; - z0 = rho1d[2][n]; - for (m = nlower; m <= nupper; m++) { - my = m+ny; - y0 = z0*rho1d[1][m]; - for (l = nlower; l <= nupper; l++) { - mx = l+nx; - x0 = y0*rho1d[0][l]; - ekx -= x0*vdx_brick[mz][my][mx]; - eky -= x0*vdy_brick[mz][my][mx]; - ekz -= x0*vdz_brick[mz][my][mx]; - } - } - } - - // convert E-field to force - - const double qfactor = qqrd2e * scale * q[i]; - f[i][0] += qfactor*ekx; - f[i][1] += qfactor*eky; - if (slabflag != 2) f[i][2] += qfactor*ekz; - } -} - -/* 
---------------------------------------------------------------------- - interpolate from grid to get per-atom energy/virial -------------------------------------------------------------------------- */ - -void PPPMOld::fieldforce_peratom() -{ - int i,l,m,n,nx,ny,nz,mx,my,mz; - FFT_SCALAR dx,dy,dz,x0,y0,z0; - FFT_SCALAR u,v0,v1,v2,v3,v4,v5; - - // loop over my charges, interpolate from nearby grid points - // (nx,ny,nz) = global coords of grid pt to "lower left" of charge - // (dx,dy,dz) = distance to "lower left" grid pt - // (mx,my,mz) = global coords of moving stencil pt - - double *q = atom->q; - double **x = atom->x; - - int nlocal = atom->nlocal; - - for (i = 0; i < nlocal; i++) { - nx = part2grid[i][0]; - ny = part2grid[i][1]; - nz = part2grid[i][2]; - dx = nx+shiftone - (x[i][0]-boxlo[0])*delxinv; - dy = ny+shiftone - (x[i][1]-boxlo[1])*delyinv; - dz = nz+shiftone - (x[i][2]-boxlo[2])*delzinv; - - compute_rho1d(dx,dy,dz); - - u = v0 = v1 = v2 = v3 = v4 = v5 = ZEROF; - for (n = nlower; n <= nupper; n++) { - mz = n+nz; - z0 = rho1d[2][n]; - for (m = nlower; m <= nupper; m++) { - my = m+ny; - y0 = z0*rho1d[1][m]; - for (l = nlower; l <= nupper; l++) { - mx = l+nx; - x0 = y0*rho1d[0][l]; - if (eflag_atom) u += x0*u_brick[mz][my][mx]; - if (vflag_atom) { - v0 += x0*v0_brick[mz][my][mx]; - v1 += x0*v1_brick[mz][my][mx]; - v2 += x0*v2_brick[mz][my][mx]; - v3 += x0*v3_brick[mz][my][mx]; - v4 += x0*v4_brick[mz][my][mx]; - v5 += x0*v5_brick[mz][my][mx]; - } - } - } - } - - if (eflag_atom) eatom[i] += q[i]*u; - if (vflag_atom) { - vatom[i][0] += v0; - vatom[i][1] += v1; - vatom[i][2] += v2; - vatom[i][3] += v3; - vatom[i][4] += v4; - vatom[i][5] += v5; - } - } -} - -/* ---------------------------------------------------------------------- - map nprocs to NX by NY grid as PX by PY procs - return optimal px,py -------------------------------------------------------------------------- */ - -void PPPMOld::procs2grid2d(int nprocs, int nx, int ny, int *px, int *py) -{ - // 
loop thru all possible factorizations of nprocs - // surf = surface area of largest proc sub-domain - // innermost if test minimizes surface area and surface/volume ratio - - int bestsurf = 2 * (nx + ny); - int bestboxx = 0; - int bestboxy = 0; - - int boxx,boxy,surf,ipx,ipy; - - ipx = 1; - while (ipx <= nprocs) { - if (nprocs % ipx == 0) { - ipy = nprocs/ipx; - boxx = nx/ipx; - if (nx % ipx) boxx++; - boxy = ny/ipy; - if (ny % ipy) boxy++; - surf = boxx + boxy; - if (surf < bestsurf || - (surf == bestsurf && boxx*boxy > bestboxx*bestboxy)) { - bestsurf = surf; - bestboxx = boxx; - bestboxy = boxy; - *px = ipx; - *py = ipy; - } - } - ipx++; - } -} - -/* ---------------------------------------------------------------------- - charge assignment into rho1d - dx,dy,dz = distance of particle from "lower left" grid point -------------------------------------------------------------------------- */ - -void PPPMOld::compute_rho1d(const FFT_SCALAR &dx, const FFT_SCALAR &dy, - const FFT_SCALAR &dz) -{ - int k,l; - FFT_SCALAR r1,r2,r3; - - for (k = (1-order)/2; k <= order/2; k++) { - r1 = r2 = r3 = ZEROF; - - for (l = order-1; l >= 0; l--) { - r1 = rho_coeff[l][k] + r1*dx; - r2 = rho_coeff[l][k] + r2*dy; - r3 = rho_coeff[l][k] + r3*dz; - } - rho1d[0][k] = r1; - rho1d[1][k] = r2; - rho1d[2][k] = r3; - } -} - -/* ---------------------------------------------------------------------- - generate coeffients for the weight function of order n - - (n-1) - Wn(x) = Sum wn(k,x) , Sum is over every other integer - k=-(n-1) - For k=-(n-1),-(n-1)+2, ....., (n-1)-2,n-1 - k is odd integers if n is even and even integers if n is odd - --- - | n-1 - | Sum a(l,j)*(x-k/2)**l if abs(x-k/2) < 1/2 - wn(k,x) = < l=0 - | - | 0 otherwise - --- - a coeffients are packed into the array rho_coeff to eliminate zeros - rho_coeff(l,((k+mod(n+1,2))/2) = a(l,k) -------------------------------------------------------------------------- */ - -void PPPMOld::compute_rho_coeff() -{ - int j,k,l,m; - FFT_SCALAR s; 
- - FFT_SCALAR **a; - memory->create2d_offset(a,order,-order,order,"pppm:a"); - - for (k = -order; k <= order; k++) - for (l = 0; l < order; l++) - a[l][k] = 0.0; - - a[0][0] = 1.0; - for (j = 1; j < order; j++) { - for (k = -j; k <= j; k += 2) { - s = 0.0; - for (l = 0; l < j; l++) { - a[l+1][k] = (a[l][k+1]-a[l][k-1]) / (l+1); -#ifdef FFT_SINGLE - s += powf(0.5,(float) l+1) * - (a[l][k-1] + powf(-1.0,(float) l) * a[l][k+1]) / (l+1); -#else - s += pow(0.5,(double) l+1) * - (a[l][k-1] + pow(-1.0,(double) l) * a[l][k+1]) / (l+1); -#endif - } - a[0][k] = s; - } - } - - m = (1-order)/2; - for (k = -(order-1); k < order; k += 2) { - for (l = 0; l < order; l++) - rho_coeff[l][m] = a[l][k]; - m++; - } - - memory->destroy2d_offset(a,-order); -} - -/* ---------------------------------------------------------------------- - Slab-geometry correction term to dampen inter-slab interactions between - periodically repeating slabs. Yields good approximation to 2D Ewald if - adequate empty space is left between repeating slabs (J. Chem. Phys. - 111, 3155). Slabs defined here to be parallel to the xy plane. Also - extended to non-neutral systems (J. Chem. Phys. 131, 094107). 
-------------------------------------------------------------------------- */ - -void PPPMOld::slabcorr() -{ - // compute local contribution to global dipole moment - - double *q = atom->q; - double **x = atom->x; - double zprd = domain->zprd; - int nlocal = atom->nlocal; - - double dipole = 0.0; - for (int i = 0; i < nlocal; i++) dipole += q[i]*x[i][2]; - - // sum local contributions to get global dipole moment - - double dipole_all; - MPI_Allreduce(&dipole,&dipole_all,1,MPI_DOUBLE,MPI_SUM,world); - - // need to make non-neutral systems and/or - // per-atom energy translationally invariant - - double dipole_r2 = 0.0; - if (eflag_atom || fabs(qsum) > SMALL) { - for (int i = 0; i < nlocal; i++) - dipole_r2 += q[i]*x[i][2]*x[i][2]; - - // sum local contributions - - double tmp; - MPI_Allreduce(&dipole_r2,&tmp,1,MPI_DOUBLE,MPI_SUM,world); - dipole_r2 = tmp; - } - - // compute corrections - - const double e_slabcorr = MY_2PI*(dipole_all*dipole_all - - qsum*dipole_r2 - qsum*qsum*zprd*zprd/12.0)/volume; - const double qscale = qqrd2e * scale; - - if (eflag_global) energy += qscale * e_slabcorr; - - // per-atom energy - - if (eflag_atom) { - double efact = qscale * MY_2PI/volume; - for (int i = 0; i < nlocal; i++) - eatom[i] += efact * q[i]*(x[i][2]*dipole_all - 0.5*(dipole_r2 + - qsum*x[i][2]*x[i][2]) - qsum*zprd*zprd/12.0); - } - - // add on force corrections - - double ffact = qscale * (-4.0*MY_PI/volume); - double **f = atom->f; - - for (int i = 0; i < nlocal; i++) f[i][2] += ffact * q[i]*(dipole_all - qsum*x[i][2]); -} - - -/* ---------------------------------------------------------------------- - perform and time the 1d FFTs required for N timesteps -------------------------------------------------------------------------- */ - -int PPPMOld::timing_1d(int n, double &time1d) -{ - double time1,time2; - - for (int i = 0; i < 2*nfft_both; i++) work1[i] = ZEROF; - - MPI_Barrier(world); - time1 = MPI_Wtime(); - - for (int i = 0; i < n; i++) { - 
fft1->timing1d(work1,nfft_both,1); - fft2->timing1d(work1,nfft_both,-1); - fft2->timing1d(work1,nfft_both,-1); - fft2->timing1d(work1,nfft_both,-1); - } - - MPI_Barrier(world); - time2 = MPI_Wtime(); - time1d = time2 - time1; - - return 4; -} - -/* ---------------------------------------------------------------------- - perform and time the 3d FFTs required for N timesteps -------------------------------------------------------------------------- */ - -int PPPMOld::timing_3d(int n, double &time3d) -{ - double time1,time2; - - for (int i = 0; i < 2*nfft_both; i++) work1[i] = ZEROF; - - MPI_Barrier(world); - time1 = MPI_Wtime(); - - for (int i = 0; i < n; i++) { - fft1->compute(work1,work1,1); - fft2->compute(work1,work1,-1); - fft2->compute(work1,work1,-1); - fft2->compute(work1,work1,-1); - } - - MPI_Barrier(world); - time2 = MPI_Wtime(); - time3d = time2 - time1; - - return 4; -} - -/* ---------------------------------------------------------------------- - memory usage of local arrays -------------------------------------------------------------------------- */ - -double PPPMOld::memory_usage() -{ - double bytes = nmax*3 * sizeof(double); - int nbrick = (nxhi_out-nxlo_out+1) * (nyhi_out-nylo_out+1) * - (nzhi_out-nzlo_out+1); - bytes += 4 * nbrick * sizeof(FFT_SCALAR); - bytes += 6 * nfft_both * sizeof(double); - bytes += nfft_both * sizeof(double); - bytes += nfft_both*5 * sizeof(FFT_SCALAR); - bytes += 2 * nbuf * sizeof(FFT_SCALAR); - - if (peratom_allocate_flag) { - bytes += 7 * nbrick * sizeof(FFT_SCALAR); - bytes += 2 * nbuf_peratom * sizeof(FFT_SCALAR); - } - - if (group_allocate_flag) { - bytes += 2 * nbrick * sizeof(FFT_SCALAR); - bytes += 2 * nfft_both * sizeof(FFT_SCALAR);; - } - - return bytes; -} - -/* ---------------------------------------------------------------------- - group-group interactions - ------------------------------------------------------------------------- */ - -/* ---------------------------------------------------------------------- 
- compute the PPPM total long-range force and energy for groups A and B - ------------------------------------------------------------------------- */ - -void PPPMOld::compute_group_group(int groupbit_A, int groupbit_B, int BA_flag) -{ - if (slabflag) - error->all(FLERR,"Cannot (yet) use K-space slab " - "correction with compute group/group"); - - int i; - - if (!group_allocate_flag) { - allocate_groups(); - group_allocate_flag = 1; - } - - e2group = 0; //energy - f2group[0] = 0; //force in x-direction - f2group[1] = 0; //force in y-direction - f2group[2] = 0; //force in z-direction - - // map my particle charge onto my local 3d density grid - - make_rho_groups(groupbit_A,groupbit_B,BA_flag); - - // all procs communicate density values from their ghost cells - // to fully sum contribution in their 3d bricks - // remap from 3d decomposition to FFT decomposition - - // temporarily store and switch pointers so we can - // use brick2fft() for groups A and B (without - // writing an additional function) - - FFT_SCALAR ***density_brick_real = density_brick; - FFT_SCALAR *density_fft_real = density_fft; - - // group A - - density_brick = density_A_brick; - density_fft = density_A_fft; - - brick2fft(); - - // group B - - density_brick = density_B_brick; - density_fft = density_B_fft; - - brick2fft(); - - // switch back pointers - - density_brick = density_brick_real; - density_fft = density_fft_real; - - // compute potential gradient on my FFT grid and - // portion of group-group energy/force on this proc's FFT grid - - poisson_groups(BA_flag); - - const double qscale = qqrd2e * scale; - - // total group A <--> group B energy - // self and boundary correction terms are in compute_group_group.cpp - - double e2group_all; - MPI_Allreduce(&e2group,&e2group_all,1,MPI_DOUBLE,MPI_SUM,world); - e2group = e2group_all; - - e2group *= qscale*0.5*volume; - - // total group A <--> group B force - - double f2group_all[3]; - MPI_Allreduce(f2group,f2group_all,3,MPI_DOUBLE,MPI_SUM,world); 
- - for (i = 0; i < 3; i++) f2group[i] = qscale*volume*f2group_all[i]; -} - -/* ---------------------------------------------------------------------- - allocate group-group memory that depends on # of K-vectors and order - ------------------------------------------------------------------------- */ - -void PPPMOld::allocate_groups() -{ - memory->create3d_offset(density_A_brick,nzlo_out,nzhi_out,nylo_out,nyhi_out, - nxlo_out,nxhi_out,"pppm:density_A_brick"); - memory->create3d_offset(density_B_brick,nzlo_out,nzhi_out,nylo_out,nyhi_out, - nxlo_out,nxhi_out,"pppm:density_B_brick"); - memory->create(density_A_fft,nfft_both,"pppm:density_A_fft"); - memory->create(density_B_fft,nfft_both,"pppm:density_B_fft"); -} - -/* ---------------------------------------------------------------------- - deallocate group-group memory that depends on # of K-vectors and order - ------------------------------------------------------------------------- */ - -void PPPMOld::deallocate_groups() -{ - memory->destroy3d_offset(density_A_brick,nzlo_out,nylo_out,nxlo_out); - memory->destroy3d_offset(density_B_brick,nzlo_out,nylo_out,nxlo_out); - memory->destroy(density_A_fft); - memory->destroy(density_B_fft); -} - -/* ---------------------------------------------------------------------- - create discretized "density" on section of global grid due to my particles - density(x,y,z) = charge "density" at grid points of my 3d brick - (nxlo:nxhi,nylo:nyhi,nzlo:nzhi) is extent of my brick (including ghosts) - in global grid for group-group interactions - ------------------------------------------------------------------------- */ - -void PPPMOld::make_rho_groups(int groupbit_A, int groupbit_B, int BA_flag) -{ - int l,m,n,nx,ny,nz,mx,my,mz; - FFT_SCALAR dx,dy,dz,x0,y0,z0; - - // clear 3d density arrays - - memset(&(density_A_brick[nzlo_out][nylo_out][nxlo_out]),0, - ngrid*sizeof(FFT_SCALAR)); - - memset(&(density_B_brick[nzlo_out][nylo_out][nxlo_out]),0, - ngrid*sizeof(FFT_SCALAR)); - - // loop over 
my charges, add their contribution to nearby grid points - // (nx,ny,nz) = global coords of grid pt to "lower left" of charge - // (dx,dy,dz) = distance to "lower left" grid pt - // (mx,my,mz) = global coords of moving stencil pt - - double *q = atom->q; - double **x = atom->x; - int nlocal = atom->nlocal; - int *mask = atom->mask; - - for (int i = 0; i < nlocal; i++) { - - if ((mask[i] & groupbit_A) && (mask[i] & groupbit_B)) - if (BA_flag) continue; - - if ((mask[i] & groupbit_A) || (mask[i] & groupbit_B)) { - - nx = part2grid[i][0]; - ny = part2grid[i][1]; - nz = part2grid[i][2]; - dx = nx+shiftone - (x[i][0]-boxlo[0])*delxinv; - dy = ny+shiftone - (x[i][1]-boxlo[1])*delyinv; - dz = nz+shiftone - (x[i][2]-boxlo[2])*delzinv; - - compute_rho1d(dx,dy,dz); - - z0 = delvolinv * q[i]; - for (n = nlower; n <= nupper; n++) { - mz = n+nz; - y0 = z0*rho1d[2][n]; - for (m = nlower; m <= nupper; m++) { - my = m+ny; - x0 = y0*rho1d[1][m]; - for (l = nlower; l <= nupper; l++) { - mx = l+nx; - - // group A - - if (mask[i] & groupbit_A) - density_A_brick[mz][my][mx] += x0*rho1d[0][l]; - - // group B - - if (mask[i] & groupbit_B) - density_B_brick[mz][my][mx] += x0*rho1d[0][l]; - } - } - } - } - } -} - -/* ---------------------------------------------------------------------- - FFT-based Poisson solver for group-group interactions - ------------------------------------------------------------------------- */ - -void PPPMOld::poisson_groups(int BA_flag) -{ - int i,j,k,n; - - // reuse memory (already declared) - - FFT_SCALAR *work_A = work1; - FFT_SCALAR *work_B = work2; - - // transform charge density (r -> k) - - // group A - - n = 0; - for (i = 0; i < nfft; i++) { - work_A[n++] = density_A_fft[i]; - work_A[n++] = ZEROF; - } - - fft1->compute(work_A,work_A,1); - - // group B - - n = 0; - for (i = 0; i < nfft; i++) { - work_B[n++] = density_B_fft[i]; - work_B[n++] = ZEROF; - } - - fft1->compute(work_B,work_B,1); - - // group-group energy and force contribution, - // keep 
everything in reciprocal space so - // no inverse FFTs needed - - double scaleinv = 1.0/(nx_pppm*ny_pppm*nz_pppm); - double s2 = scaleinv*scaleinv; - - // energy - - n = 0; - for (i = 0; i < nfft; i++) { - e2group += s2 * greensfn[i] * - (work_A[n]*work_B[n] + work_A[n+1]*work_B[n+1]); - n += 2; - } - - if (BA_flag) return; - - - // multiply by Green's function and s2 - // (only for work_A so it is not squared below) - - n = 0; - for (i = 0; i < nfft; i++) { - work_A[n++] *= s2 * greensfn[i]; - work_A[n++] *= s2 * greensfn[i]; - } - - double partial_group; - - // force, x direction - - n = 0; - for (k = nzlo_fft; k <= nzhi_fft; k++) - for (j = nylo_fft; j <= nyhi_fft; j++) - for (i = nxlo_fft; i <= nxhi_fft; i++) { - partial_group = work_A[n+1]*work_B[n] - work_A[n]*work_B[n+1]; - f2group[0] += fkx[i] * partial_group; - n += 2; - } - - // force, y direction - - n = 0; - for (k = nzlo_fft; k <= nzhi_fft; k++) - for (j = nylo_fft; j <= nyhi_fft; j++) - for (i = nxlo_fft; i <= nxhi_fft; i++) { - partial_group = work_A[n+1]*work_B[n] - work_A[n]*work_B[n+1]; - f2group[1] += fky[j] * partial_group; - n += 2; - } - - // force, z direction - - n = 0; - for (k = nzlo_fft; k <= nzhi_fft; k++) - for (j = nylo_fft; j <= nyhi_fft; j++) - for (i = nxlo_fft; i <= nxhi_fft; i++) { - partial_group = work_A[n+1]*work_B[n] - work_A[n]*work_B[n+1]; - f2group[2] += fkz[k] * partial_group; - n += 2; - } -} diff --git a/src/USER-CUDA/pppm_old.h b/src/USER-CUDA/pppm_old.h deleted file mode 100644 index 57a92e1202..0000000000 --- a/src/USER-CUDA/pppm_old.h +++ /dev/null @@ -1,271 +0,0 @@ -/* -*- c++ -*- ---------------------------------------------------------- - LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator - http://lammps.sandia.gov, Sandia National Laboratories - Steve Plimpton, sjplimp@sandia.gov - - Copyright (2003) Sandia Corporation. Under the terms of Contract - DE-AC04-94AL85000 with Sandia Corporation, the U.S. 
Government retains - certain rights in this software. This software is distributed under - the GNU General Public License. - - See the README file in the top-level LAMMPS directory. -------------------------------------------------------------------------- */ - -#ifdef KSPACE_CLASS - -KSpaceStyle(pppm/old,PPPMOld) - -#else - -#ifndef LMP_PPPM_OLD_H -#define LMP_PPPM_OLD_H - -#include "lmptype.h" -#include <mpi.h> - -#ifdef FFT_SINGLE -typedef float FFT_SCALAR; -#define MPI_FFT_SCALAR MPI_CFLOAT -#else -typedef double FFT_SCALAR; -#define MPI_FFT_SCALAR MPI_DOUBLE -#endif - -#include "kspace.h" - -namespace LAMMPS_NS { - -class PPPMOld : public KSpace { - public: - PPPMOld(class LAMMPS *, int, char **); - virtual ~PPPMOld(); - virtual void init(); - virtual void setup(); - virtual void compute(int, int); - virtual int timing_1d(int, double &); - virtual int timing_3d(int, double &); - virtual double memory_usage(); - - virtual void compute_group_group(int, int, int); - - protected: - int me,nprocs; - int nfactors; - int *factors; - double cutoff; - double volume; - double delxinv,delyinv,delzinv,delvolinv; - double shift,shiftone; - int peratom_allocate_flag; - - int nxlo_in,nylo_in,nzlo_in,nxhi_in,nyhi_in,nzhi_in; - int nxlo_out,nylo_out,nzlo_out,nxhi_out,nyhi_out,nzhi_out; - int nxlo_ghost,nxhi_ghost,nylo_ghost,nyhi_ghost,nzlo_ghost,nzhi_ghost; - int nxlo_fft,nylo_fft,nzlo_fft,nxhi_fft,nyhi_fft,nzhi_fft; - int nlower,nupper; - int ngrid,nfft,nfft_both; - int nbuf,nbuf_peratom; - - FFT_SCALAR ***density_brick; - FFT_SCALAR ***vdx_brick,***vdy_brick,***vdz_brick; - FFT_SCALAR ***u_brick; - FFT_SCALAR ***v0_brick,***v1_brick,***v2_brick; - FFT_SCALAR ***v3_brick,***v4_brick,***v5_brick; - double *greensfn; - double **vg; - double *fkx,*fky,*fkz; - FFT_SCALAR *density_fft; - FFT_SCALAR *work1,*work2; - FFT_SCALAR *buf1,*buf2,*buf3,*buf4; - - double *gf_b; - FFT_SCALAR **rho1d,**rho_coeff; - - // group-group interactions - - int group_allocate_flag; - FFT_SCALAR 
***density_A_brick,***density_B_brick; - FFT_SCALAR *density_A_fft,*density_B_fft; - - - class FFT3d *fft1,*fft2; - class Remap *remap; - - int **part2grid; // storage for particle -> grid mapping - int nmax; - - int triclinic; // domain settings, orthog or triclinic - double *boxlo; - // TIP4P settings - int typeH,typeO; // atom types of TIP4P water H and O atoms - double qdist; // distance from O site to negative charge - double alpha; // geometric factor - - void set_grid(); - virtual void allocate(); - virtual void allocate_peratom(); - virtual void deallocate(); - virtual void deallocate_peratom(); - int factorable(int); - double rms(double, double, bigint, double, double **); - double diffpr(double, double, double, double, double **); - void compute_gf_denom(); - - virtual void particle_map(); - virtual void make_rho(); - virtual void brick2fft(); - virtual void fillbrick(); - virtual void fillbrick_peratom(); - virtual void poisson(int,int); - virtual void poisson_peratom(); - virtual void fieldforce(); - virtual void fieldforce_peratom(); - void procs2grid2d(int,int,int,int *, int*); - void compute_rho1d(const FFT_SCALAR &, const FFT_SCALAR &, - const FFT_SCALAR &); - void compute_rho_coeff(); - void slabcorr(); - - // group-group interactions - - virtual void allocate_groups(); - virtual void deallocate_groups(); - virtual void make_rho_groups(int, int, int); - virtual void poisson_groups(int); - -/* ---------------------------------------------------------------------- - denominator for Hockney-Eastwood Green's function - of x,y,z = sin(kx*deltax/2), etc - - inf n-1 - S(n,k) = Sum W(k+pi*j)**2 = Sum b(l)*(z*z)**l - j=-inf l=0 - - = -(z*z)**n /(2n-1)! 
* (d/dx)**(2n-1) cot(x) at z = sin(x) - gf_b = denominator expansion coeffs -------------------------------------------------------------------------- */ - - inline double gf_denom(const double &x, const double &y, - const double &z) const { - double sx,sy,sz; - sz = sy = sx = 0.0; - for (int l = order-1; l >= 0; l--) { - sx = gf_b[l] + sx*x; - sy = gf_b[l] + sy*y; - sz = gf_b[l] + sz*z; - } - double s = sx*sy*sz; - return s*s; - }; -}; - -} - -#endif -#endif - -/* ERROR/WARNING messages: - -E: Illegal ... command - -Self-explanatory. Check the input script syntax and compare to the -documentation for the command. You can use -echo screen as a -command-line option when running LAMMPS to see the offending line. - -E: Cannot use PPPM with 2d simulation - -The kspace style pppm cannot be used in 2d simulations. You can use -2d PPPM in a 3d simulation; see the kspace_modify command. - -E: Kspace style requires atom attribute q - -The atom style defined does not have these attributes. - -E: Cannot use nonperiodic boundaries with PPPM - -For kspace style pppm, all 3 dimensions must have periodic boundaries -unless you use the kspace_modify command to define a 2d slab with a -non-periodic z dimension. - -E: Incorrect boundaries with slab PPPM - -Must have periodic x,y dimensions and non-periodic z dimension to use -2d slab option with PPPM. - -E: PPPM order cannot be < 2 or > than %d - -This is a limitation of the PPPM implementation in LAMMPS. - -E: KSpace style is incompatible with Pair style - -Setting a kspace style requires that a pair style with a long-range -Coulombic or dispersion component be used. - -E: Bond and angle potentials must be defined for TIP4P - -Cannot use TIP4P pair potential unless bond and angle potentials -are defined. - -E: Bad TIP4P angle type for PPPM/TIP4P - -Specified angle type is not valid. - -E: Bad TIP4P bond type for PPPM/TIP4P - -Specified bond type is not valid. 
- -E: Cannot use kspace solver on system with no charge - -No atoms in system have a non-zero charge. - -W: System is not charge neutral, net charge = %g - -The total charge on all atoms on the system is not 0.0, which -is not valid for the long-range Coulombic solvers. - -W: Reducing PPPM order b/c stencil extends beyond neighbor processor - -This may lead to a larger grid than desired. See the kspace_modify overlap -command to prevent changing of the PPPM order. - -E: PPPM grid is too large - -The global PPPM grid is larger than OFFSET in one or more dimensions. -OFFSET is currently set to 4096. You likely need to decrease the -requested accuracy. - -E: PPPM order has been reduced to 0 - -The auto-adjust of the order failed. You will need to -set the grid size and order directly via kspace_modify. - -E: KSpace accuracy must be > 0 - -The kspace accuracy designated in the input must be greater than zero. - -E: Cannot compute PPPM G - -The Ewald factor could not be computed for the current choice of -grid size, cutoff, accuracy. - -E: Out of range atoms - cannot compute PPPM - -One or more atoms are attempting to map their charge to a PPPM grid -point that is not owned by a processor. This is likely for one of two -reasons, both of them bad. First, it may mean that an atom near the -boundary of a processor's sub-domain has moved more than 1/2 the -"neighbor skin distance"_neighbor.html without neighbor lists being -rebuilt and atoms being migrated to new processors. This also means -you may be missing pairwise interactions that need to be computed. -The solution is to change the re-neighboring criteria via the -"neigh_modify"_neigh_modify command. The safest settings are "delay 0 -every 1 check yes". Second, it may mean that an atom has moved far -outside a processor's sub-domain or even the entire simulation box. -This indicates bad physics, e.g. due to highly overlapping atoms, too -large a timestep, etc. 
- -E: Cannot (yet) use K-space slab correction with compute group/group - -This option is not yet supported. - -*/ diff --git a/src/USER-CUDA/user_cuda.h b/src/USER-CUDA/user_cuda.h deleted file mode 100644 index dbcc41ab3b..0000000000 --- a/src/USER-CUDA/user_cuda.h +++ /dev/null @@ -1,159 +0,0 @@ -/* -*- c++ -*- ---------------------------------------------------------- - LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator - - Original Version: - http://lammps.sandia.gov, Sandia National Laboratories - Steve Plimpton, sjplimp@sandia.gov - - See the README file in the top-level LAMMPS directory. - - ----------------------------------------------------------------------- - - USER-CUDA Package and associated modifications: - https://sourceforge.net/projects/lammpscuda/ - - Christian Trott, christian.trott@tu-ilmenau.de - Lars Winterfeld, lars.winterfeld@tu-ilmenau.de - Theoretical Physics II, University of Technology Ilmenau, Germany - - See the README file in the USER-CUDA directory. - - This software is distributed under the GNU General Public License. 
-------------------------------------------------------------------------- */ - -#ifndef CUDA_H -#define CUDA_H - -#include "pointers.h" -#include "cuda_shared.h" -#include "cuda_data.h" -#include "cuda_precision.h" -#include <map> - -#ifdef _DEBUG -#define MYDBG(a) a -#else -#define MYDBG(a) -#endif - -namespace LAMMPS_NS -{ -class Cuda : protected Pointers -{ - public: - Cuda(class LAMMPS*); - ~Cuda(); - //static void setDevice(class LAMMPS*); - void allocate(); - - void accelerator(int, char**); - void activate(); - - void setSharedDataZero(); - void setSystemParams(); - - void setDomainParams(); - - void checkResize(); - void evsetup_eatom_vatom(int eflag_atom, int vflag_atom); - void uploadAll(); - void downloadAll(); - void upload(int datamask); - void download(int datamask); - void downloadX(); - - class CudaNeighList* registerNeighborList(class NeighList* neigh_list); - void uploadAllNeighborLists(); - void downloadAllNeighborLists(); - void set_neighinit(int dist_check, double triggerneighsq) { - shared_data.atom.dist_check = dist_check; - shared_data.atom.triggerneighsq = triggerneighsq; - } - bool decide_by_integrator() { - return neighbor_decide_by_integrator && cu_xhold && finished_setup; - } - void update_xhold(int &maxhold, double* xhold); - - void setTimingsZero(); - void print_timings(); - - void cu_x_download() { - cu_x->download(); - } - bool device_set; - bool dotiming; - bool dotestatom; - int testatom; - - double uploadtime, downloadtime; - bool finished_setup, begin_setup; - bool oncpu; - bool finished_run; - - int self_comm; - - int cuda_exists; - - double extent[6]; - int* debugdata; - // data shared between host code and device code - // (number of atoms, device pointers for up- & download) - cuda_shared_data shared_data; - - cCudaData<double , F_CFLOAT , x >* cu_q; - cCudaData<double , F_CFLOAT , yx>* cu_f; - cCudaData<double , V_CFLOAT , x >* cu_mass; - cCudaData<double , V_CFLOAT , x >* cu_rmass; - cCudaData<double , V_CFLOAT , yx>* 
cu_v; - cCudaData<double , X_CFLOAT , yx>* cu_x; - cCudaData<double , X_CFLOAT , yx>* cu_xhold; - cCudaData<int , int , x >* cu_mask; - cCudaData<int , int , x >* cu_tag; - cCudaData<int , int , x >* cu_type; - cCudaData<int , int , x >* cu_image; - cCudaData<double , ENERGY_CFLOAT, x >* cu_eatom; - cCudaData<double , ENERGY_CFLOAT, yx>* cu_vatom; - cCudaData<double , ENERGY_CFLOAT, x >* cu_virial; - cCudaData<double , ENERGY_CFLOAT, x >* cu_eng_vdwl; - cCudaData<double , ENERGY_CFLOAT, x >* cu_eng_coul; - cCudaData<double , double , x >* cu_extent; - int* binned_id; - cCudaData<int , int , xx >* cu_binned_id; - int* binned_idnew; - cCudaData<int , int , xx >* cu_binned_idnew; - cCudaData<int , int , x >* cu_debugdata; - cCudaData<double , X_CFLOAT , x>* cu_radius; - cCudaData<double , F_CFLOAT , x>* cu_density; - cCudaData<double , V_CFLOAT , yx>* cu_omega; - cCudaData<double , F_CFLOAT , yx>* cu_torque; - cCudaData<int , int , yx >* cu_special; - cCudaData<int , int , yx >* cu_nspecial; - cCudaData<int , int , x >* cu_molecule; - - - cCudaData<X_CFLOAT , X_CFLOAT , x>* cu_x_type; - X_CFLOAT* x_type; - - cCudaData<V_CFLOAT , V_CFLOAT , x>* cu_v_radius; - V_CFLOAT* v_radius; - - cCudaData<V_CFLOAT , V_CFLOAT , x>* cu_omega_rmass; - V_CFLOAT* omega_rmass; - - cCudaData<int , int , x >* cu_map_array; - int neighbor_decide_by_integrator; - - bool pinned; - - void* copy_buffer; - int copy_buffersize; - - private: - int pppn; // number of GPUs/node - int *devicelist; // IDs of GPUs - - std::map<class NeighList*, class CudaNeighList*> neigh_lists; -}; -} - -#endif // CUDA_H diff --git a/src/USER-CUDA/verlet_cuda.cpp b/src/USER-CUDA/verlet_cuda.cpp deleted file mode 100644 index 8bb4419cda..0000000000 --- a/src/USER-CUDA/verlet_cuda.cpp +++ /dev/null @@ -1,1230 +0,0 @@ -/* ---------------------------------------------------------------------- - LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator - - Original Version: - http://lammps.sandia.gov, Sandia 
National Laboratories - Steve Plimpton, sjplimp@sandia.gov - - See the README file in the top-level LAMMPS directory. - - ----------------------------------------------------------------------- - - USER-CUDA Package and associated modifications: - https://sourceforge.net/projects/lammpscuda/ - - Christian Trott, christian.trott@tu-ilmenau.de - Lars Winterfeld, lars.winterfeld@tu-ilmenau.de - Theoretical Physics II, University of Technology Ilmenau, Germany - - See the README file in the USER-CUDA directory. - - This software is distributed under the GNU General Public License. -------------------------------------------------------------------------- */ - - -#include <cstdlib> -#include <cstdio> -#include <cstring> -#include "verlet_cuda.h" -#include "neighbor.h" -#include "domain.h" -#include "comm.h" -#include "atom.h" -#include "atom_vec.h" -#include "force.h" -#include "pair.h" -#include "bond.h" -#include "angle.h" -#include "dihedral.h" -#include "improper.h" -#include "kspace.h" -#include "output.h" -#include "update.h" -#include "modify_cuda.h" -#include "compute.h" -#include "fix.h" -#include "timer.h" -#include "memory.h" -#include "error.h" -#include "cuda_wrapper_cu.h" -#include "thermo.h" -#include "cuda_pair_cu.h" -#include "user_cuda.h" -#include <ctime> -#include <cmath> -#ifdef _OPENMP -#include <omp.h> -#endif - -using namespace LAMMPS_NS; - -#define MAKETIMEING - - -VerletCuda::VerletCuda(LAMMPS* lmp, int narg, char** arg) : Verlet(lmp, narg, arg) -{ - if (comm->me == 0) - error->warning(FLERR,"The USER-CUDA pacakge will be deprecated " - "soon - users should switch to the GPU or KOKKOS packages"); - - cuda = lmp->cuda; - - if(cuda == NULL) - error->all(FLERR, "You cannot use a /cuda class, without activating 'cuda' acceleration. 
Provide '-c on' as command-line argument to LAMMPS.."); - - modify_cuda = (ModifyCuda*) modify; - int ifix = modify->find_fix("package_omp"); - - if(ifix >= 0) external_force_clear = 1; -} - -/* ---------------------------------------------------------------------- - setup before run -------------------------------------------------------------------------- */ - -void VerletCuda::setup() -{ - //debug related variables - cuda->debugdata[0] = 0; - cuda->cu_debugdata->upload(); - dotestatom = cuda->dotestatom; - int testatom = cuda->testatom; //48267; - - if(atom->nlocal == 0) - error->warning(FLERR, "# CUDA: There are currently no atoms on one of the MPI processes. This is known to cause errors with the USER-CUDA package. Please use the 'processors' keyword to enforce more balanced processor layout."); - - MYDBG(printf("# CUDA VerletCuda::setup start\n");) - - cuda->oncpu = true; - cuda->begin_setup = true; - cuda->finished_setup = false; - cuda->finished_run = false; - - time_pair = 0; - time_kspace = 0; - time_comm = 0; - time_modify = 0; - time_fulliterate = 0; - - atom->setup(); - - cuda_shared_atom* cu_atom = & cuda->shared_data.atom; - cu_atom->update_nlocal = 1; - cu_atom->update_nmax = 1; - - if(atom->molecular || (force->kspace && (not cuda->shared_data.pppm.cudable_force))) cuda->shared_data.pair.collect_forces_later = true; - - cuda->setDomainParams(); - - - if(cuda->shared_data.me == 0) - printf("# CUDA: Using precision: Global: %u X: %u V: %u F: %u PPPM: %u \n", CUDA_PRECISION == 1 ? 
4 : 8, (int) sizeof(X_CFLOAT), (int) sizeof(V_CFLOAT), (int) sizeof(F_CFLOAT), (int) sizeof(PPPM_CFLOAT)); - - cuda->allocate(); - - if (comm->me == 0 && screen) { - fprintf(screen,"Setting up Verlet run ...\n"); - fprintf(screen," Unit style : %s\n", update->unit_style); - fprintf(screen," Current step: " BIGINT_FORMAT "\n", update->ntimestep); - fprintf(screen," Time step : %g\n", update->dt); - } - - // setup domain, communication and neighboring - // acquire ghosts - // build neighbor lists - modify->setup_pre_exchange(); - - if(triclinic) domain->x2lamda(atom->nlocal); - - domain->pbc(); - domain->reset_box(); - comm->setup(); - - if(neighbor->style) neighbor->setup_bins(); - - comm->exchange(); - - if(atom->sortfreq > 0) atom->sort(); - - comm->borders(); - - if(triclinic) domain->lamda2x(atom->nlocal + atom->nghost); - - cuda->setSystemParams(); - cuda->checkResize(); - - if(cuda->shared_data.me == 0) - printf("# CUDA: VerletCuda::setup: Upload data...\n"); - - cuda->uploadAll(); - neighbor->build(); - neighbor->ncalls = 0; - - if(atom->mass) - cuda->cu_mass->upload(); - - if(cuda->cu_map_array) - cuda->cu_map_array->upload(); - - // compute all forces - - ev_set(update->ntimestep); - - if(elist_atom) cuda->shared_data.atom.need_eatom = 1; - - if(vlist_atom) cuda->shared_data.atom.need_vatom = 1; - - if(elist_atom || vlist_atom) cuda->checkResize(); - - int test_BpA_vs_TpA = true; - my_times starttime; - my_times endtime; -#ifdef NO_PREC_TIMING - double startsec, endsec; -#endif - - //if(atom->molecular||(force->kspace&&(not cuda->shared_data.pppm.cudable_force))) cuda->shared_data.pair.collect_forces_later = false; - if(test_BpA_vs_TpA && cuda->shared_data.pair.cudable_force && force->pair && (cuda->shared_data.pair.override_block_per_atom < 0)) { - int StyleLoops = 10; - - if(cuda->shared_data.me == 0) - printf("Test TpA\n"); - - cuda->shared_data.pair.use_block_per_atom = 0; - neighbor->build(); - Cuda_Pair_GenerateXType(&cuda->shared_data); - - 
if(cuda->cu_v_radius) - Cuda_Pair_GenerateVRadius(&cuda->shared_data); - - if(cuda->cu_omega_rmass) - Cuda_Pair_GenerateOmegaRmass(&cuda->shared_data); - - force->pair->compute(eflag, vflag); - CudaWrapper_Sync(); -#ifdef NO_PREC_TIMING - startsec = 1.0 * clock() / CLOCKS_PER_SEC; -#endif - my_gettime(CLOCK_REALTIME, &starttime); - - for(int i = 0; i < StyleLoops; i++) { - Cuda_Pair_GenerateXType(&cuda->shared_data); - - if(cuda->cu_v_radius) - Cuda_Pair_GenerateVRadius(&cuda->shared_data); - - if(cuda->cu_omega_rmass) - Cuda_Pair_GenerateOmegaRmass(&cuda->shared_data); - - force->pair->compute(eflag, vflag); - CudaWrapper_Sync(); - } - - my_gettime(CLOCK_REALTIME, &endtime); - - double TpAtime = endtime.tv_sec - starttime.tv_sec + 1.0 * (endtime.tv_nsec - starttime.tv_nsec) / 1000000000; -#ifdef NO_PREC_TIMING - endsec = 1.0 * clock() / CLOCKS_PER_SEC; - TpAtime = endsec - startsec; -#endif - - if(cuda->shared_data.me == 0) - printf("Test BpA\n"); - - cuda->shared_data.pair.use_block_per_atom = 1; - neighbor->build(); - Cuda_Pair_GenerateXType(&cuda->shared_data); - - if(cuda->cu_v_radius) - Cuda_Pair_GenerateVRadius(&cuda->shared_data); - - if(cuda->cu_omega_rmass) - Cuda_Pair_GenerateOmegaRmass(&cuda->shared_data); - - force->pair->compute(eflag, vflag); - CudaWrapper_Sync(); - - my_gettime(CLOCK_REALTIME, &starttime); -#ifdef NO_PREC_TIMING - startsec = 1.0 * clock() / CLOCKS_PER_SEC; -#endif - - for(int i = 0; i < StyleLoops; i++) { - Cuda_Pair_GenerateXType(&cuda->shared_data); - - if(cuda->cu_v_radius) - Cuda_Pair_GenerateVRadius(&cuda->shared_data); - - if(cuda->cu_omega_rmass) - Cuda_Pair_GenerateOmegaRmass(&cuda->shared_data); - - force->pair->compute(eflag, vflag); - CudaWrapper_Sync(); - } - - my_gettime(CLOCK_REALTIME, &endtime); - double BpAtime = endtime.tv_sec - starttime.tv_sec + 1.0 * (endtime.tv_nsec - starttime.tv_nsec) / 1000000000; -#ifdef NO_PREC_TIMING - endsec = 1.0 * clock() / CLOCKS_PER_SEC; - BpAtime = endsec - startsec; -#endif - - 
if(cuda->shared_data.me == 0) - printf("\n# CUDA: Timing of parallelisation layout with %i loops:\n", StyleLoops); - - if(cuda->shared_data.me == 0) - printf("# CUDA: BpA TpA\n %lf %lf\n", BpAtime, TpAtime); - - if(BpAtime > TpAtime) cuda->shared_data.pair.use_block_per_atom = 0; - } else - cuda->shared_data.pair.use_block_per_atom = cuda->shared_data.pair.override_block_per_atom; - - //cuda->shared_data.pair.use_block_per_atom = 0; - if(atom->molecular || (force->kspace && (not cuda->shared_data.pppm.cudable_force))) cuda->shared_data.pair.collect_forces_later = true; - - neighbor->build(); - neighbor->ncalls = 0; - - force_clear(); - - modify->setup_pre_force(vflag); - - cuda->cu_f->download(); - - if(cuda->cu_torque) - cuda->cu_torque->download(); - - //printf("# Verlet::setup: g f[0] = (%f, %f, %f)\n", atom->f[0][0], atom->f[0][1], atom->f[0][2]); - - MYDBG(printf("# CUDA: VerletCuda::setup: initial force compute\n");) - - //test_atom(testatom,"pre pair force"); - - if(cuda->shared_data.pair.cudable_force) { - cuda->uploadAll(); - Cuda_Pair_GenerateXType(&cuda->shared_data); - - if(cuda->cu_v_radius) - Cuda_Pair_GenerateVRadius(&cuda->shared_data); - - if(cuda->cu_omega_rmass) - Cuda_Pair_GenerateOmegaRmass(&cuda->shared_data); - } - - if(force->pair) force->pair->compute(eflag, vflag); - - if(cuda->shared_data.pair.cudable_force) { - if(cuda->shared_data.pair.collect_forces_later) { - if(eflag) cuda->cu_eng_vdwl->upload(); - - if(eflag) cuda->cu_eng_coul->upload(); - - if(vflag) cuda->cu_virial->upload(); - - Cuda_Pair_CollectForces(&cuda->shared_data, eflag, vflag); - - if(eflag) cuda->cu_eng_vdwl->download(); - - if(eflag) cuda->cu_eng_coul->download(); - - if(vflag) cuda->cu_virial->download(); - } - - cuda->downloadAll(); - } - - test_atom(testatom, "post pair force"); - - MYDBG(printf("# CUDA: VerletCuda::setup: initial force compute done\n");) - //printf("# Verlet::setup: h f[0] = (%f, %f, %f)\n", atom->f[0][0], atom->f[0][1], atom->f[0][2]); - - 
if(atom->molecular) { - if(force->bond) force->bond->compute(eflag, vflag); - - if(force->angle) force->angle->compute(eflag, vflag); - - if(force->dihedral) force->dihedral->compute(eflag, vflag); - - if(force->improper) force->improper->compute(eflag, vflag); - } - - - if(cuda->shared_data.pppm.cudable_force) { - cuda->cu_tag ->upload(); - cuda->cu_type->upload(); - cuda->cu_x ->upload(); - cuda->cu_v ->upload(); - cuda->cu_f ->upload(); - - if(cu_atom->q_flag) cuda->cu_q->upload(); - } - - if(force->kspace) { - force->kspace->setup(); - force->kspace->compute(eflag, vflag); - } - - if(cuda->shared_data.pppm.cudable_force) { - cuda->cu_f ->download(); - } - - test_atom(testatom, "post kspace"); - - cuda->uploadAll(); - - if(force->newton) comm->reverse_comm(); - - cuda->downloadAll(); - - test_atom(testatom, "post reverse comm"); - - if(cuda->shared_data.me == 0) - printf("# CUDA: Total Device Memory usage post setup: %lf MB\n", 1.0 * CudaWrapper_CheckMemUsage() / 1024 / 1024); - - MYDBG(printf("# CUDA: VerletCuda::setup: call modify setup\n");) - modify->setup(vflag); - - MYDBG(printf("# CUDA: VerletCuda::setup: call modify setup done\n");) - output->setup(1); - - test_atom(testatom, "post setup"); - - MYDBG(printf("# CUDA: VerletCuda::setup: done\n");) - cuda->finished_setup = true; - cuda->oncpu = false; -} - - -//this routine is in a messy state -void VerletCuda::setup_minimal(int flag) -{ - - printf("SetupMinimal\n"); - dotestatom = 0; - int testatom = 104; - cuda->oncpu = true; - cuda->begin_setup = true; - cuda->finished_run = false; - MYDBG(printf("# CUDA VerletCuda::setup start\n");) - time_pair = 0; - time_kspace = 0; - time_comm = 0; - time_modify = 0; - time_fulliterate = 0; - - //cuda->allocate(); - - cuda_shared_atom* cu_atom = & cuda->shared_data.atom; - cu_atom->update_nlocal = 1; - cu_atom->update_nmax = 1; - - if(atom->molecular) cuda->shared_data.pair.collect_forces_later = true; - - cuda->setDomainParams(); - - - - if(cuda->shared_data.me == 
0) - printf("# CUDA: VerletCuda::setup: Allocate memory on device for maximum of %i atoms...\n", atom->nmax); - - cuda->allocate(); - - - - - // setup domain, communication and neighboring - // acquire ghosts - // build neighbor lists - - if(flag) { - if(triclinic) domain->x2lamda(atom->nlocal); - - domain->pbc(); - domain->reset_box(); - comm->setup(); - - if(neighbor->style) neighbor->setup_bins(); - - comm->exchange(); - comm->borders(); - - if(triclinic) domain->lamda2x(atom->nlocal + atom->nghost); - - cuda->setSystemParams(); - cuda->checkResize(); - neighbor->build(); - neighbor->ncalls = 0; - } - - if(cuda->shared_data.me == 0) - printf("# CUDA: VerletCuda::setup: Upload data...\n"); - - cuda->uploadAll(); - cuda->uploadAllNeighborLists(); - - if(atom->mass) - cuda->cu_mass->upload(); - - if(cuda->cu_map_array) - cuda->cu_map_array->upload(); - - // compute all forces - - ev_set(update->ntimestep); - - if(elist_atom) cuda->shared_data.atom.need_eatom = 1; - - if(vlist_atom) cuda->shared_data.atom.need_vatom = 1; - - if(elist_atom || vlist_atom) cuda->checkResize(); - - force_clear(); - cuda->cu_f->download(); - - //printf("# Verlet::setup: g f[0] = (%f, %f, %f)\n", atom->f[0][0], atom->f[0][1], atom->f[0][2]); - - cuda->cu_mass->upload(); - MYDBG(printf("# CUDA: VerletCuda::setup: initial force compute\n");) - - test_atom(testatom, "pre pair force"); - - if(cuda->shared_data.pair.cudable_force) { - cuda->uploadAll(); - Cuda_Pair_GenerateXType(&cuda->shared_data); - - if(cuda->cu_v_radius) - Cuda_Pair_GenerateVRadius(&cuda->shared_data); - - if(cuda->cu_omega_rmass) - Cuda_Pair_GenerateOmegaRmass(&cuda->shared_data); - } - - if(force->pair) force->pair->compute(eflag, vflag); - - if(cuda->shared_data.pair.cudable_force) { - if(cuda->shared_data.pair.collect_forces_later) { - if(eflag) cuda->cu_eng_vdwl->upload(); - - if(eflag) cuda->cu_eng_coul->upload(); - - if(vflag) cuda->cu_virial->upload(); - - Cuda_Pair_CollectForces(&cuda->shared_data, eflag, vflag); 
- - if(eflag) cuda->cu_eng_vdwl->download(); - - if(eflag) cuda->cu_eng_coul->download(); - - if(vflag) cuda->cu_virial->download(); - } - - cuda->downloadAll(); - } - - test_atom(testatom, "post pair force"); - - MYDBG(printf("# CUDA: VerletCuda::setup: initial force compute done\n");) - //printf("# Verlet::setup: h f[0] = (%f, %f, %f)\n", atom->f[0][0], atom->f[0][1], atom->f[0][2]); - - if(atom->molecular) { - if(force->bond) force->bond->compute(eflag, vflag); - - if(force->angle) force->angle->compute(eflag, vflag); - - if(force->dihedral) force->dihedral->compute(eflag, vflag); - - if(force->improper) force->improper->compute(eflag, vflag); - } - - - if(cuda->shared_data.pppm.cudable_force) { - cuda->cu_tag ->upload(); - cuda->cu_type->upload(); - cuda->cu_x ->upload(); - cuda->cu_v ->upload(); - cuda->cu_f ->upload(); - - if(cu_atom->q_flag) cuda->cu_q->upload(); - } - - if(force->kspace) { - force->kspace->setup(); - force->kspace->compute(eflag, vflag); - } - - if(cuda->shared_data.pppm.cudable_force) { - cuda->cu_f ->download(); - } - - test_atom(testatom, "post kspace"); - - cuda->uploadAll(); - - if(force->newton) comm->reverse_comm(); - - cuda->downloadAll(); - - test_atom(testatom, "post reverse comm"); - - if(cuda->shared_data.me == 0) - printf("# CUDA: Total Device Memory usage post setup: %lf MB\n", 1.0 * CudaWrapper_CheckMemUsage() / 1024 / 1024); - - MYDBG(printf("# CUDA: VerletCuda::setup: call modify setup\n");) - modify->setup(vflag); - - MYDBG(printf("# CUDA: VerletCuda::setup: done\n");) - cuda->finished_setup = true; - cuda->oncpu = false; -} - -//#define TESTATOM -/* ---------------------------------------------------------------------- - iterate for n steps -------------------------------------------------------------------------- */ - -void VerletCuda::run(int n) -{ - dotestatom = cuda->dotestatom; - int testatom = cuda->testatom; //48267; - - - my_times starttime; - my_times endtime; - my_times starttotal; - my_times endtotal; - - 
cuda->setTimingsZero(); - - int nflag, ntimestep, sortflag; - - int n_post_integrate = modify_cuda->n_post_integrate; - int n_pre_exchange = modify_cuda->n_pre_exchange; - int n_pre_neighbor = modify_cuda->n_pre_neighbor; - int n_pre_force = modify_cuda->n_pre_force; - int n_post_force = modify_cuda->n_post_force; - int n_end_of_step = modify_cuda->n_end_of_step; - MYDBG(printf("# CUDA: Fixes: i_int: %i p_int: %i f_int: %i pr_exc: %i pr_neigh: %i pr_f: %i p_f: %i eos: %i\n", - n_initial_integrate, n_post_integrate, n_final_integrate, n_pre_exchange, n_pre_neighbor, n_pre_force, n_post_force, n_end_of_step);) - - if(atom->sortfreq > 0) sortflag = 1; - else sortflag = 0; - - - if(cuda->shared_data.me == 0) { - if((not cuda->shared_data.pair.cudable_force) && (force->pair)) - error->warning(FLERR, "# CUDA: You asked for a Verlet integration using Cuda, " - "but selected a pair force which has not yet been ported to Cuda"); - - if((not cuda->shared_data.pppm.cudable_force) && (force->kspace)) - error->warning(FLERR, "# CUDA: You asked for a Verlet integration using Cuda, " - "but selected a kspace force which has not yet been ported to Cuda"); - - if(modify_cuda->n_post_integrate_host + modify_cuda->n_pre_exchange_host + modify_cuda->n_pre_neighbor_host + modify_cuda->n_pre_force_host + modify_cuda->n_post_force_host + modify_cuda->n_end_of_step_host + modify_cuda->n_initial_integrate_host + modify_cuda->n_final_integrate_host) - error->warning(FLERR, "# CUDA: You asked for a Verlet integration using Cuda, " - "but several fixes have not yet been ported to Cuda.\n" - "This can cause a severe speed penalty due to frequent data synchronization between host and GPU."); - - if(atom->firstgroupname) - error->warning(FLERR, "Warning: firstgroupname is used, this will cause additional data transfers."); - } - - cuda->uploadAll(); - - if(cuda->neighbor_decide_by_integrator && cuda->cu_xhold) { - const int n = cuda->shared_data.atom.maxhold; - 
CudaWrapper_CopyData(cuda->cu_xhold->dev_data(), cuda->cu_x->dev_data(), n * sizeof(X_CFLOAT)); - CudaWrapper_CopyData((void*) & ((X_CFLOAT*)cuda->cu_xhold->dev_data())[n], (void*) & ((X_CFLOAT*)cuda->cu_x->dev_data())[atom->nmax], n * sizeof(X_CFLOAT)); - CudaWrapper_CopyData((void*) & ((X_CFLOAT*)cuda->cu_xhold->dev_data())[2 * n], (void*) & ((X_CFLOAT*)cuda->cu_x->dev_data())[2 * atom->nmax], n * sizeof(X_CFLOAT)); - } - - cuda->shared_data.atom.reneigh_flag = 0; - cuda->shared_data.atom.update_nlocal = 1; - cuda->shared_data.atom.update_nmax = 1; - cuda->shared_data.atom.update_neigh = 1; - cuda->shared_data.domain.update = 1; - cuda->shared_data.buffer_new = 1; - cuda->uploadtime = 0; - cuda->downloadtime = 0; - int firstreneigh = 1; - - for(int i = 0; i < n; i++) { - if(atom->nlocal == 0) - error->warning(FLERR, "# CUDA: There are currently no atoms on one of the MPI processes. This is currently prone to encountering errors with USER-CUDA package. Please use the 'processors' keyword to use a more balanced processor layout."); - - ntimestep = ++update->ntimestep; - ev_set(ntimestep); - - // initial time integration - - test_atom(testatom, "Pre initial"); - - MYDBG(printf("# CUDA VerletCuda::iterate: before initial_integrate\n");) - - modify->initial_integrate(vflag); - - MYDBG(printf("# CUDA VerletCuda::iterate: after initial_integrate\n");) - - if(n_post_integrate) modify->post_integrate(); - - - - // regular communication vs neighbor list rebuild - - test_atom(testatom, "Pre Exchange"); - - MYDBG(printf("# CUDA VerletCuda::iterate: before neighbor decide\n");) - nflag = neighbor->decide(); - - if(nflag == 0) { - MYDBG(printf("# CUDA VerletCuda::iterate: communicate\n");) - timer->stamp(); - - if((not(eflag || vflag)) && (cuda->shared_data.overlap_comm)) { - //overlap forward communication of ghost atom positions with inner force calculation (interactions between local atoms) - //build communication buffers - // printf("Pre forward_comm(1)\n"); - 
my_gettime(CLOCK_REALTIME, &starttotal); - cuda->shared_data.atom.reneigh_flag = 0; - my_gettime(CLOCK_REALTIME, &starttime); - timer->stamp(); - comm->forward_comm(1); - timer->stamp(Timer::COMM); - my_gettime(CLOCK_REALTIME, &endtime); - cuda->shared_data.cuda_timings.comm_forward_total += - endtime.tv_sec - starttime.tv_sec + 1.0 * (endtime.tv_nsec - starttime.tv_nsec) / 1000000000; - - //prepare force calculation - // printf("Pre force_clear\n"); - force_clear(); - // printf("Pre Generate XType\n"); - Cuda_Pair_GenerateXType(&cuda->shared_data); - - if(cuda->cu_v_radius) - Cuda_Pair_GenerateVRadius(&cuda->shared_data); - - if(cuda->cu_omega_rmass) - Cuda_Pair_GenerateOmegaRmass(&cuda->shared_data); - - //start force calculation asynchronus - cuda->shared_data.comm.comm_phase = 1; - force->pair->compute(eflag, vflag); - timer->stamp(Timer::PAIR); - //CudaWrapper_Sync(); - - //download comm buffers from GPU, perform MPI communication and upload buffers again - my_gettime(CLOCK_REALTIME, &starttime); - comm->forward_comm(2); - my_gettime(CLOCK_REALTIME, &endtime); - cuda->shared_data.cuda_timings.comm_forward_total += - endtime.tv_sec - starttime.tv_sec + 1.0 * (endtime.tv_nsec - starttime.tv_nsec) / 1000000000; - timer->stamp(Timer::COMM); - - //wait for force calculation - CudaWrapper_Sync(); - timer->stamp(Timer::PAIR); - - //unpack communication buffers - my_gettime(CLOCK_REALTIME, &starttime); - comm->forward_comm(3); - my_gettime(CLOCK_REALTIME, &endtime); - cuda->shared_data.cuda_timings.comm_forward_total += - endtime.tv_sec - starttime.tv_sec + 1.0 * (endtime.tv_nsec - starttime.tv_nsec) / 1000000000; - - timer->stamp(Timer::COMM); - MYDBG(printf("# CUDA VerletCuda::iterate: communicate done\n");) - cuda->shared_data.cuda_timings.test1 += - endtotal.tv_sec - starttotal.tv_sec + 1.0 * (endtotal.tv_nsec - starttotal.tv_nsec) / 1000000000; - } else { - //perform standard forward communication - my_gettime(CLOCK_REALTIME, &starttime); - comm->forward_comm(); 
- my_gettime(CLOCK_REALTIME, &endtime); - cuda->shared_data.cuda_timings.comm_forward_total += - endtime.tv_sec - starttime.tv_sec + 1.0 * (endtime.tv_nsec - starttime.tv_nsec) / 1000000000; - timer->stamp(Timer::COMM); - MYDBG(printf("# CUDA VerletCuda::iterate: communicate done\n");) - } - } else { - int nlocalold = cuda->shared_data.atom.nlocal; - - if(firstreneigh) { - cuda->shared_data.atom.update_nlocal = 1; - cuda->shared_data.atom.update_nmax = 1; - firstreneigh = 0; - } - - cuda->shared_data.buffer_new = 1; - MYDBG(printf("# CUDA VerletCuda::iterate: neighbor\n");) - cuda->setDomainParams(); - - if(n_pre_exchange) modify->pre_exchange(); - - if(atom->nlocal != cuda->shared_data.atom.nlocal) { //did someone add atoms during pre_exchange? - cuda->checkResize(); - cuda->uploadAll(); - } - - //check domain changes - if(domain->triclinic) domain->x2lamda(atom->nlocal); - - MYDBG(printf("# CUDA VerletCuda::iterate: neighbor pbc\n");) - domain->pbc(); - - if(domain->box_change) { - domain->reset_box(); - comm->setup(); - - if(neighbor->style) neighbor->setup_bins(); - - } - - timer->stamp(); - MYDBG(printf("# CUDA VerletCuda::iterate: neighbor exchange\n");) - - //perform exchange of local atoms - my_gettime(CLOCK_REALTIME, &starttime); - comm->exchange(); - my_gettime(CLOCK_REALTIME, &endtime); - - //special and nspecial fields of the atom data are not currently transfered via the GPU buffer might be changed in the future - if(comm->nprocs > 1) { - my_gettime(CLOCK_REALTIME, &starttime); - - if(atom->special) - cuda->cu_special->upload(); - - if(atom->nspecial) - cuda->cu_nspecial->upload(); - - my_gettime(CLOCK_REALTIME, &endtime); - cuda->shared_data.cuda_timings.test1 += - endtime.tv_sec - starttime.tv_sec + 1.0 * (endtime.tv_nsec - starttime.tv_nsec) / 1000000000; - } - - cuda->shared_data.cuda_timings.comm_exchange_total += - endtime.tv_sec - starttime.tv_sec + 1.0 * (endtime.tv_nsec - starttime.tv_nsec) / 1000000000; - - if(nlocalold != 
cuda->shared_data.atom.nlocal) cuda->shared_data.atom.update_nlocal = 2; - - //sort atoms - if(sortflag && ntimestep >= atom->nextsort) atom->sort(); - - MYDBG(printf("# CUDA VerletCuda::iterate: neighbor borders\n");) - - //generate ghost atom lists, and transfer ghost atom data - my_gettime(CLOCK_REALTIME, &starttime); - comm->borders(); - my_gettime(CLOCK_REALTIME, &endtime); - cuda->shared_data.cuda_timings.comm_border_total += - endtime.tv_sec - starttime.tv_sec + 1.0 * (endtime.tv_nsec - starttime.tv_nsec) / 1000000000; - - my_gettime(CLOCK_REALTIME, &starttime); - //atom index maps are generated on CPU, and need to be transfered to GPU if they are used - if(cuda->cu_map_array) - cuda->cu_map_array->upload(); - - - if(domain->triclinic) domain->lamda2x(atom->nlocal + atom->nghost); - - if(n_pre_neighbor) modify->pre_neighbor(); - - cuda->shared_data.buffer_new = 2; - - MYDBG(printf("# CUDA VerletCuda::iterate: neighbor build\n");) - timer->stamp(Timer::COMM); - my_gettime(CLOCK_REALTIME, &endtime); - cuda->shared_data.cuda_timings.test2 += - endtime.tv_sec - starttime.tv_sec + 1.0 * (endtime.tv_nsec - starttime.tv_nsec) / 1000000000; - - //rebuild neighbor list - test_atom(testatom, "Pre Neighbor"); - neighbor->build(0); - timer->stamp(Timer::NEIGH); - MYDBG(printf("# CUDA VerletCuda::iterate: neighbor done\n");) - //if bonded interactions are used (in this case collect_forces_later is true), transfer data which only changes upon exchange/border routines from GPU to CPU - if(cuda->shared_data.pair.collect_forces_later) { - if(cuda->cu_molecule) cuda->cu_molecule->downloadAsync(2); - - cuda->cu_tag->downloadAsync(2); - cuda->cu_type->downloadAsync(2); - cuda->cu_mask->downloadAsync(2); - - if(cuda->cu_q) cuda->cu_q->downloadAsync(2); - } - cuda->shared_data.comm.comm_phase = 3; - } - - test_atom(testatom, "Post Exchange"); - - // force computations - - //only do force_clear if it has not been done during overlap of communication with local interactions - 
if(not((not(eflag || vflag)) && (cuda->shared_data.overlap_comm) && (cuda->shared_data.comm.comm_phase < 3))) - force_clear(); - - if(n_pre_force) modify->pre_force(vflag); - - timer->stamp(); - - //if overlap of bonded interactions with nonbonded interactions takes place, download forces and positions - /* if(cuda->shared_data.pair.collect_forces_later) - { - cuda->cu_x->downloadAsync(2); - cuda->cu_f->downloadAsync(2); - }*/ - - if(force->pair) { - if((not(eflag || vflag)) && (cuda->shared_data.overlap_comm) && (cuda->shared_data.comm.comm_phase < 3) && cuda->shared_data.pair.cudable_force) { - //second part of force calculations in case of overlaping it with commuincation. Only interactions between local and ghost atoms are done now - //regenerate data layout for force computations, its actually only needed for the ghost atoms - cuda->shared_data.comm.comm_phase = 2; - - my_times atime1, atime2; - my_gettime(CLOCK_REALTIME, &atime1); - - Cuda_Pair_GenerateXType(&cuda->shared_data); - - if(cuda->cu_v_radius) - Cuda_Pair_GenerateVRadius(&cuda->shared_data); - - if(cuda->cu_omega_rmass) - Cuda_Pair_GenerateOmegaRmass(&cuda->shared_data); - - my_gettime(CLOCK_REALTIME, &atime2); - cuda->shared_data.cuda_timings.pair_xtype_conversion += - atime2.tv_sec - atime1.tv_sec + 1.0 * (atime2.tv_nsec - atime1.tv_nsec) / 1000000000; - force->pair->compute(eflag, vflag); - - } else { - //calculate complete pair interactions - if(not cuda->shared_data.pair.cudable_force) cuda->downloadAll(); - else { - //regenerate data layout for force computations, its actually only needed for the ghost atoms - my_times atime1, atime2; - my_gettime(CLOCK_REALTIME, &atime1); - - Cuda_Pair_GenerateXType(&cuda->shared_data); - - if(cuda->cu_v_radius) - Cuda_Pair_GenerateVRadius(&cuda->shared_data); - - if(cuda->cu_omega_rmass) - Cuda_Pair_GenerateOmegaRmass(&cuda->shared_data); - - my_gettime(CLOCK_REALTIME, &atime2); - cuda->shared_data.cuda_timings.pair_xtype_conversion += - atime2.tv_sec - 
atime1.tv_sec + 1.0 * (atime2.tv_nsec - atime1.tv_nsec) / 1000000000; - } - - cuda->shared_data.comm.comm_phase = 0; - force->pair->compute(eflag, vflag); - } - - if(not cuda->shared_data.pair.cudable_force) cuda->uploadAll(); - - //wait for force calculation in case of not using overlap with bonded interactions - if(not cuda->shared_data.pair.collect_forces_later) - CudaWrapper_Sync(); - - timer->stamp(Timer::PAIR); - } - - //calculate bonded interactions - if(atom->molecular) { - cuda->cu_x->downloadAsync(2); - - if(n_pre_force == 0) Verlet::force_clear(); - else cuda->cu_f->downloadAsync(2); - - timer->stamp(Timer::PAIR); - - if(neighbor->lastcall == update->ntimestep) { - neighbor->build_topology(); - timer->stamp(Timer::NEIGH); - } - - test_atom(testatom, "pre bond force"); - - if(force->bond) force->bond->compute(eflag, vflag); - - if(force->angle) force->angle->compute(eflag, vflag); - - if(force->dihedral) force->dihedral->compute(eflag, vflag); - - if(force->improper) force->improper->compute(eflag, vflag); - - timer->stamp(Timer::BOND); - } - - //collect forces in case pair force and bonded interactions were overlapped, and either no KSPACE or a GPU KSPACE style is used - if(cuda->shared_data.pair.collect_forces_later && cuda->shared_data.pair.cudable_force && (not(force->kspace && (not cuda->shared_data.pppm.cudable_force)))) { - my_gettime(CLOCK_REALTIME, &starttime); - cuda->cu_f->uploadAsync(2); - - test_atom(testatom, "post molecular force"); - - - if(eflag) cuda->cu_eng_vdwl->upload(); - - if(eflag) cuda->cu_eng_coul->upload(); - - if(vflag) cuda->cu_virial->upload(); - - Cuda_Pair_CollectForces(&cuda->shared_data, eflag, vflag); - - if(eflag) cuda->cu_eng_vdwl->download(); - - if(eflag) cuda->cu_eng_coul->download(); - - if(vflag) cuda->cu_virial->download(); - - timer->stamp(Timer::PAIR); - - my_gettime(CLOCK_REALTIME, &endtime); - cuda->shared_data.cuda_timings.pair_force_collection += - endtime.tv_sec - starttime.tv_sec + 1.0 * (endtime.tv_nsec 
- starttime.tv_nsec) / 1000000000; - } - - //compute kspace force - if(force->kspace) { - if((not cuda->shared_data.pppm.cudable_force) && (not cuda->shared_data.pair.collect_forces_later)) - cuda->downloadAll(); - - if((not cuda->shared_data.pppm.cudable_force) && (cuda->shared_data.pair.collect_forces_later) && (not atom->molecular)) { - cuda->cu_x->downloadAsync(2); - - if(n_pre_force == 0) Verlet::force_clear(); - else cuda->cu_f->downloadAsync(2); - - timer->stamp(Timer::PAIR); - } - - force->kspace->compute(eflag, vflag); - - if((not cuda->shared_data.pppm.cudable_force) && (not cuda->shared_data.pair.collect_forces_later)) - cuda->uploadAll(); - - timer->stamp(Timer::KSPACE); - } - - //collect forces in case pair forces and kspace was overlaped - if(cuda->shared_data.pair.collect_forces_later && cuda->shared_data.pair.cudable_force && ((force->kspace && (not cuda->shared_data.pppm.cudable_force)))) { - cuda->cu_f->uploadAsync(2); - - my_gettime(CLOCK_REALTIME, &starttime); - - if(eflag) cuda->cu_eng_vdwl->upload(); - - if(eflag) cuda->cu_eng_coul->upload(); - - if(vflag) cuda->cu_virial->upload(); - - Cuda_Pair_CollectForces(&cuda->shared_data, eflag, vflag); - - if(eflag) cuda->cu_eng_vdwl->download(); - - if(eflag) cuda->cu_eng_coul->download(); - - if(vflag) cuda->cu_virial->download(); - - timer->stamp(Timer::PAIR); - - my_gettime(CLOCK_REALTIME, &endtime); - cuda->shared_data.cuda_timings.pair_force_collection += - endtime.tv_sec - starttime.tv_sec + 1.0 * (endtime.tv_nsec - starttime.tv_nsec) / 1000000000; - } - - //send forces on ghost atoms back to other GPU: THIS SHOULD NEVER HAPPEN - if(force->newton) { - comm->reverse_comm(); - timer->stamp(Timer::COMM); - } - - test_atom(testatom, "post force"); - // force modifications, final time integration, diagnostics - - if(n_post_force) modify->post_force(vflag); - - test_atom(testatom, "pre final"); - - modify->final_integrate(); - - test_atom(testatom, "post final"); - - if(n_end_of_step) 
modify->end_of_step(); - - // all output - - test_atom(testatom, "pre output"); - - if(ntimestep == output->next) { - if(not output->thermo->cudable) - cuda->downloadAll(); - - timer->stamp(); - output->write(ntimestep); - timer->stamp(Timer::OUTPUT); - } - - - test_atom(testatom, "post output"); - - if(cuda->shared_data.atom.update_nlocal > 0) - cuda->shared_data.atom.update_nlocal--; - - if(cuda->shared_data.atom.update_nmax > 0) - cuda->shared_data.atom.update_nmax--; - - if(cuda->shared_data.atom.update_neigh > 0) - cuda->shared_data.atom.update_neigh--; - - if(cuda->shared_data.domain.update > 0) - cuda->shared_data.domain.update--; - - if(cuda->shared_data.buffer_new > 0) - cuda->shared_data.buffer_new--; - - cuda->shared_data.atom.reneigh_flag = 0; - } - - - cuda->downloadAll(); - cuda->downloadAllNeighborLists(); - cuda->shared_data.atom.update_nlocal = 1; - cuda->shared_data.atom.update_nmax = 1; - cuda->shared_data.atom.update_neigh = 1; - cuda->shared_data.buffer_new = 1; - cuda->shared_data.domain.update = 1; - cuda->oncpu = true; - cuda->finished_run = true; -} - - -/* ---------------------------------------------------------------------- - clear force on own & ghost atoms - setup and clear other arrays as needed -------------------------------------------------------------------------- */ - -void VerletCuda::force_clear() -{ - cuda->cu_f->memset_device(0); - - if(cuda->cu_torque) cuda->cu_torque->memset_device(0); - -#if 0 - //The rest should not be necessary - int i; - - for(i = 0; i < atom->nlocal; i++) { - atom->f[i][0] = 0.0; - atom->f[i][1] = 0.0; - atom->f[i][2] = 0.0; - } - - // clear force on all particles - // if either newton flag is set, also include ghosts - - if(neighbor->includegroup == 0) { - int nall; - - if(force->newton) nall = atom->nlocal + atom->nghost; - else nall = atom->nlocal; - - if(torqueflag) { - double** torque = atom->torque; - - for(i = 0; i < nall; i++) { - torque[i][0] = 0.0; - torque[i][1] = 0.0; - torque[i][2] = 0.0; 
- } - } - - // neighbor includegroup flag is set - // clear force only on initial nfirst particles - // if either newton flag is set, also include ghosts - - } else { - int nall = atom->nfirst; - - - if(torqueflag) { - double** torque = atom->torque; - - for(i = 0; i < nall; i++) { - torque[i][0] = 0.0; - torque[i][1] = 0.0; - torque[i][2] = 0.0; - } - } - - if(force->newton) { - nall = atom->nlocal + atom->nghost; - - if(torqueflag) { - double** torque = atom->torque; - - for(i = atom->nlocal; i < nall; i++) { - torque[i][0] = 0.0; - torque[i][1] = 0.0; - torque[i][2] = 0.0; - } - } - } - } -#endif -} - -void VerletCuda::test_atom(int aatom, const char* string) //printing properties of one atom for test purposes -{ - if(not dotestatom) return; - - bool check = false; - - if(cuda->finished_setup) cuda->downloadAll(); - - for(int i = 0; i < atom->nlocal + atom->nghost; i++) { - if((atom->tag[i] == aatom) && (i < atom->nlocal)) { - - printf("%i # CUDA %s: " BIGINT_FORMAT " %i %e %e %e %i ", - comm->me, string, update->ntimestep, atom->tag[i], - atom->x[i][0], atom->v[i][0], atom->f[i][0], i); - - if(atom->molecular && (i < atom->nlocal)) { - printf(" // %i %i %i ", atom->num_bond[i], atom->num_angle[i], atom->num_dihedral[i]); - - for(int k = 0; k < atom->num_bond[i]; k++) - printf("// %i %i ", atom->bond_type[i][k], atom->bond_atom[i][k]); - } - - printf("\n"); - } - - if(i < atom->nlocal) { - if((atom->v[i][0] < -100 || atom->v[i][0] > 100) || - (atom->v[i][1] < -100 || atom->v[i][1] > 100) || - (atom->v[i][2] < -100 || atom->v[i][2] > 100) || - (atom->v[i][0] != atom->v[i][0]) || - (atom->v[i][1] != atom->v[i][1]) || - (atom->v[i][2] != atom->v[i][2])) { - printf("%i # CUDA %s velocity: %i %e %e %e %i\n", comm->me, string, atom->tag[i], atom->x[i][0], atom->v[i][0], atom->f[i][0], i); - check = true; - } - - if((atom->f[i][0] < -10000 || atom->f[i][0] > 10000) || - (atom->f[i][1] < -10000 || atom->f[i][1] > 10000) || - (atom->f[i][2] < -10000 || atom->f[i][2] > 
10000) || - (atom->f[i][0] != atom->f[i][0]) || - (atom->f[i][1] != atom->f[i][1]) || - (atom->f[i][2] != atom->f[i][2])) { - printf("%i # CUDA %s force: %i %e %e %e %i\n", comm->me, string, atom->tag[i], atom->x[i][0], atom->v[i][0], atom->f[i][0], i); - check = true; - } - - if(atom->tag[i] <= 0) - printf("%i # CUDA %s tag: %i %e %e %e %i\n", comm->me, string, atom->tag[i], atom->x[i][0], atom->v[i][0], atom->f[i][0], i); - } - } - - if(check) exit(0); -} diff --git a/src/USER-CUDA/verlet_cuda.h b/src/USER-CUDA/verlet_cuda.h deleted file mode 100644 index 6760828010..0000000000 --- a/src/USER-CUDA/verlet_cuda.h +++ /dev/null @@ -1,63 +0,0 @@ -/* -*- c++ -*- ---------------------------------------------------------- - LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator - - Original Version: - http://lammps.sandia.gov, Sandia National Laboratories - Steve Plimpton, sjplimp@sandia.gov - - See the README file in the top-level LAMMPS directory. - - ----------------------------------------------------------------------- - - USER-CUDA Package and associated modifications: - https://sourceforge.net/projects/lammpscuda/ - - Christian Trott, christian.trott@tu-ilmenau.de - Lars Winterfeld, lars.winterfeld@tu-ilmenau.de - Theoretical Physics II, University of Technology Ilmenau, Germany - - See the README file in the USER-CUDA directory. - - This software is distributed under the GNU General Public License. 
-------------------------------------------------------------------------- */ - -#ifdef INTEGRATE_CLASS - -IntegrateStyle(verlet/cuda,VerletCuda) - -#else - - -#ifndef LMP_VERLET_CUDA_H -#define LMP_VERLET_CUDA_H -#include "verlet.h" -#include "modify_cuda.h" - -namespace LAMMPS_NS { - -class VerletCuda : public Verlet -{ - public: - VerletCuda(class LAMMPS *, int, char **); - void setup(); - void setup_minimal(int); - void run(int); - - void test_atom(int atom,const char* astring); //debugging purpose - int dotestatom; //debugging purpose - - protected: - class Cuda *cuda; - void force_clear(); - double time_pair; - double time_kspace; - double time_comm; - double time_modify; - double time_fulliterate; - ModifyCuda* modify_cuda; -}; - -} - -#endif -#endif -- GitLab