diff --git a/cmake/CMakeLists.txt b/cmake/CMakeLists.txt index bc33da60deb67ed65bbdd9c3b06c8cbc368a3381..967f22975978c177aa53e5918cf27a3a7e49869d 100644 --- a/cmake/CMakeLists.txt +++ b/cmake/CMakeLists.txt @@ -37,6 +37,10 @@ enable_language(CXX) ##################################################################### include(CheckCCompilerFlag) +if (${CMAKE_CXX_COMPILER_ID} STREQUAL "Intel") + set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -restrict") +endif() + ######################################################################## # User input options # ######################################################################## @@ -76,7 +80,7 @@ add_definitions(-DLAMMPS_MEMALIGN=${LAMMPS_MEMALIGN}) option(LAMMPS_EXCEPTIONS "enable the use of C++ exceptions for error messages (useful for library interface)" OFF) if(LAMMPS_EXCEPTIONS) add_definitions(-DLAMMPS_EXCEPTIONS) - set(LAMMPS_API_DEFINES "${LAMMPS_API_DEFINES -DLAMMPS_EXCEPTIONS") + set(LAMMPS_API_DEFINES "${LAMMPS_API_DEFINES} -DLAMMPS_EXCEPTIONS") endif() set(LAMMPS_MACHINE "" CACHE STRING "Suffix to append to lmp binary and liblammps (WON'T enable any features automatically") @@ -101,7 +105,7 @@ set(OTHER_PACKAGES KIM PYTHON MSCG MPIIO VORONOI POEMS LATTE USER-CGSDK USER-COLVARS USER-DIFFRACTION USER-DPD USER-DRUDE USER-EFF USER-FEP USER-H5MD USER-LB USER-MANIFOLD USER-MEAMC USER-MGPT USER-MISC USER-MOLFILE USER-NETCDF USER-PHONON USER-QTB USER-REAXC USER-SMD - USER-SMTBQ USER-SPH USER-TALLY USER-VTK USER-QUIP USER-QMMM) + USER-SMTBQ USER-SPH USER-TALLY USER-UEF USER-VTK USER-QUIP USER-QMMM) set(ACCEL_PACKAGES USER-OMP KOKKOS OPT USER-INTEL GPU) foreach(PKG ${DEFAULT_PACKAGES}) option(ENABLE_${PKG} "Build ${PKG} Package" ${ENABLE_ALL}) @@ -665,7 +669,9 @@ include_directories(${LAMMPS_STYLE_HEADERS_DIR}) ############################################ add_library(lammps ${LIB_SOURCES}) target_link_libraries(lammps ${LAMMPS_LINK_LIBS}) -add_dependencies(lammps ${LAMMPS_DEPS}) +if(LAMMPS_DEPS) + add_dependencies(lammps 
${LAMMPS_DEPS}) +endif() set_target_properties(lammps PROPERTIES OUTPUT_NAME lammps${LAMMPS_MACHINE}) if(BUILD_SHARED_LIBS) set_target_properties(lammps PROPERTIES SOVERSION ${SOVERSION}) diff --git a/doc/src/Eqs/bond_gromos.jpg b/doc/src/Eqs/bond_gromos.jpg new file mode 100644 index 0000000000000000000000000000000000000000..479e6b2d3b2ed907e9191564d8b8edbd42ea3f62 Binary files /dev/null and b/doc/src/Eqs/bond_gromos.jpg differ diff --git a/doc/src/Eqs/bond_gromos.tex b/doc/src/Eqs/bond_gromos.tex new file mode 100644 index 0000000000000000000000000000000000000000..2cd8c39535390e267735d2adca622468ae0decd1 --- /dev/null +++ b/doc/src/Eqs/bond_gromos.tex @@ -0,0 +1,10 @@ +\documentclass[12pt]{article} +\pagestyle{empty} + +\begin{document} + +$$ + E = K (r^2 - r_0^2)^2 +$$ + +\end{document} diff --git a/doc/src/Eqs/fix_rhok.jpg b/doc/src/Eqs/fix_rhok.jpg new file mode 100644 index 0000000000000000000000000000000000000000..829a866be449ecb0f23dc77f547c5b7d053c0469 Binary files /dev/null and b/doc/src/Eqs/fix_rhok.jpg differ diff --git a/doc/src/Eqs/fix_rhok.tex b/doc/src/Eqs/fix_rhok.tex new file mode 100644 index 0000000000000000000000000000000000000000..a468dfedc963e5d5616ca9baae019145bc421e51 --- /dev/null +++ b/doc/src/Eqs/fix_rhok.tex @@ -0,0 +1,11 @@ +\documentclass[12pt]{article} + +\begin{document} + +\begin{eqnarray*} + U &=& \frac{1}{2} K (|\rho_{\vec{k}}| - a)^2 \\ + \rho_{\vec{k}} &=& \sum_j^N \exp(-i\vec{k} \cdot \vec{r}_j )/\sqrt{N} \\ + \vec{k} &=& (2\pi n_x /L_x , 2\pi n_y /L_y , 2\pi n_z/L_z ) +\end{eqnarray*} + +\end{document} diff --git a/doc/src/JPG/uef_frames.jpg b/doc/src/JPG/uef_frames.jpg new file mode 100644 index 0000000000000000000000000000000000000000..3b3bfc3a245de1b00d594f865b9c93f1149deffd Binary files /dev/null and b/doc/src/JPG/uef_frames.jpg differ diff --git a/doc/src/JPG/user_intel.png b/doc/src/JPG/user_intel.png index 7ec83b3207b06c4bbda7d56f2a7d9d94a15d115d..5061f1af2e26d9c2c1110390143d9ebf96946bd4 100755 Binary files 
a/doc/src/JPG/user_intel.png and b/doc/src/JPG/user_intel.png differ diff --git a/doc/src/Manual.txt b/doc/src/Manual.txt index 303e951fabbc65a9904e5af9f1b589c39e8234c7..c31ec1758799000049c2825da2c413a2790002c3 100644 --- a/doc/src/Manual.txt +++ b/doc/src/Manual.txt @@ -1,7 +1,7 @@ <!-- HTML_ONLY --> <HEAD> <TITLE>LAMMPS Users Manual</TITLE> -<META NAME="docnumber" CONTENT="22 Sep 2017 version"> +<META NAME="docnumber" CONTENT="23 Oct 2017 version"> <META NAME="author" CONTENT="http://lammps.sandia.gov - Sandia National Laboratories"> <META NAME="copyright" CONTENT="Copyright (2003) Sandia Corporation. This software and manual is distributed under the GNU General Public License."> </HEAD> @@ -21,7 +21,7 @@ <H1></H1> LAMMPS Documentation :c,h3 -22 Sep 2017 version :c,h4 +23 Oct 2017 version :c,h4 Version info: :h4 diff --git a/doc/src/PDF/colvars-refman-lammps.pdf b/doc/src/PDF/colvars-refman-lammps.pdf index ad15752107cc006471cc4b3e7a400aa1658f26e2..daa1393269b953d569aff9846a135480dcdd42c7 100644 Binary files a/doc/src/PDF/colvars-refman-lammps.pdf and b/doc/src/PDF/colvars-refman-lammps.pdf differ diff --git a/doc/src/Section_commands.txt b/doc/src/Section_commands.txt index 0d46a01424ff0c986e5f9137b995a65ab11cbc3a..e816c8831b7c1a2ca9a1f99039b1f6ced7d08997 100644 --- a/doc/src/Section_commands.txt +++ b/doc/src/Section_commands.txt @@ -720,6 +720,8 @@ package"_Section_start.html#start_3. "nve/eff"_fix_nve_eff.html, "nvt/eff"_fix_nh_eff.html, "nvt/sllod/eff"_fix_nvt_sllod_eff.html, +"npt/uef"_fix_nh_uef.html, +"nvt/uef"_fix_nh_uef.html, "phonon"_fix_phonon.html, "pimd"_fix_pimd.html, "qbmsst"_fix_qbmsst.html, @@ -728,6 +730,7 @@ package"_Section_start.html#start_3. "qtb"_fix_qtb.html, "reax/c/bonds"_fix_reax_bonds.html, "reax/c/species"_fix_reaxc_species.html, +"rhok"_fix_rhok.html, "rx"_fix_rx.html, "saed/vtk"_fix_saed_vtk.html, "shardlow"_fix_shardlow.html, @@ -856,6 +859,7 @@ package"_Section_start.html#start_3. 
"meso/t/atom"_compute_meso_t_atom.html, "pe/tally"_compute_tally.html, "pe/mol/tally"_compute_tally.html, +"pressure/uef"_compute_pressure_uef.html, "saed"_compute_saed.html, "smd/contact/radius"_compute_smd_contact_radius.html, "smd/damage"_compute_smd_damage.html, @@ -884,6 +888,7 @@ package"_Section_start.html#start_3. "temp/deform/eff"_compute_temp_deform_eff.html, "temp/region/eff"_compute_temp_region_eff.html, "temp/rotate"_compute_temp_rotate.html, +"temp/uef"_compute_temp_uef.html, "xrd"_compute_xrd.html :tb(c=6,ea=c) :line @@ -915,11 +920,12 @@ KOKKOS, o = USER-OMP, t = OPT. "born/coul/long/cs"_pair_born.html, "born/coul/msm (o)"_pair_born.html, "born/coul/wolf (go)"_pair_born.html, +"born/coul/wolf/cs"_pair_born.html, "brownian (o)"_pair_brownian.html, "brownian/poly (o)"_pair_brownian.html, -"buck (gkio)"_pair_buck.html, -"buck/coul/cut (gkio)"_pair_buck.html, -"buck/coul/long (gkio)"_pair_buck.html, +"buck (giko)"_pair_buck.html, +"buck/coul/cut (giko)"_pair_buck.html, +"buck/coul/long (giko)"_pair_buck.html, "buck/coul/long/cs"_pair_buck.html, "buck/coul/msm (o)"_pair_buck.html, "buck/long/coul/long (o)"_pair_buck_long.html, @@ -934,12 +940,13 @@ KOKKOS, o = USER-OMP, t = OPT. "coul/msm"_pair_coul.html, "coul/streitz"_pair_coul.html, "coul/wolf (ko)"_pair_coul.html, -"dpd (go)"_pair_dpd.html, +"coul/wolf/cs"_pair_coul.html, +"dpd (gio)"_pair_dpd.html, "dpd/tstat (go)"_pair_dpd.html, "dsmc"_pair_dsmc.html, -"eam (gkiot)"_pair_eam.html, -"eam/alloy (gkiot)"_pair_eam.html, -"eam/fs (gkiot)"_pair_eam.html, +"eam (gikot)"_pair_eam.html, +"eam/alloy (gikot)"_pair_eam.html, +"eam/fs (gikot)"_pair_eam.html, "eim (o)"_pair_eim.html, "gauss (go)"_pair_gauss.html, "gayberne (gio)"_pair_gayberne.html, @@ -953,9 +960,9 @@ KOKKOS, o = USER-OMP, t = OPT. 
"kim"_pair_kim.html, "lcbop"_pair_lcbop.html, "line/lj"_pair_line_lj.html, -"lj/charmm/coul/charmm (kio)"_pair_charmm.html, +"lj/charmm/coul/charmm (iko)"_pair_charmm.html, "lj/charmm/coul/charmm/implicit (ko)"_pair_charmm.html, -"lj/charmm/coul/long (gkio)"_pair_charmm.html, +"lj/charmm/coul/long (giko)"_pair_charmm.html, "lj/charmm/coul/msm"_pair_charmm.html, "lj/charmmfsw/coul/charmmfsh"_pair_charmm.html, "lj/charmmfsw/coul/long"_pair_charmm.html, @@ -1005,9 +1012,9 @@ KOKKOS, o = USER-OMP, t = OPT. "resquared (go)"_pair_resquared.html, "snap"_pair_snap.html, "soft (go)"_pair_soft.html, -"sw (gkio)"_pair_sw.html, +"sw (giko)"_pair_sw.html, "table (gko)"_pair_table.html, -"tersoff (gkio)"_pair_tersoff.html, +"tersoff (giko)"_pair_tersoff.html, "tersoff/mod (gko)"_pair_tersoff_mod.html, "tersoff/mod/c (o)"_pair_tersoff_mod.html, "tersoff/zbl (gko)"_pair_tersoff_zbl.html, @@ -1111,6 +1118,7 @@ KOKKOS, o = USER-OMP, t = OPT. "class2 (ko)"_bond_class2.html, "fene (iko)"_bond_fene.html, "fene/expand (o)"_bond_fene_expand.html, +"gromos (o)"_bond_gromos.html, "harmonic (ko)"_bond_harmonic.html, "morse (o)"_bond_morse.html, "nonlinear (o)"_bond_nonlinear.html, @@ -1177,7 +1185,7 @@ USER-OMP, t = OPT. "none"_dihedral_none.html, "zero"_dihedral_zero.html, "hybrid"_dihedral_hybrid.html, -"charmm (ko)"_dihedral_charmm.html, +"charmm (iko)"_dihedral_charmm.html, "charmmfsw"_dihedral_charmm.html, "class2 (ko)"_dihedral_class2.html, "harmonic (io)"_dihedral_harmonic.html, @@ -1190,7 +1198,7 @@ used if "LAMMPS is built with the appropriate package"_Section_start.html#start_3. "cosine/shift/exp (o)"_dihedral_cosine_shift_exp.html, -"fourier (o)"_dihedral_fourier.html, +"fourier (io)"_dihedral_fourier.html, "nharmonic (o)"_dihedral_nharmonic.html, "quadratic (o)"_dihedral_quadratic.html, "spherical (o)"_dihedral_spherical.html, @@ -1213,7 +1221,7 @@ USER-OMP, t = OPT. 
"hybrid"_improper_hybrid.html, "class2 (ko)"_improper_class2.html, "cvff (io)"_improper_cvff.html, -"harmonic (ko)"_improper_harmonic.html, +"harmonic (iko)"_improper_harmonic.html, "umbrella (o)"_improper_umbrella.html :tb(c=4,ea=c) These are additional improper styles in USER packages, which can be diff --git a/doc/src/Section_packages.txt b/doc/src/Section_packages.txt index d9a9fb4163a3d0186b2d803195b72be0e57ccf3d..912d371cd9451966dbe034c5cca214919e3062b6 100644 --- a/doc/src/Section_packages.txt +++ b/doc/src/Section_packages.txt @@ -150,6 +150,7 @@ Package, Description, Doc page, Example, Library "USER-SMTBQ"_#USER-SMTBQ, second moment tight binding QEq potential,"pair_style smtbq"_pair_smtbq.html, USER/smtbq, - "USER-SPH"_#USER-SPH, smoothed particle hydrodynamics,"SPH User Guide"_PDF/SPH_LAMMPS_userguide.pdf, USER/sph, - "USER-TALLY"_#USER-TALLY, pairwise tally computes,"compute XXX/tally"_compute_tally.html, USER/tally, - +"USER-UEF"_#USER-UEF, extensional flow,"fix nvt/uef"_fix_nh_uef.html, USER/uef, - "USER-VTK"_#USER-VTK, dump output via VTK, "compute vtk"_dump_vtk.html, -, ext :tb(ea=c,ca1=l) :line @@ -705,7 +706,7 @@ dynamics can be run with LAMMPS using density-functional tight-binding quantum forces calculated by LATTE. More information on LATTE can be found at this web site: -"https://github.com/lanl/LATTE"_#latte_home. A brief technical +"https://github.com/lanl/LATTE"_latte_home. A brief technical description is given with the "fix latte"_fix_latte.html command. :link(latte_home,https://github.com/lanl/LATTE) @@ -728,6 +729,7 @@ make lib-latte args="-b" # download and build in lib/latte/LATTE- make lib-latte args="-p $HOME/latte" # use existing LATTE installation in $HOME/latte make lib-latte args="-b -m gfortran" # download and build in lib/latte and # copy Makefile.lammps.gfortran to Makefile.lammps +:pre Note that 3 symbolic (soft) links, "includelink" and "liblink" and "filelink", are created in lib/latte to point into the LATTE home dir. 
@@ -2770,13 +2772,44 @@ examples/USER/tally :ul :line +USER-UEF package :link(USER-UEF),h4 + +[Contents:] + +A fix style for the integration of the equations of motion under +extensional flow with proper boundary conditions, as well as several +supporting compute styles and an output option. + +[Author:] David Nicholson (MIT). + +[Install or un-install:] + +make yes-user-uef +make machine :pre + +make no-user-uef +make machine :pre + +[Supporting info:] + +src/USER-UEF: filenames -> commands +src/USER-UEF/README +"fix nvt/uef"_fix_nh_uef.html +"fix npt/uef"_fix_nh_uef.html +"compute pressure/uef"_compute_pressure_uef.html +"compute temp/uef"_compute_temp_uef.html +"dump cfg/uef"_dump_cfg_uef.html +examples/USER/uef :ul + +:line + USER-VTK package :link(USER-VTK),h4 [Contents:] -A "dump vtk"_dump_vtk.html command which outputs -snapshot info in the "VTK format"_vtk, enabling visualization by -"Paraview"_paraview or other visuzlization packages. +A "dump vtk"_dump_vtk.html command which outputs snapshot info in the +"VTK format"_vtk, enabling visualization by "Paraview"_paraview or +other visualization packages. :link(vtk,http://www.vtk.org) :link(paraview,http://www.paraview.org) diff --git a/doc/src/accelerate_intel.txt b/doc/src/accelerate_intel.txt index 83e17b4f2769522e53d125cb7249801cfcc2932a..aaa38d7de28a218d85952dfb5de3dcb6c2de1095 100644 --- a/doc/src/accelerate_intel.txt +++ b/doc/src/accelerate_intel.txt @@ -25,14 +25,14 @@ LAMMPS to run on the CPU cores and coprocessor cores simultaneously.
[Currently Available USER-INTEL Styles:] Angle Styles: charmm, harmonic :ulb,l -Bond Styles: fene, harmonic :l +Bond Styles: fene, fourier, harmonic :l Dihedral Styles: charmm, harmonic, opls :l -Fixes: nve, npt, nvt, nvt/sllod :l +Fixes: nve, npt, nvt, nvt/sllod, nve/asphere :l Improper Styles: cvff, harmonic :l Pair Styles: airebo, airebo/morse, buck/coul/cut, buck/coul/long, -buck, eam, eam/alloy, eam/fs, gayberne, lj/charmm/coul/charmm, -lj/charmm/coul/long, lj/cut, lj/cut/coul/long, lj/long/coul/long, rebo, -sw, tersoff :l +buck, dpd, eam, eam/alloy, eam/fs, gayberne, lj/charmm/coul/charmm, +lj/charmm/coul/long, lj/cut, lj/cut/coul/long, lj/long/coul/long, +rebo, sw, tersoff :l K-Space Styles: pppm, pppm/disp :l :ule @@ -54,11 +54,12 @@ warmup run (for use with offload benchmarks). :c,image(JPG/user_intel.png) Results are speedups obtained on Intel Xeon E5-2697v4 processors -(code-named Broadwell) and Intel Xeon Phi 7250 processors -(code-named Knights Landing) with "June 2017" LAMMPS built with -Intel Parallel Studio 2017 update 2. Results are with 1 MPI task -per physical core. See {src/USER-INTEL/TEST/README} for the raw -simulation rates and instructions to reproduce. +(code-named Broadwell), Intel Xeon Phi 7250 processors (code-named +Knights Landing), and Intel Xeon Gold 6148 processors (code-named +Skylake) with "June 2017" LAMMPS built with Intel Parallel Studio +2017 update 2. Results are with 1 MPI task per physical core. See +{src/USER-INTEL/TEST/README} for the raw simulation rates and +instructions to reproduce. 
:line @@ -82,6 +83,11 @@ this order :l The {newton} setting applies to all atoms, not just atoms shared between MPI tasks :l Vectorization can change the order for adding pairwise forces :l +When using the -DLMP_USE_MKL_RNG define (all included intel optimized +makefiles do) at build time, the random number generator for +dissipative particle dynamics (pair style dpd/intel) uses the Mersenne +Twister generator included in the Intel MKL library (that should be +more robust than the default Marsaglia random number generator) :l :ule The precision mode (described below) used with the USER-INTEL @@ -108,7 +114,7 @@ $t should be 2 for Intel Xeon CPUs and 2 or 4 for Intel Xeon Phi :l For some of the simple 2-body potentials without long-range electrostatics, performance and scalability can be better with the "newton off" setting added to the input script :l -For simulations on higher node counts, add "processors * * * grid +For simulations on higher node counts, add "processors * * * grid numa" to the beginning of the input script for better scalability :l If using {kspace_style pppm} in the input script, add "kspace_modify diff ad" for better performance :l @@ -119,8 +125,8 @@ For Intel Xeon Phi CPUs: Runs should be performed using MCDRAM. :ulb,l :ule -For simulations using {kspace_style pppm} on Intel CPUs -supporting AVX-512: +For simulations using {kspace_style pppm} on Intel CPUs supporting +AVX-512: Add "kspace_modify diff ad" to the input script :ulb,l The command-line option should be changed to @@ -237,14 +243,17 @@ However, if you do not have coprocessors on your system, building without offload support will produce a smaller binary. The general requirements for Makefiles with the USER-INTEL package -are as follows. "-DLAMMPS_MEMALIGN=64" is required for CCFLAGS. When -using Intel compilers, "-restrict" is required and "-qopenmp" is -highly recommended for CCFLAGS and LINKFLAGS. LIB should include -"-ltbbmalloc".
For builds supporting offload, "-DLMP_INTEL_OFFLOAD" -is required for CCFLAGS and "-qoffload" is required for LINKFLAGS. -Other recommended CCFLAG options for best performance are -"-O2 -fno-alias -ansi-alias -qoverride-limits fp-model fast=2 --no-prec-div". +are as follows. When using Intel compilers, "-restrict" is required +and "-qopenmp" is highly recommended for CCFLAGS and LINKFLAGS. +CCFLAGS should include "-DLMP_INTEL_USELRT" (unless POSIX Threads +are not supported in the build environment) and "-DLMP_USE_MKL_RNG" +(unless Intel Math Kernel Library (MKL) is not available in the build +environment). For Intel compilers, LIB should include "-ltbbmalloc" +or if the library is not available, "-DLMP_INTEL_NO_TBB" can be added +to CCFLAGS. For builds supporting offload, "-DLMP_INTEL_OFFLOAD" is +required for CCFLAGS and "-qoffload" is required for LINKFLAGS. Other +recommended CCFLAG options for best performance are "-O2 -fno-alias +-ansi-alias -qoverride-limits -fp-model fast=2 -no-prec-div". NOTE: The vectorization and math capabilities can differ depending on the CPU. For Intel compilers, the "-x" flag specifies the type of diff --git a/doc/src/atom_modify.txt b/doc/src/atom_modify.txt index d5c82f16ac951ebd86c3194658bfcc0826c1cb9f..1dc0fa6bfb6410df5eaef9abf35cebb3a66749fd 100644 --- a/doc/src/atom_modify.txt +++ b/doc/src/atom_modify.txt @@ -16,7 +16,7 @@ atom_modify keyword values ...
:pre one or more keyword/value pairs may be appended :ulb,l keyword = {id} or {map} or {first} or {sort} :l {id} value = {yes} or {no} - {map} value = {array} or {hash} + {map} value = {yes} or {array} or {hash} {first} value = group-ID = group whose atoms will appear first in internal atom lists {sort} values = Nfreq binsize Nfreq = sort atoms spatially every this many time steps @@ -25,8 +25,8 @@ keyword = {id} or {map} or {first} or {sort} :l [Examples:] -atom_modify map hash -atom_modify map array sort 10000 2.0 +atom_modify map yes +atom_modify map hash sort 10000 2.0 atom_modify first colloid :pre [Description:] @@ -62,29 +62,33 @@ switch. This is described in "Section 2.2"_Section_start.html#start_2 of the manual. If atom IDs are not used, they must be specified as 0 for all atoms, e.g. in a data or restart file. -The {map} keyword determines how atom ID lookup is done for molecular -atom styles. Lookups are performed by bond (angle, etc) routines in -LAMMPS to find the local atom index associated with a global atom ID. - -When the {array} value is used, each processor stores a lookup table -of length N, where N is the largest atom ID in the system. This is a +The {map} keyword determines how atoms with specific IDs are found +when required. An example is the bond (angle, etc) methods which +need to find the local index of an atom with a specific global ID +which is a bond (angle, etc) partner. LAMMPS performs this operation +efficiently by creating a "map", which is either an {array} or {hash} +table, as described below. + +When the {map} keyword is not specified in your input script, LAMMPS +only creates a map for "atom_styles"_atom_style.html for molecular +systems which have permanent bonds (angles, etc). No map is created +for atomic systems, since it is normally not needed. However some +LAMMPS commands require a map, even for atomic systems, and will +generate an error if one does not exist.
The {map} keyword thus +allows you to force the creation of a map. The {yes} value will +create either an {array} or {hash} style map, as explained in the next +paragraph. The {array} and {hash} values create an array-style or +hash-style map respectively. + +For an {array}-style map, each processor stores a lookup table of +length N, where N is the largest atom ID in the system. This is a fast, simple method for many simulations, but requires too much memory -for large simulations. The {hash} value uses a hash table to perform -the lookups. This can be slightly slower than the {array} method, but -its memory cost is proportional to the number of atoms owned by a -processor, i.e. N/P when N is the total number of atoms in the system -and P is the number of processors. - -When this setting is not specified in your input script, LAMMPS -creates a map, if one is needed, as an array or hash. See the -discussion of default values below for how LAMMPS chooses which kind -of map to build. Note that atomic systems do not normally need to -create a map. However, even in this case some LAMMPS commands will -create a map to find atoms (and then destroy it), or require a -permanent map. An example of the former is the "velocity loop -all"_velocity.html command, which uses a map when looping over all -atoms and insuring the same velocity values are assigned to an atom -ID, no matter which processor owns it. +for large simulations. For a {hash}-style map, a hash table is +created on each processor, which finds an atom ID in constant time +(independent of the global number of atom IDs). It can be slightly +slower than the {array} map, but its memory cost is proportional to +the number of atoms owned by a processor, i.e. N/P when N is the total +number of atoms in the system and P is the number of processors.
The {first} keyword allows a "group"_group.html to be specified whose atoms will be maintained as the first atoms in each processor's list diff --git a/doc/src/bond_gromos.txt b/doc/src/bond_gromos.txt new file mode 100644 index 0000000000000000000000000000000000000000..cc3ff75878f36dd27f2d85b61ae3bfb522f52583 --- /dev/null +++ b/doc/src/bond_gromos.txt @@ -0,0 +1,73 @@ +"LAMMPS WWW Site"_lws - "LAMMPS Documentation"_ld - "LAMMPS Commands"_lc :c + +:link(lws,http://lammps.sandia.gov) +:link(ld,Manual.html) +:link(lc,Section_commands.html#comm) + +:line + +bond_style gromos command :h3 +bond_style gromos/omp command :h3 + +[Syntax:] + +bond_style gromos :pre + +[Examples:] + +bond_style gromos +bond_coeff 5 80.0 1.2 :pre + +[Description:] + +The {gromos} bond style uses the potential + +:c,image(Eqs/bond_gromos.jpg) + +where r0 is the equilibrium bond distance. Note that the usual 1/4 +factor is included in K. + +The following coefficients must be defined for each bond type via the +"bond_coeff"_bond_coeff.html command as in the example above, or in +the data file or restart files read by the "read_data"_read_data.html +or "read_restart"_read_restart.html commands: + +K (energy/distance^4) +r0 (distance) :ul + +:line + +Styles with a {gpu}, {intel}, {kk}, {omp}, or {opt} suffix are +functionally the same as the corresponding style without the suffix. +They have been optimized to run faster, depending on your available +hardware, as discussed in "Section 5"_Section_accelerate.html +of the manual. The accelerated styles take the same arguments and +should produce the same results, except for round-off and precision +issues. + +These accelerated styles are part of the GPU, USER-INTEL, KOKKOS, +USER-OMP and OPT packages, respectively. They are only enabled if +LAMMPS was built with those packages. See the "Making +LAMMPS"_Section_start.html#start_3 section for more info. 
+ +You can specify the accelerated styles explicitly in your input script +by including their suffix, or you can use the "-suffix command-line +switch"_Section_start.html#start_6 when you invoke LAMMPS, or you can +use the "suffix"_suffix.html command in your input script. + +See "Section 5"_Section_accelerate.html of the manual for +more instructions on how to use the accelerated styles effectively. + +:line + +[Restrictions:] + +This bond style can only be used if LAMMPS was built with the +MOLECULE package. See the "Making +LAMMPS"_Section_start.html#start_3 section for more info on packages. + +[Related commands:] + +"bond_coeff"_bond_coeff.html, "delete_bonds"_delete_bonds.html + +[Default:] none diff --git a/doc/src/bonds.txt b/doc/src/bonds.txt index 169d56ecbe4e7c75fce57abd58078dff9404a0cb..d33515eb88e36c3aca1525e5426a6cc80cbdeae8 100644 --- a/doc/src/bonds.txt +++ b/doc/src/bonds.txt @@ -8,6 +8,7 @@ Bond Styles :h1 bond_class2 bond_fene bond_fene_expand + bond_gromos bond_harmonic bond_harmonic_shift bond_harmonic_shift_cut diff --git a/doc/src/commands.txt b/doc/src/commands.txt index 06752f6960af88e82a389977681e81a29ac89ba7..5fb06f20118156b03bef56819c9a47442d5a30af 100644 --- a/doc/src/commands.txt +++ b/doc/src/commands.txt @@ -32,6 +32,7 @@ Commands :h1 dimension displace_atoms dump + dump_cfg_uef dump_h5md dump_image dump_modify diff --git a/doc/src/compute_pressure_uef.txt b/doc/src/compute_pressure_uef.txt new file mode 100644 index 0000000000000000000000000000000000000000..065fc044417601a052d0d9fd060ababbf9aa405c --- /dev/null +++ b/doc/src/compute_pressure_uef.txt @@ -0,0 +1,61 @@ +"LAMMPS WWW Site"_lws - "LAMMPS Documentation"_ld - "LAMMPS Commands"_lc :c + +:link(lws,http://lammps.sandia.gov) +:link(ld,Manual.html) +:link(lc,Section_commands.html#comm) + +:line + +compute pressure/uef command :h3 + +[Syntax:] + +compute ID group-ID pressure/uef temp-ID keyword ... 
:pre + +ID, group-ID are documented in "compute"_compute.html command +pressure/uef = style name of this compute command +temp-ID = ID of compute that calculates temperature, can be NULL if not needed +zero or more keywords may be appended +keyword = {ke} or {pair} or {bond} or {angle} or {dihedral} or {improper} or {kspace} or {fix} or {virial} :ul + +[Examples:] + +compute 1 all pressure/uef my_temp_uef +compute 2 all pressure/uef my_temp_uef virial :pre + +[Description:] + +This command is used to compute the pressure tensor in +the reference frame of the applied flow field when +"fix nvt/uef"_fix_nh_uef.html or +"fix npt/uef"_fix_nh_uef.html is used. +It is not necessary to use this command to compute the scalar +value of the pressure. A "compute pressure"_compute_pressure.html +may be used for that purpose. + +The keywords and output information are documented in +"compute_pressure"_compute_pressure.html. + +[Restrictions:] + +This compute is part of the USER-UEF package. It is only enabled if +LAMMPS was built with that package. See the +"Making LAMMPS"_Section_start.html#start_3 section for more info. + +This command can only be used when "fix nvt/uef"_fix_nh_uef.html +or "fix npt/uef"_fix_nh_uef.html is active. + +The kinetic contribution to the pressure tensor +will be accurate only when +the compute specified by {temp-ID} is a +"compute temp/uef"_compute_temp_uef.html.
+ +[Related commands:] + +"compute pressure"_compute_pressure.html, +"fix nvt/uef"_fix_nh_uef.html, +"compute temp/uef"_compute_temp_uef.html + +[Default:] none + + diff --git a/doc/src/compute_temp_uef.txt b/doc/src/compute_temp_uef.txt new file mode 100644 index 0000000000000000000000000000000000000000..acd3a6218d9e39d25a73be4d7562ee8127ae9cfa --- /dev/null +++ b/doc/src/compute_temp_uef.txt @@ -0,0 +1,52 @@ +"LAMMPS WWW Site"_lws - "LAMMPS Documentation"_ld - "LAMMPS Commands"_lc :c + +:link(lws,http://lammps.sandia.gov) +:link(ld,Manual.html) +:link(lc,Section_commands.html#comm) + +:line + +compute temp/uef command :h3 + +[Syntax:] + +compute ID group-ID temp/uef :pre + +ID, group-ID are documented in "compute"_compute.html command +temp/uef = style name of this compute command :ul + +[Examples:] + +compute 1 all temp/uef +compute 2 sel temp/uef :pre + +[Description:] + +This command is used to compute the kinetic energy tensor in +the reference frame of the applied flow field when +"fix nvt/uef"_fix_nh_uef.html or +"fix npt/uef"_fix_nh_uef.html is used. +It is not necessary to use this command to compute the scalar +value of the temperature. A "compute temp"_compute_temp.html +may be used for that purpose. + +Output information for this command can be found in the +documentation for "compute temp"_compute_temp.html. + +[Restrictions:] + +This compute is part of the USER-UEF package. It is only enabled if +LAMMPS was built with that package. See the +"Making LAMMPS"_Section_start.html#start_3 section for more info. + +This command can only be used when "fix nvt/uef"_fix_nh_uef.html +or "fix npt/uef"_fix_nh_uef.html is active.
+ +[Related commands:] + +"compute temp"_compute_temp.html, +"fix nvt/uef"_fix_nh_uef.html, +"compute pressure/uef"_compute_pressure_uef.html + + +[Default:] none diff --git a/doc/src/computes.txt b/doc/src/computes.txt index c443bfaba2376bd5244ad0eef735d72fb9a388f6..1b64e2e5b46393313fc74099ef9bbf04da946b74 100644 --- a/doc/src/computes.txt +++ b/doc/src/computes.txt @@ -65,6 +65,7 @@ Computes :h1 compute_pe_atom compute_plasticity_atom compute_pressure + compute_pressure_uef compute_property_atom compute_property_chunk compute_property_local @@ -114,6 +115,7 @@ Computes :h1 compute_temp_region_eff compute_temp_rotate compute_temp_sphere + compute_temp_uef compute_ti compute_torque_chunk compute_vacf diff --git a/doc/src/dihedral_fourier.txt b/doc/src/dihedral_fourier.txt index da892b59daaf8eda8e540f9a26f850e55877b4aa..0accbb22bf80bfb89e1b77bd7fa8760e56ad3e55 100644 --- a/doc/src/dihedral_fourier.txt +++ b/doc/src/dihedral_fourier.txt @@ -7,6 +7,7 @@ :line dihedral_style fourier command :h3 +dihedral_style fourier/intel command :h3 dihedral_style fourier/omp command :h3 [Syntax:] diff --git a/doc/src/dump_cfg_uef.txt b/doc/src/dump_cfg_uef.txt new file mode 100644 index 0000000000000000000000000000000000000000..e257f9c4f16d2f768ee4e688f3d71e0a0dd0c1cd --- /dev/null +++ b/doc/src/dump_cfg_uef.txt @@ -0,0 +1,53 @@ + "LAMMPS WWW Site"_lws - "LAMMPS Documentation"_ld - "LAMMPS Commands"_lc :c + +:link(lws,http://lammps.sandia.gov) +:link(ld,Manual.html) +:link(lc,Section_commands.html#comm) + +:line + +dump cfg/uef command :h3 + +[Syntax:] + +dump ID group-ID cfg/uef N file mass type xs ys zs args :pre + +ID = user-assigned name for the dump :ulb,l +group-ID = ID of the group of atoms to be dumped :l +N = dump every this many timesteps :l +file = name of file to write dump info to :l +args = same as args for "dump custom"_dump.html :pre + +:ule + +[Examples:] + +dump 1 all cfg/uef 10 dump.*.cfg mass type xs ys zs +dump 2 all cfg/uef 100 dump.*.cfg mass type xs ys zs id 
c_stress :pre + +[Description:] + +This command is used to dump atomic coordinates in the +reference frame of the applied flow field when +"fix nvt/uef"_fix_nh_uef.html or +"fix npt/uef"_fix_nh_uef.html is used. Only the atomic +coordinates and frame-invariant scalar quantities +will be in the flow frame. If velocities are selected +as output, for example, they will not be in the same +reference frame as the atomic positions. + +[Restrictions:] + +This dump style is part of the USER-UEF package. It is only enabled if +LAMMPS was built with that package. See the +"Making LAMMPS"_Section_start.html#start_3 section for more info. + +This command can only be used when "fix nvt/uef"_fix_nh_uef.html +or "fix npt/uef"_fix_nh_uef.html is active. + +[Related commands:] + +"dump"_dump.html, +"fix nvt/uef"_fix_nh_uef.html + +[Default:] none diff --git a/doc/src/dump_modify.txt b/doc/src/dump_modify.txt index 2ea1da3db3b6295531b913bc9e25665a9c4c7ba7..db727c2d4f7bdec779f196524a71a7dfd2ea3193 100644 --- a/doc/src/dump_modify.txt +++ b/doc/src/dump_modify.txt @@ -15,8 +15,9 @@ dump_modify dump-ID keyword values ... :pre dump-ID = ID of dump to modify :ulb,l one or more keyword/value pairs may be appended :l these keywords apply to various dump styles :l -keyword = {append} or {buffer} or {element} or {every} or {fileper} or {first} or {flush} or {format} or {image} or {label} or {nfile} or {pad} or {precision} or {region} or {scale} or {sort} or {thresh} or {unwrap} :l - {append} arg = {yes} or {no} or {at} N +keyword = {append} or {at} or {buffer} or {element} or {every} or {fileper} or {first} or {flush} or {format} or {image} or {label} or {nfile} or {pad} or {precision} or {region} or {scale} or {sort} or {thresh} or {unwrap} :l + {append} arg = {yes} or {no} + {at} arg = N N = index of frame written upon first dump {buffer} arg = {yes} or {no} {element} args = E1 E2 ... EN, where N = # of atom types @@ -141,13 +142,18 @@ and {dcd}.
It also applies only to text output files, not to binary or gzipped or image/movie files. If specified as {yes}, then dump snapshots are appended to the end of an existing dump file. If specified as {no}, then a new dump file will be created which will -overwrite an existing file with the same name. If the {at} option is present -({netcdf} only), then the frame to append to can be specified. Negative values -are counted from the end of the file. This keyword can only take effect if the -dump_modify command is used after the "dump"_dump.html command, but before the -first command that causes dump snapshots to be output, e.g. a "run"_run.html or -"minimize"_minimize.html command. Once the dump file has been opened, this -keyword has no further effect. +overwrite an existing file with the same name. + +:line + +The {at} keyword only applies to the {netcdf} dump style. It can only +be used if the {append yes} keyword is also used. The {N} argument is +the index of which frame to append to. A negative value can be +specified for {N}, which means a frame counted from the end of the +file. The {at} keyword can only be used if the dump_modify command is +before the first command that causes dump snapshots to be output, +e.g. a "run"_run.html or "minimize"_minimize.html command. Once the +dump file has been opened, this keyword has no further effect. :line diff --git a/doc/src/dump_netcdf.txt b/doc/src/dump_netcdf.txt index 63568137a65ec5d6891db69a9d7cf33d1be1a098..70111a36a8b28d9654db6779426090b7e37b507d 100644 --- a/doc/src/dump_netcdf.txt +++ b/doc/src/dump_netcdf.txt @@ -25,7 +25,8 @@ args = list of atom attributes, same as for "dump_style custom"_dump.html :l,ule dump 1 all netcdf 100 traj.nc type x y z vx vy vz dump_modify 1 append yes at -1 thermo yes -dump 1 all netcdf/mpiio 1000 traj.nc id type x y z :pre +dump 1 all netcdf/mpiio 1000 traj.nc id type x y z +dump 1 all netcdf 1000 traj.*.nc id type x y z :pre [Description:] @@ -73,4 +74,3 @@ section for more info. 
[Related commands:] "dump"_dump.html, "dump_modify"_dump_modify.html, "undump"_undump.html - diff --git a/doc/src/fix_deform.txt b/doc/src/fix_deform.txt index 63d872eded4d2e4c6a70a936e427d899115e3463..c870c73bdccf918288589c5141ba4dbafcc60434 100644 --- a/doc/src/fix_deform.txt +++ b/doc/src/fix_deform.txt @@ -86,11 +86,16 @@ Change the volume and/or shape of the simulation box during a dynamics run. Orthogonal simulation boxes have 3 adjustable parameters (x,y,z). Triclinic (non-orthogonal) simulation boxes have 6 adjustable parameters (x,y,z,xy,xz,yz). Any or all of them can be -adjusted independently and simultaneously by this command. This fix -can be used to perform non-equilibrium MD (NEMD) simulations of a -continuously strained system. See the "fix +adjusted independently and simultaneously by this command. + +This fix can be used to perform non-equilibrium MD (NEMD) simulations +of a continuously strained system. See the "fix nvt/sllod"_fix_nvt_sllod.html and "compute -temp/deform"_compute_temp_deform.html commands for more details. +temp/deform"_compute_temp_deform.html commands for more details. Note +that simulation of a continuously extended system (extensional flow) +can be modeled using the "USER-UEF +package"_Section_packages.html#USER-UEF and its "fix +commands"_fix_nh_uef.html. For the {x}, {y}, {z} parameters, the associated dimension cannot be shrink-wrapped. For the {xy}, {yz}, {xz} parameters, the associated diff --git a/doc/src/fix_latte.txt b/doc/src/fix_latte.txt index f78e13b866560403a2dd7276005c1fc698796377..4edd610546350ac082789fb517af918c7167c1f3 100644 --- a/doc/src/fix_latte.txt +++ b/doc/src/fix_latte.txt @@ -66,7 +66,7 @@ reference charge of overlapping atom-centered densities and bond integrals are parameterized using a Slater-Koster tight-binding approach. This procedure, which usually is referred to as the DFTB method has been described in detail by ("Elstner"_#Elstner) and -("Finnis"_#Finnis) and coworkers. 
+("Finnis"_#Finnis2) and coworkers. The work of the LATTE developers follows that of Elstner closely with respect to the physical model. However, the development of LATTE is @@ -173,7 +173,7 @@ M. Haugk, T. Frauenheim, S. Suhai, and G. Seifert, Phys. Rev. B, 58, M. Haugk, T. Frauenheim, S. Suhai, and G. Seifert, Phys. Rev. B, 58, 7260 (1998). -:link(Finnis) +:link(Finnis2) [(Finnis)] M. W. Finnis, A. T. Paxton, M. Methfessel, and M. van Schilfgarde, Phys. Rev. Lett., 81, 5149 (1998). @@ -197,11 +197,11 @@ J. Sci. Comput. 36 (2), 147-170, (2014). [(Niklasson2014)] A. M. N. Niklasson and M. Cawkwell, J. Chem. Phys., 141, 164123, (2014). -:link(Niklasson2014) +:link(Niklasson2017) [(Niklasson2017)] A. M. N. Niklasson, J. Chem. Phys., 147, 054103 (2017). -:link(Niklasson2012) -[(Niklasson2017)] A. M. N. Niklasson, M. J. Cawkwell, Phys. Rev. B, 86 +:link(Cawkwell2012) +[(Cawkwell2012)] A. M. N. Niklasson, M. J. Cawkwell, Phys. Rev. B, 86 (17), 174308 (2012). :link(Negre2016) diff --git a/doc/src/fix_neb.txt b/doc/src/fix_neb.txt index 52d8a7df84da725f0c37af433966446cf2720604..73b3e312665785ed7420f9b8ceb557e95be61bc4 100644 --- a/doc/src/fix_neb.txt +++ b/doc/src/fix_neb.txt @@ -93,7 +93,7 @@ intermediate replica with the previous and the next image: Fnudge_parallel = {Kspring} * (|Ri+1 - Ri| - |Ri - Ri-1|) :pre -Note that in this case the specified {Kspring) is in force/distance +Note that in this case the specified {Kspring} is in force/distance units. With a value of {ideal}, the spring force is computed as suggested in @@ -105,7 +105,7 @@ where RD is the "reaction coordinate" see "neb"_neb.html section, and RDideal is the ideal RD for which all the images are equally spaced. I.e. RDideal = (I-1)*meanDist when the climbing replica is off, where I is the replica number). The meanDist is the average distance -between replicas. Note that in this case the specified {Kspring) is +between replicas. Note that in this case the specified {Kspring} is in force units. 
Note that the {ideal} form of nudging can often be more effective at diff --git a/doc/src/fix_nh.txt b/doc/src/fix_nh.txt index 8fa30ac22289d7fe39ff6f1d0c314990dfb52c79..41d0e6438fc9e05d08aa43d1548a98c0856ffd90 100644 --- a/doc/src/fix_nh.txt +++ b/doc/src/fix_nh.txt @@ -393,32 +393,36 @@ thermostatting and barostatting. :line These fixes compute a temperature and pressure each timestep. To do -this, the fix creates its own computes of style "temp" and "pressure", -as if one of these two sets of commands had been issued: +this, the thermostat and barostat fixes create their own computes of +style "temp" and "pressure", as if one of these sets of commands had +been issued: +For fix nvt: compute fix-ID_temp group-ID temp -compute fix-ID_press group-ID pressure fix-ID_temp :pre +For fix npt and fix nph: compute fix-ID_temp all temp compute fix-ID_press all pressure fix-ID_temp :pre -See the "compute temp"_compute_temp.html and "compute -pressure"_compute_pressure.html commands for details. Note that the -IDs of the new computes are the fix-ID + underscore + "temp" or fix_ID -+ underscore + "press". For fix nvt, the group for the new computes -is the same as the fix group. For fix nph and fix npt, the group for -the new computes is "all" since pressure is computed for the entire -system. +For fix nvt, the group for the new temperature compute is the same as +the fix group. For fix npt and fix nph, the group for both the new +temperature and pressure compute is "all" since pressure is computed +for the entire system. In the case of fix nph, the temperature +compute is not used for thermostatting, but just for a kinetic-energy +contribution to the pressure. See the "compute +temp"_compute_temp.html and "compute pressure"_compute_pressure.html +commands for details. Note that the IDs of the new computes are the +fix-ID + underscore + "temp" or fix_ID + underscore + "press". 
Note that these are NOT the computes used by thermodynamic output (see the "thermo_style"_thermo_style.html command) with ID = {thermo_temp} -and {thermo_press}. This means you can change the attributes of this +and {thermo_press}. This means you can change the attributes of these fix's temperature or pressure via the -"compute_modify"_compute_modify.html command or print this temperature -or pressure during thermodynamic output via the "thermo_style -custom"_thermo_style.html command using the appropriate compute-ID. -It also means that changing attributes of {thermo_temp} or -{thermo_press} will have no effect on this fix. +"compute_modify"_compute_modify.html command. Or you can print this +temperature or pressure during thermodynamic output via the +"thermo_style custom"_thermo_style.html command using the appropriate +compute-ID. It also means that changing attributes of {thermo_temp} +or {thermo_press} will have no effect on this fix. Like other fixes that perform thermostatting, fix nvt and fix npt can be used with "compute commands"_compute.html that calculate a diff --git a/doc/src/fix_nh_uef.txt b/doc/src/fix_nh_uef.txt new file mode 100644 index 0000000000000000000000000000000000000000..bde1818371f2c4bcab0bafe11e19c4b04df107b8 --- /dev/null +++ b/doc/src/fix_nh_uef.txt @@ -0,0 +1,228 @@ +"LAMMPS WWW Site"_lws - "LAMMPS Documentation"_ld - "LAMMPS Commands"_lc :c + +:link(lws,http://lammps.sandia.gov) +:link(ld,Manual.html) +:link(lc,Section_commands.html#comm) + +:line + +fix nvt/uef command :h3 +fix npt/uef command :h3 + +[Syntax:] + +fix ID group-ID style_name erate edot_x edot_y temp Tstart Tstop Tdamp keyword value ...
:pre + +ID, group-ID are documented in "fix"_fix.html command :ulb,l +style_name = {nvt/uef} or {npt/uef} :l +{Tstart}, {Tstop}, and {Tdamp} are documented in the "fix npt"_fix_nh.html command :l +{edot_x} and {edot_y} are the strain rates in the x and y directions (1/(time units)) :l +one or more keyword/value pairs may be appended :l +keyword = {ext} or {strain} or {iso} or {x} or {y} or {z} or {tchain} or {pchain} or {tloop} or {ploop} or {mtk} + {ext} value = {x} or {y} or {z} or {xy} or {yz} or {xz} or {xyz} = external dimensions + sets the external dimensions used to calculate the scalar pressure + {strain} values = e_x e_y = initial strain + usually not needed, but may be needed to resume a run with a data file. + {iso}, {x}, {y}, {z}, {tchain}, {pchain}, {tloop}, {ploop}, {mtk} keywords + documented by the "fix npt"_fix_nh.html command :pre +:ule + +[Examples:] + +fix uniax_nvt all nvt/uef temp 400 400 100 erate 0.00001 -0.000005 +fix biax_nvt all nvt/uef temp 400 400 100 erate 0.000005 0.000005 +fix uniax_npt all npt/uef temp 400 400 300 iso 1 1 3000 erate 0.00001 -0.000005 ext yz +fix biax_npt all npt/uef temp 400 400 100 erate -0.00001 0.000005 x 1 1 3000 :pre + +[Description:] + +This fix can be used to simulate non-equilibrium molecular dynamics +(NEMD) under diagonal flow fields, including uniaxial and biaxial +flow. Simulations under continuous extensional flow may be carried +out for an indefinite amount of time. It is an implementation of the +boundary conditions from "(Dobson)"_#Dobson, and also uses numerical +lattice reduction as was proposed by "(Hunt)"_#Hunt. The lattice +reduction algorithm is from "(Semaev)"_#Semaev. The fix is intended for +simulations of homogeneous flows, and integrates the SLLOD equations +of motion, originally proposed by Hoover and Ladd (see "(Evans and +Morriss)"_#Sllod). Additional detail about this implementation can be +found in "(Nicholson and Rutledge)"_#Nicholson.
+ +Note that NEMD simulations of a continuously strained system can be +performed using the "fix deform"_fix_deform.html, "fix +nvt/sllod"_fix_nvt_sllod.html, and "compute +temp/deform"_compute_temp_deform.html commands. + +The applied flow field is set by the {erate} keyword. The values +{edot_x} and {edot_y} correspond to the strain rates in the xx and yy +directions. It is implicitly assumed that the flow field is +traceless, and therefore the strain rate in the zz direction is equal +to -({edot_x} + {edot_y}). + +NOTE: Due to an instability in the SLLOD equations under extension, +"fix momentum"_fix_momentum.html should be used to regularly reset the +linear momentum. + +The boundary conditions require a simulation box that does not have a +consistent alignment relative to the applied flow field. Since LAMMPS +utilizes an upper-triangular simulation box, it is not possible to +express the evolving simulation box in the same coordinate system as +the flow field. This fix keeps track of two coordinate systems: the +flow frame, and the upper triangular LAMMPS frame. The coordinate +systems are related to each other through the QR decomposition, as is +illustrated in the image below. + +:c,image(JPG/uef_frames.jpg) + +During most molecular dynamics operations, the system is represented +in the LAMMPS frame. Only when the positions and velocities are +updated is the system rotated to the flow frame, and it is rotated +back to the LAMMPS frame immediately afterwards. For this reason, all +vector-valued quantities (except for the tensors from +"compute pressure/uef"_compute_pressure_uef.html and +"compute temp/uef"_compute_temp_uef.html) will be computed in the +LAMMPS frame. Rotationally invariant scalar quantities like the +temperature and hydrostatic pressure are frame-invariant and will be +computed correctly.
Additionally, the system is in the LAMMPS frame +during all of the output steps, and therefore trajectory files made +using the dump command will be in the LAMMPS frame unless the +"dump_cfg/uef"_dump_cfg_uef.html command is used. + +:line + +Temperature control is achieved with the default Nose-Hoover style +thermostat documented in "fix npt"_fix_nh.html. When this fix is +active, only the peculiar velocity of each atom is stored, defined as +the velocity relative to the streaming velocity. This is in contrast +to "fix nvt/sllod"_fix_nvt_sllod.html, which uses a lab-frame +velocity, and removes the contribution from the streaming velocity in +order to compute the temperature. + +Pressure control is achieved using the default Nose-Hoover barostat +documented in "fix npt"_fix_nh.html. There are two ways to control the +pressure using this fix. The first method involves using the {ext} +keyword along with the {iso} pressure style. With this method, the +pressure is controlled by scaling the simulation box isotropically to +achieve the average pressure only in the directions specified by +{ext}. For example, if the {ext} value is set to {xy}, the average +pressure (Pxx+Pyy)/2 will be controlled. + +This example command will control the total hydrostatic pressure under +uniaxial tension: + +fix f1 all npt/uef temp 0.7 0.7 0.5 iso 1 1 5 erate -0.5 -0.5 ext xyz :pre + +This example command will control the average stress in compression +directions, which would typically correspond to free surfaces under +drawing with uniaxial tension: + +fix f2 all npt/uef temp 0.7 0.7 0.5 iso 1 1 5 erate -0.5 -0.5 ext xy :pre + +The second method for pressure control involves setting the normal +stresses using the {x}, {y} , and/or {z} keywords. When using this +method, the same pressure must be specified via {Pstart} and {Pstop} +for all dimensions controlled. 
Any choice of pressure conditions that +would cause LAMMPS to compute a deviatoric stress is not permissible +and will result in an error. Additionally, all dimensions with +controlled stress must have the same applied strain rate. The {ext} +keyword must be set to the default value ({xyz}) when using this +method. + +For example, the following commands will work: + +fix f3 all npt/uef temp 0.7 0.7 0.5 x 1 1 5 y 1 1 5 erate -0.5 -0.5 +fix f4 all npt/uef temp 0.7 0.7 0.5 z 1 1 5 erate 0.5 0.5 :pre + +The following commands will not work: + +fix f5 all npt/uef temp 0.7 0.7 0.5 x 1 1 5 z 1 1 5 erate -0.5 -0.5 +fix f6 all npt/uef temp 0.7 0.7 0.5 x 1 1 5 z 2 2 5 erate 0.5 0.5 :pre + +:line + +This fix computes a temperature and pressure each timestep. To do +this, it creates its own computes of style "temp/uef" and +"pressure/uef", as if one of these two sets of commands had been +issued: + +compute fix-ID_temp group-ID temp/uef +compute fix-ID_press group-ID pressure/uef fix-ID_temp :pre + +compute fix-ID_temp all temp/uef +compute fix-ID_press all pressure/uef fix-ID_temp :pre + +See the "compute temp/uef"_compute_temp_uef.html and "compute +pressure/uef"_compute_pressure_uef.html commands for details. Note +that the IDs of the new computes are the fix-ID + underscore + "temp" +or fix-ID + underscore + "press". + +[Restart, fix_modify, output, run start/stop, minimize info:] + +The fix writes the state of all the thermostat and barostat variables, +as well as the cumulative strain applied, to "binary restart +files"_restart.html. See the "read_restart"_read_restart.html command +for info on how to re-specify a fix in an input script that reads a +restart file, so that the operation of the fix continues in an +uninterrupted fashion. + +NOTE: It is not necessary to set the {strain} keyword when resuming a +run from a restart file. Only for resuming from data files, which do +not contain the cumulative applied strain, will this keyword be +necessary.
+ +This fix can be used with the "fix_modify"_fix_modify.html {temp} and +{press} options. The temperature and pressure computes used must be of +type {temp/uef} and {pressure/uef}. + +This fix computes the same global scalar and vector quantities as "fix +npt"_fix_nh.html. + +The fix is not invoked during "energy minimization"_minimize.html. + +[Restrictions:] + +This fix is part of the USER-UEF package. It is only enabled if LAMMPS +was built with that package. See the "Making +LAMMPS"_Section_start.html#start_3 section for more info. + +Due to requirements of the boundary conditions, when the {strain} +keyword is set to zero (or unset), the initial simulation box must be +cubic and have style triclinic. If the box is initially of type ortho, +use "change_box"_change_box.html before invoking the fix. + +NOTE: When resuming from restart files, you may need to use "box tilt +large"_box.html since LAMMPS has internal criteria from lattice +reduction that are not the same as the criteria in the numerical +lattice reduction algorithm. + +[Related commands:] + +"fix nvt"_fix_nh.html, "fix nvt/sllod"_fix_nvt_sllod.html, "compute +temp/uef"_compute_temp_uef.html, "compute +pressure/uef"_compute_pressure_uef.html, "dump +cfg/uef"_dump_cfg_uef.html + +[Default:] + +The default keyword values specific to this fix are ext = xyz, strain += 0 0. The remaining defaults are the same as for "fix +npt"_fix_nh.html except tchain = 1. The reason for this change is +given in "fix nvt/sllod"_fix_nvt_sllod.html. + +:line + +:link(Dobson) +[(Dobson)] Dobson, J Chem Phys, 141, 184103 (2014). + +:link(Hunt) +[(Hunt)] Hunt, Mol Simul, 42, 347 (2016). + +:link(Semaev) +[(Semaev)] Semaev, Cryptography and Lattices, 181 (2001). + +:link(Sllod) +[(Evans and Morriss)] Evans and Morriss, Phys Rev A, 30, 1528 (1984). + +:link(Nicholson) +[(Nicholson and Rutledge)] Nicholson and Rutledge, J Chem Phys, 145, +244903 (2016).
diff --git a/doc/src/fix_rhok.txt b/doc/src/fix_rhok.txt new file mode 100644 index 0000000000000000000000000000000000000000..2db920ac4bef02c60f95604beb5e6feeb9cb7de6 --- /dev/null +++ b/doc/src/fix_rhok.txt @@ -0,0 +1,56 @@ +"LAMMPS WWW Site"_lws - "LAMMPS Documentation"_ld - "LAMMPS Commands"_lc :c + +:link(lws,http://lammps.sandia.gov) +:link(ld,Manual.html) +:link(lc,Section_commands.html#comm) + +:line + +fix rhok command :h3 + +fix ID group-ID rhok nx ny nz K a :pre + +ID, group-ID are documented in "fix"_fix.html command +nx, ny, nz = k-vector of collective density field +K = spring constant of bias potential +a = anchor point of bias potential :ul + +[Examples:] + +fix bias all rhok 16 0 0 4.0 16.0 +fix 1 all npt temp 0.8 0.8 4.0 z 2.2 2.2 8.0 +# output of 4 values from fix rhok: U_bias rho_k_RE rho_k_IM |rho_k| +thermo_style custom step temp pzz lz f_bias f_bias\[1\] f_bias\[2\] f_bias\[3\] :pre + +[Description:] + +The fix applies a force to atoms given by the potential + +:c,image(Eqs/fix_rhok.jpg) + +as described in "(Pedersen)"_#Pedersen. + +This field, which biases configurations with long-range order, can be +used to study crystal-liquid interfaces and determine melting +temperatures "(Pedersen)"_#Pedersen. + +An example of using the interface pinning method is located in the +{examples/USER/misc/rhok} directory. + +[Restrictions:] + +This fix is part of the USER-MISC package. It is only enabled if LAMMPS +was built with that package. See the "Making +LAMMPS"_Section_start.html#start_3 section for more info. + +[Related commands:] + +"thermo_style"_thermo_style.html + +[Default:] none + +:line + +:link(Pedersen) +[(Pedersen)] Pedersen, J. Chem. Phys., 139, 104102 (2013).
+ diff --git a/doc/src/fix_rigid.txt b/doc/src/fix_rigid.txt index 99105e13507249636d2c0b5118450cf9d11da040..a5a631bd382e851c60cda8bfbe17c51eb0b73d18 100644 --- a/doc/src/fix_rigid.txt +++ b/doc/src/fix_rigid.txt @@ -26,6 +26,9 @@ style = {rigid} or {rigid/nve} or {rigid/nvt} or {rigid/npt} or {rigid/nph} or { bodystyle = {single} or {molecule} or {group} :l {single} args = none {molecule} args = none + {custom} args = {i_propname} or {v_varname} + i_propname = an integer property defined via fix property/atom + v_varname = an atom-style or atomfile-style variable {group} args = N groupID1 groupID2 ... N = # of groups groupID1, groupID2, ... = list of N group IDs :pre @@ -80,6 +83,16 @@ fix 1 rods rigid/npt molecule temp 300.0 300.0 100.0 iso 0.5 0.5 10.0 fix 1 particles rigid/npt molecule temp 1.0 1.0 5.0 x 0.5 0.5 1.0 z 0.5 0.5 1.0 couple xz fix 1 water rigid/nph molecule iso 0.5 0.5 1.0 fix 1 particles rigid/npt/small molecule temp 1.0 1.0 1.0 iso 0.5 0.5 1.0 :pre + +variable bodyid atom 1.0*gmask(clump1)+2.0*gmask(clump2)+3.0*gmask(clump3) +fix 1 clump rigid custom v_bodyid :pre + +variable bodyid atomfile bodies.txt +fix 1 clump rigid custom v_bodyid :pre + +fix 0 all property/atom i_bodyid +read_data data.rigid fix 0 NULL Bodies +fix 1 clump rigid/small custom i_bodyid :pre [Description:] @@ -100,7 +113,7 @@ of a biomolecule such as a protein. Example of small rigid bodies are patchy nanoparticles, such as those modeled in "this paper"_#Zhang1 by Sharon Glotzer's group, clumps of -granular particles, lipid molecules consiting of one or more point +granular particles, lipid molecules consisting of one or more point dipoles connected to other spheroids or ellipsoids, irregular particles built from line segments (2d) or triangles (3d), and coarse-grain models of nano or colloidal particles consisting of a @@ -203,11 +216,11 @@ most one rigid body. Which atoms are in which bodies can be defined via several options.
NOTE: With the {rigid/small} styles, which require that {bodystyle} be -specified as {molecule}, you can define a system that has no rigid -bodies initially. This is useful when you are using the {mol} keyword -in conjunction with another fix that is adding rigid bodies on-the-fly -as molecules, such as "fix deposit"_fix_deposit.html or "fix -pour"_fix_pour.html. +specified as {molecule} or {custom}, you can define a system that has +no rigid bodies initially. This is useful when you are using the {mol} +keyword in conjunction with another fix that is adding rigid bodies +on-the-fly as molecules, such as "fix deposit"_fix_deposit.html or +"fix pour"_fix_pour.html. For bodystyle {single} the entire fix group of atoms is treated as one rigid body. This option is only allowed for the {rigid} styles. @@ -222,6 +235,15 @@ molecule ID = 0) surrounding rigid bodies, this may not be what you want. Thus you should be careful to use a fix group that only includes atoms you want to be part of rigid bodies. +Bodystyle {custom} is similar to bodystyle {molecule}, however some +custom properties are used to group atoms into rigid bodies. The +special case for molecule/body ID = 0 is not available. Possible +custom properties are an integer property associated with atoms through +"fix property/atom"_fix_property_atom.html or an atom style variable +or an atomfile style variable. For the latter two, the variable value +will be rounded to an integer and then rigid bodies defined from +those values. + For bodystyle {group}, each of the listed groups is treated as a separate rigid body. Only atoms that are also in the fix group are included in each rigid body. 
This option is only allowed for the diff --git a/doc/src/fixes.txt b/doc/src/fixes.txt index 7000a66c51dc3df60b1c9701fa77e57c5768fd22..ad3e95fa4170422c0472e541c98adc6dd331b285 100644 --- a/doc/src/fixes.txt +++ b/doc/src/fixes.txt @@ -59,6 +59,7 @@ Fixes :h1 fix_langevin fix_langevin_drude fix_langevin_eff + fix_latte fix_lb_fluid fix_lb_momentum fix_lb_pc @@ -76,6 +77,7 @@ Fixes :h1 fix_neb fix_nh fix_nh_eff + fix_nh_uef fix_nph_asphere fix_nph_body fix_nph_sphere @@ -124,6 +126,7 @@ Fixes :h1 fix_reaxc_species fix_recenter fix_restrain + fix_rhok fix_rigid fix_rx fix_saed_vtk diff --git a/doc/src/lammps.book b/doc/src/lammps.book index 86dfe78af35b27f8c58ba50da4fd5b280fe1a4c2..0691f43e9b1c120df66b9e8ba372e33f9019eb80 100644 --- a/doc/src/lammps.book +++ b/doc/src/lammps.book @@ -62,6 +62,7 @@ dump_modify.html dump_molfile.html dump_netcdf.html dump_vtk.html +dump_cfg_uef.html echo.html fix.html fix_modify.html @@ -187,6 +188,7 @@ fix_ipi.html fix_langevin.html fix_langevin_drude.html fix_langevin_eff.html +fix_latte.html fix_lb_fluid.html fix_lb_momentum.html fix_lb_pc.html @@ -231,6 +233,7 @@ fix_nvt_manifold_rattle.html fix_nvt_sllod.html fix_nvt_sllod_eff.html fix_nvt_sphere.html +fix_nh_uef.html fix_oneway.html fix_orient.html fix_phonon.html @@ -253,6 +256,7 @@ fix_reaxc_species.html fix_recenter.html fix_restrain.html fix_rigid.html +fix_rhok.html fix_rx.html fix_saed_vtk.html fix_setforce.html @@ -354,6 +358,7 @@ compute_pe.html compute_pe_atom.html compute_plasticity_atom.html compute_pressure.html +compute_pressure_uef.html compute_property_atom.html compute_property_chunk.html compute_property_local.html @@ -403,6 +408,7 @@ compute_temp_region.html compute_temp_region_eff.html compute_temp_rotate.html compute_temp_sphere.html +compute_temp_uef.html compute_ti.html compute_torque_chunk.html compute_vacf.html @@ -514,7 +520,7 @@ pair_zero.html bond_class2.html bond_fene.html bond_fene_expand.html -bond_oxdna.html +bond_gromos.html bond_harmonic.html 
bond_harmonic_shift.html bond_harmonic_shift_cut.html @@ -522,6 +528,7 @@ bond_hybrid.html bond_morse.html bond_none.html bond_nonlinear.html +bond_oxdna.html bond_quartic.html bond_table.html bond_zero.html diff --git a/doc/src/package.txt b/doc/src/package.txt index 58f6a5e34db2ccf9ee89c3b779742b6f8e7559aa..5c698934e857bfa31bdceac69bb3296ff76961c3 100644 --- a/doc/src/package.txt +++ b/doc/src/package.txt @@ -62,7 +62,7 @@ args = arguments specific to the style :l {no_affinity} values = none {kokkos} args = keyword value ... zero or more keyword/value pairs may be appended - keywords = {neigh} or {neigh/qeq} or {newton} or {binsize} or {comm} or {comm/exchange} or {comm/forward} + keywords = {neigh} or {neigh/qeq} or {newton} or {binsize} or {comm} or {comm/exchange} or {comm/forward} or {comm/reverse} {neigh} value = {full} or {half} full = full neighbor list half = half neighbor list built in thread-safe manner @@ -75,9 +75,10 @@ args = arguments specific to the style :l {binsize} value = size size = bin size for neighbor list construction (distance units) {comm} value = {no} or {host} or {device} - use value for both comm/exchange and comm/forward + use value for comm/exchange and comm/forward and comm/reverse {comm/exchange} value = {no} or {host} or {device} {comm/forward} value = {no} or {host} or {device} + {comm/reverse} value = {no} or {host} or {device} no = perform communication pack/unpack in non-KOKKOS mode host = perform pack/unpack on host (e.g. with OpenMP threading) device = perform pack/unpack on device (e.g. on GPU) @@ -429,17 +430,18 @@ Coulombic solver"_kspace_style.html because the GPU is faster at performing pairwise interactions, then this rule of thumb may give too large a binsize. 
-The {comm} and {comm/exchange} and {comm/forward} keywords determine +The {comm} and {comm/exchange} and {comm/forward} and {comm/reverse} keywords determine whether the host or device performs the packing and unpacking of data when communicating per-atom data between processors. "Exchange" communication happens only on timesteps that neighbor lists are rebuilt. The data is only for atoms that migrate to new processors. -"Forward" communication happens every timestep. The data is for atom +"Forward" communication happens every timestep. "Reverse" communication +happens every timestep if the {newton} option is on. The data is for atom coordinates and any other atom properties that needs to be updated for ghost atoms owned by each processor. The {comm} keyword is simply a short-cut to set the same value -for both the {comm/exchange} and {comm/forward} keywords. +for the {comm/exchange} and {comm/forward} and {comm/reverse} keywords. The value options for all 3 keywords are {no} or {host} or {device}.
A value of {no} means to use the standard non-KOKKOS method of diff --git a/doc/src/pair_born.txt b/doc/src/pair_born.txt index a3cc744a2203cd92ae8314dca242a0e76f70d8cb..a016f77fa3ba6c96ef18fbb7d1094e79a0475aab 100644 --- a/doc/src/pair_born.txt +++ b/doc/src/pair_born.txt @@ -17,6 +17,7 @@ pair_style born/coul/long/omp command :h3 pair_style born/coul/msm command :h3 pair_style born/coul/msm/omp command :h3 pair_style born/coul/wolf command :h3 +pair_style born/coul/wolf/cs command :h3 pair_style born/coul/wolf/gpu command :h3 pair_style born/coul/wolf/omp command :h3 pair_style born/coul/dsf command :h3 @@ -36,7 +37,7 @@ args = list of arguments for a particular style :ul {born/coul/msm} args = cutoff (cutoff2) cutoff = global cutoff for non-Coulombic (and Coulombic if only 1 arg) (distance units) cutoff2 = global cutoff for Coulombic (optional) (distance units) - {born/coul/wolf} args = alpha cutoff (cutoff2) + {born/coul/wolf} or {born/coul/wolf/cs} args = alpha cutoff (cutoff2) alpha = damping parameter (inverse distance units) cutoff = global cutoff for non-Coulombic (and Coulombic if only 1 arg) (distance units) cutoff2 = global cutoff for Coulombic (optional) (distance units) @@ -65,6 +66,7 @@ pair_coeff 1 1 6.08 0.317 2.340 24.18 11.51 :pre pair_style born/coul/wolf 0.25 10.0 pair_style born/coul/wolf 0.25 10.0 9.0 +pair_style born/coul/wolf/cs 0.25 10.0 9.0 pair_coeff * * 6.08 0.317 2.340 24.18 11.51 pair_coeff 1 1 6.08 0.317 2.340 24.18 11.51 :pre @@ -106,8 +108,9 @@ damped shifted force model as in the "coul/dsf"_pair_coul.html style. Style {born/coul/long/cs} is identical to {born/coul/long} except that a term is added for the "core/shell model"_Section_howto.html#howto_25 to allow charges on core and shell particles to be separated by r = -0.0. The same correction is introduced for {born/coul/dsf/cs} style -which is identical to {born/coul/dsf}. +0.0. 
The same correction is introduced for the {born/coul/dsf/cs} +style which is identical to {born/coul/dsf}. And likewise for +{born/coul/wolf/cs} style which is identical to {born/coul/wolf}. Note that these potentials are related to the "Buckingham potential"_pair_buck.html. diff --git a/doc/src/pair_coul.txt b/doc/src/pair_coul.txt index 29e5beed3c755f9e85b2b5d49f3fcbcdd4095470..4cca5ee0d78a529162e782121855150daa162e06 100644 --- a/doc/src/pair_coul.txt +++ b/doc/src/pair_coul.txt @@ -29,6 +29,7 @@ pair_style coul/streitz command :h3 pair_style coul/wolf command :h3 pair_style coul/wolf/kk command :h3 pair_style coul/wolf/omp command :h3 +pair_style coul/wolf/cs command :h3 pair_style tip4p/cut command :h3 pair_style tip4p/long command :h3 pair_style tip4p/cut/omp command :h3 @@ -43,6 +44,7 @@ pair_style coul/long cutoff pair_style coul/long/cs cutoff pair_style coul/long/gpu cutoff pair_style coul/wolf alpha cutoff +pair_style coul/wolf/cs alpha cutoff pair_style coul/streitz cutoff keyword alpha pair_style tip4p/cut otype htype btype atype qdist cutoff pair_style tip4p/long otype htype btype atype qdist cutoff :pre @@ -72,6 +74,7 @@ pair_style coul/msm 10.0 pair_coeff * * :pre pair_style coul/wolf 0.2 9.0 +pair_style coul/wolf/cs 0.2 9.0 pair_coeff * * :pre pair_style coul/streitz 12.0 ewald @@ -202,7 +205,9 @@ interactions outside that distance are computed in reciprocal space. Style {coul/long/cs} is identical to {coul/long} except that a term is added for the "core/shell model"_Section_howto.html#howto_25 to allow -charges on core and shell particles to be separated by r = 0.0. +charges on core and shell particles to be separated by r = 0.0. The +same correction is introduced for the {coul/wolf/cs} style which is +identical to {coul/wolf}. 
Styles {tip4p/cut} and {tip4p/long} implement the coulomb part of the TIP4P water model of "(Jorgensen)"_#Jorgensen3, which introduces diff --git a/doc/src/pair_cs.txt b/doc/src/pair_cs.txt index 64dfc20d4748e774b51051dba65cd25953f04550..c1084c608797863a3ecc7528cdb457298d8c3512 100644 --- a/doc/src/pair_cs.txt +++ b/doc/src/pair_cs.txt @@ -9,12 +9,13 @@ pair_style born/coul/long/cs command :h3 pair_style buck/coul/long/cs command :h3 pair_style born/coul/dsf/cs command :h3 +pair_style born/coul/wolf/cs command :h3 [Syntax:] pair_style style args :pre -style = {born/coul/long/cs} or {buck/coul/long/cs} or {born/coul/dsf/cs} +style = {born/coul/long/cs} or {buck/coul/long/cs} or {born/coul/dsf/cs} or {born/coul/wolf/cs} args = list of arguments for a particular style :ul {born/coul/long/cs} args = cutoff (cutoff2) cutoff = global cutoff for non-Coulombic (and Coulombic if only 1 arg) (distance units) @@ -26,6 +27,10 @@ args = list of arguments for a particular style :ul alpha = damping parameter (inverse distance units) cutoff = global cutoff for non-Coulombic (and Coulombic if only 1 arg) (distance units) cutoff2 = global cutoff for Coulombic (distance units) :pre + {born/coul/wolf/cs} args = alpha cutoff (cutoff2) + alpha = damping parameter (inverse distance units) + cutoff = global cutoff for non-Coulombic (and Coulombic if only 1 arg) (distance units) + cutoff2 = global cutoff for Coulombic (optional) (distance units) [Examples:] @@ -41,6 +46,10 @@ pair_style born/coul/dsf/cs 0.1 10.0 12.0 pair_coeff * * 0.0 1.00 0.00 0.00 0.00 pair_coeff 1 1 480.0 0.25 0.00 1.05 0.50 :pre +pair_style born/coul/wolf/cs 0.25 10.0 12.0 +pair_coeff * * 0.0 1.00 0.00 0.00 0.00 +pair_coeff 1 1 480.0 0.25 0.00 1.05 0.50 :pre + [Description:] These pair styles are designed to be used with the adiabatic @@ -73,13 +82,21 @@ the core and shell, epsilon is the dielectric constant and r_min is the minimal distance.
The pair style {born/coul/dsf/cs} is identical to the -"pair_style born/coul/dsf"_pair_born.html style, which uses the +"pair_style born/coul/dsf"_pair_born.html style, which uses the damped shifted force model as in "coul/dsf"_pair_coul.html to compute the Coulomb contribution. This approach does not require a long-range solver, thus the only correction is the addition of a minimal distance to avoid the possible r = 0.0 case for a core/shell pair. +The pair style {born/coul/wolf/cs} is identical to the +"pair_style born/coul/wolf"_pair_born.html style, which uses +the Wolf summation as in "coul/wolf"_pair_coul.html to compute +the Coulomb contribution. This approach does not require +a long-range solver, thus the only correction is the addition of a +minimal distance to avoid the possible r = 0.0 case for a +core/shell pair. + [Restrictions:] These pair styles are part of the CORESHELL package. They are only diff --git a/doc/src/pair_dpd.txt b/doc/src/pair_dpd.txt index 8d194bb0920999dadfaf7673619369c0550e68d5..9e29e93430602fd485bef467602d7001c8534fab 100644 --- a/doc/src/pair_dpd.txt +++ b/doc/src/pair_dpd.txt @@ -8,6 +8,7 @@ pair_style dpd command :h3 pair_style dpd/gpu command :h3 +pair_style dpd/intel command :h3 pair_style dpd/omp command :h3 pair_style dpd/tstat command :h3 pair_style dpd/tstat/gpu command :h3 diff --git a/doc/src/pair_eam.txt b/doc/src/pair_eam.txt index a0026432ec6cd52255ca53f8a084c8f10b60454f..03e77f53ab0f291be3054b1464d1936b057e95e6 100644 --- a/doc/src/pair_eam.txt +++ b/doc/src/pair_eam.txt @@ -294,7 +294,7 @@ distribution have a ".cdeam" suffix. Style {eam/fs} computes pairwise interactions for metals and metal alloys using a generalized form of EAM potentials due to Finnis and -Sinclair "(Finnis)"_#Finnis. The total energy Ei of an atom I is +Sinclair "(Finnis)"_#Finnis1. The total energy Ei of an atom I is given by :c,image(Eqs/pair_eam_fs.jpg) @@ -442,7 +442,7 @@ of Physics: Condensed Matter, 16, S2629 (2004). 
[(Daw)] Daw, Baskes, Phys Rev Lett, 50, 1285 (1983). Daw, Baskes, Phys Rev B, 29, 6443 (1984). -:link(Finnis) +:link(Finnis1) [(Finnis)] Finnis, Sinclair, Philosophical Magazine A, 50, 45 (1984). :link(Stukowski) diff --git a/examples/USER/misc/rhok/README.md b/examples/USER/misc/rhok/README.md new file mode 100644 index 0000000000000000000000000000000000000000..4e011255fcc8e7628379451a94ee30db8ec867a3 --- /dev/null +++ b/examples/USER/misc/rhok/README.md @@ -0,0 +1,74 @@ +# The Interface Pinning method for studying solid-liquid transitions + +In this example we will use the interface pinning method to study a solid-liquid transition. +This is done by adding a harmonic potential to the Hamiltonian +that biases the system towards two-phase configurations: + + U_bias = 0.5*K*(Q-a)^2 + +The bias field couples to an order-parameter of crystallinity Q. The implementation uses long-range order: + + Q=|rho_k|, + +where rho_k is the collective density field of the wave-vector k. +For future reference we note that the structure factor S(k) is given by the variance of the collective density field: + + S(k)=|rho_k|^2. + +### About the method + +It is recommended to get familiar with the interface pinning method by reading: + + [Ulf R. Pedersen, JCP 139, 104102 (2013)](http://dx.doi.org/10.1063/1.4818747) + +A detailed bibliography is provided at + + <http://urp.dk/interface_pinning.htm> + +and a brief introduction can be found on YouTube: + + <http://www.youtube.com/watch?v=F_79JZNdyoQ> + +### Implementation in LAMMPS + +For this example we will be using the rhok fix. + + fix [name] [groupID] rhok [nx] [ny] [nz] [K] [a] + +This fix adds a harmonic bias potential U_bias=0.5*K*(|rho_k|-a)^2 to the force calculation. +The elements of the wave-vector k are given by the nx, ny and nz input: + + k_x = (2 pi / L_x) * n_x, k_y = (2 pi / L_y) * n_y and k_z = (2 pi / L_z) * n_z. + +We will use a k vector that corresponds to a Bragg peak.
+ +## Example: the Lennard-Jones (LJ) model + +We will use the interface pinning method to study melting of the LJ model +at temperature 0.8 and pressure 2.185. This is a coexistence state-point, and the method +can be used to show this. The present directory contains the input files that we will use: + + in.crystal + in.setup + in.pinning + +1. First we will determine the density of the crystal with the LAMMPS input file in.crystal. + From the output we get that the average density after equilibration is 0.9731. + We need this density to ensure hydrostatic pressure in a two-phase simulation. + +2. Next, we set up a two-phase configuration using in.setup. + +3. Finally, we run a two-phase simulation with the bias-field applied using in.pinning. + The last column in the output shows |rho_k|. We note that after an equilibration period + the value fluctuates around the anchor point (a) -- showing that this is indeed a coexistence + state point. + +The reference [JCP 139, 104102 (2013)](http://dx.doi.org/10.1063/1.4818747) gives details on using the method to find coexistence state points, +and the reference [JCP 142, 044104 (2015)](http://dx.doi.org/10.1063/1.4905955) shows how the crystal growth rate can be computed from fluctuations. +That method has been found to be most effective in the slightly super-heated regime above the melting temperature. + +## Contact + + Ulf R.
Pedersen; + <http://www.urp.dk>; + ulf AT urp.dk diff --git a/examples/USER/misc/rhok/in.crystal b/examples/USER/misc/rhok/in.crystal new file mode 100644 index 0000000000000000000000000000000000000000..55e9e59a064ad93165d3a5f35ca5661b5e1be3c2 --- /dev/null +++ b/examples/USER/misc/rhok/in.crystal @@ -0,0 +1,36 @@ +units lj +dimension 3 +boundary p p p +atom_style atomic + +# truncated and shifted LJ potential +pair_style lj/cut 2.5 +pair_modify shift yes +lattice fcc 0.9731 +region my_box block 0 8.0 0 8.0 0 20.0 +create_box 1 my_box +region particles block 0 8.0 0 8.0 0 20.0 +create_atoms 1 region particles +pair_coeff 1 1 1.0 1.0 2.5 +pair_modify tail no +pair_modify shift yes +mass 1 1.0 +velocity all create 1.6 1 mom yes rot yes + +# simulation parameters +neighbor 0.6 bin +timestep 0.004 +run_style verlet +fix ensemble all npt temp 0.8 0.8 4.0 aniso 2.185 2.185 8.0 pchain 32 + +# computing long-range order (no bias is added since K=0) +fix bias all rhok 16 0 0 0.0 0.0 + +# output +thermo 50 +thermo_style custom step temp press density f_bias[3] +# dump dumpXYZ all xyz 2000 traj.xyz + +# NOTE: this is cut short to 5000 steps for demonstration purposes +# run 100000 +run 5000 diff --git a/examples/USER/misc/rhok/in.pinning b/examples/USER/misc/rhok/in.pinning new file mode 100644 index 0000000000000000000000000000000000000000..0c220f480b7ca8586c29b96b0455dd5002efda4d --- /dev/null +++ b/examples/USER/misc/rhok/in.pinning @@ -0,0 +1,33 @@ +units lj +dimension 3 +boundary p p p +atom_style atomic + +# truncated and shifted LJ potential +pair_style lj/cut 2.5 +pair_modify shift yes +read_data data.halfhalf +pair_coeff 1 1 1.0 1.0 2.5 +mass 1 1.0 + +# simulation parameters +neighbor 0.6 bin +timestep 0.004 +run_style verlet + +velocity all create 0.8 1 mom yes rot yes +fix ensemble all npt temp 0.8 0.8 4.0 z 2.185 2.185 8.0 +fix 100 all momentum 100 linear 1 1 1 + +# harmonic rho_k bias-field +# nx ny nz K a +fix bias all rhok 16 0 0 4.0 26.00 + +# output U_bias
rho_k_RE rho_k_IM |rho_k| +thermo_style custom step temp pzz pe lz f_bias f_bias[1] f_bias[2] f_bias[3] +thermo 50 +# dump dumpXYZ all xyz 500 traj.xyz + +# NOTE: run reduced for demonstration purposes +# run 50000 +run 5000 diff --git a/examples/USER/misc/rhok/in.setup b/examples/USER/misc/rhok/in.setup new file mode 100644 index 0000000000000000000000000000000000000000..649b0f534c8fc8821ec301f2addac3568e3a120e --- /dev/null +++ b/examples/USER/misc/rhok/in.setup @@ -0,0 +1,41 @@ +units lj +dimension 3 +boundary p p p +atom_style atomic + +# truncated and shifted LJ potential +pair_style lj/cut 2.5 +pair_modify shift yes + +# fcc lattice +lattice fcc 0.9731 +region my_box block 0 8.0 0 8.0 0 20.0 +create_box 1 my_box +region particles block 0 8.0 0 8.0 0 20.0 +create_atoms 1 region particles +pair_coeff 1 1 1.0 1.0 2.5 +mass 1 1.0 +change_box all z final 0.0 34 remap units box + +# select particles on one side of the elongated box +region left plane 0 0 10 0 0 1 +group left region left + +velocity left create 6.0 1 mom yes rot yes + +# simulation parameters +neighbor 0.6 bin +timestep 0.004 +run_style verlet +fix ensemble left nve # Note: only particles on the left-hand side are moved +fix langevin left langevin 3.0 0.8 100.0 2017 + +# output +thermo_style custom step temp pzz pe lz +thermo 100 +# dump dumpXYZ all xyz 100 traj.xyz + +# run reduced for demonstration purposes +# run 10000 +run 5000 +write_data data.halfhalf diff --git a/examples/USER/misc/rhok/log.22Sep2017.crystal.g++.1 b/examples/USER/misc/rhok/log.22Sep2017.crystal.g++.1 new file mode 100644 index 0000000000000000000000000000000000000000..05fadb5c032d51bca8064d40cb63a5948fa652ee --- /dev/null +++ b/examples/USER/misc/rhok/log.22Sep2017.crystal.g++.1 @@ -0,0 +1,187 @@ +LAMMPS (22 Sep 2017) + using 1 OpenMP thread(s) per MPI task +units lj +dimension 3 +boundary p p p +atom_style atomic + +# truncated and shifted LJ potential +pair_style lj/cut 2.5 +pair_modify shift yes +lattice fcc 0.9731 +Lattice spacing
in x,y,z = 1.6019 1.6019 1.6019 +region my_box block 0 8.0 0 8.0 0 20.0 +create_box 1 my_box +Created orthogonal box = (0 0 0) to (12.8152 12.8152 32.0379) + 1 by 1 by 1 MPI processor grid +region particles block 0 8.0 0 8.0 0 20.0 +create_atoms 1 region particles +Created 5120 atoms +pair_coeff 1 1 1.0 1.0 2.5 +pair_modify tail no +pair_modify shift yes +mass 1 1.0 +velocity all create 1.6 1 mom yes rot yes + +# simulation parameters +neighbor 0.6 bin +timestep 0.004 +run_style verlet +fix ensemble all npt temp 0.8 0.8 4.0 aniso 2.185 2.185 8.0 pchain 32 + +# computing long-range order (no bias is added since k=0) +fix bias all rhok 16 0 0 0.0 0.0 + +# output +thermo 50 +thermo_style custom step temp press density f_bias[3] +# dump dumpXYZ all xyz 2000 traj.xyz + +# NOTE: this is cut short to 5000 steps for demonstration purposes +# run 100000 +run 5000 +Neighbor list info ... + update every 1 steps, delay 10 steps, check yes + max neighbors/atom: 2000, page size: 100000 + master list distance cutoff = 3.1 + ghost atom cutoff = 3.1 + binsize = 1.55, bins = 9 9 21 + 1 neighbor lists, perpetual/occasional/extra = 1 0 0 + (1) pair lj/cut, perpetual + attributes: half, newton on + pair build: half/bin/atomonly/newton + stencil: half/bin/3d/newton + bin: standard +Per MPI rank memory allocation (min/avg/max) = 4.523 | 4.523 | 4.523 Mbytes +Step Temp Press Density f_bias[3] + 0 1.6 -2.7568106 0.9731 71.554175 + 50 0.78457786 3.1029192 0.97362639 54.327705 + 100 0.85528971 2.4670259 0.97213457 55.189308 + 150 0.85241818 2.3210306 0.9698027 56.138125 + 200 0.82301385 2.3448692 0.96708227 55.735326 + 250 0.83076383 2.0890816 0.96425763 55.320625 + 300 0.81602823 2.0118796 0.96173925 54.095736 + 350 0.81084006 1.9122192 0.95979392 54.526429 + 400 0.80776593 1.8502174 0.95869117 54.434901 + 450 0.80694697 1.8435873 0.95851085 53.20809 + 500 0.81384248 1.8111331 0.95917305 53.419395 + 550 0.81027072 1.9222272 0.96056019 54.36723 + 600 0.81199582 2.0291945 0.96248486 54.888582 
+ 650 0.82507964 2.0706462 0.96467227 55.807137 + 700 0.832562 2.1471442 0.9668913 56.721267 + 750 0.83358138 2.2674672 0.968984 56.723838 + 800 0.83477542 2.3658275 0.97072603 56.234689 + 850 0.84722921 2.3506233 0.97189674 56.262424 + 900 0.83526965 2.4532068 0.97248856 56.219103 + 950 0.83174583 2.4763958 0.97249527 56.409813 + 1000 0.83022557 2.4334341 0.97194093 55.890858 + 1050 0.83208978 2.3478416 0.97092452 54.934691 + 1100 0.82789545 2.272404 0.9696152 54.90894 + 1150 0.82678617 2.1798046 0.96819776 54.927782 + 1200 0.8088841 2.1960256 0.96687735 54.914327 + 1250 0.81512784 2.0736261 0.96579008 53.927291 + 1300 0.81271067 2.0297138 0.96504188 54.289698 + 1350 0.8201767 1.9493976 0.96464115 55.342131 + 1400 0.80880489 2.0016987 0.96468463 55.757758 + 1450 0.8114196 2.0282699 0.96514115 55.865676 + 1500 0.81085664 2.0838361 0.96591869 56.553425 + 1550 0.81257075 2.1283157 0.96694549 56.921544 + 1600 0.82617645 2.1017986 0.96817075 56.858808 + 1650 0.82616141 2.1885582 0.96941073 56.717917 + 1700 0.81634174 2.2996967 0.97047447 56.453745 + 1750 0.82447573 2.2924266 0.97128663 56.916813 + 1800 0.83610432 2.236456 0.97178453 56.400752 + 1850 0.82479203 2.3103493 0.97197318 55.891368 + 1900 0.82298992 2.3059289 0.97181084 55.680563 + 1950 0.82098556 2.2801003 0.97138609 55.754406 + 2000 0.8181203 2.2480175 0.97078591 55.801363 + 2050 0.82822293 2.1208884 0.97004107 55.687 + 2100 0.7976818 2.2711199 0.96930169 55.459844 + 2150 0.81817848 2.0680351 0.96860201 56.514731 + 2200 0.80707457 2.1112141 0.96810519 55.504308 + 2250 0.81651111 2.0077603 0.96781161 55.635702 + 2300 0.80634534 2.0662241 0.96777177 56.051086 + 2350 0.80892831 2.0619333 0.96799037 56.548711 + 2400 0.82454203 1.9585394 0.9684672 56.695235 + 2450 0.81517178 2.075283 0.96921622 56.613082 + 2500 0.80969595 2.1624581 0.97010528 56.57516 + 2550 0.80862964 2.2088622 0.97100774 57.072594 + 2600 0.81468816 2.2293973 0.97192868 56.879212 + 2650 0.82063107 2.2244887 0.97269715 55.454502 + 2700 0.81691618 
2.2789954 0.97319841 54.421943 + 2750 0.8141787 2.2981247 0.97340453 54.469921 + 2800 0.81973871 2.2422136 0.9733278 55.959235 + 2850 0.82037399 2.201016 0.97302727 56.685826 + 2900 0.80650164 2.2672955 0.9726128 56.574395 + 2950 0.81752783 2.1317541 0.97207545 56.809412 + 3000 0.80836945 2.1461483 0.97151192 57.205206 + 3050 0.80785109 2.1189056 0.97103049 57.418763 + 3100 0.79835058 2.146416 0.97069705 57.329383 + 3150 0.79792089 2.1388267 0.97051679 57.279852 + 3200 0.79934603 2.1049562 0.97046851 56.351494 + 3250 0.79523232 2.1549779 0.97063956 56.00356 + 3300 0.8004458 2.1145975 0.97096375 55.725509 + 3350 0.79772742 2.166292 0.97143785 55.558075 + 3400 0.80621087 2.1309217 0.97198456 55.816704 + 3450 0.80540626 2.1727557 0.97263267 55.671283 + 3500 0.80867606 2.1905129 0.97321538 55.390086 + 3550 0.80917896 2.2144872 0.97370472 55.742085 + 3600 0.80930722 2.2288938 0.974093 56.23064 + 3650 0.80390523 2.2777327 0.97431886 56.084731 + 3700 0.79620093 2.3143541 0.97435103 55.942797 + 3750 0.80252393 2.2564638 0.9741875 56.042055 + 3800 0.78981264 2.3156481 0.9739121 55.971352 + 3850 0.80391951 2.1804938 0.97351088 55.855858 + 3900 0.81268129 2.0855818 0.97308521 56.288315 + 3950 0.7958182 2.175259 0.97273088 56.140141 + 4000 0.80054484 2.1163279 0.97243129 56.366818 + 4050 0.79760187 2.105362 0.97225308 56.684619 + 4100 0.79283424 2.1357603 0.972206 56.203341 + 4150 0.79543088 2.1036951 0.97227608 56.606315 + 4200 0.79410999 2.1402049 0.97253758 56.277478 + 4250 0.7985469 2.1285154 0.97293622 56.356076 + 4300 0.79700387 2.1470614 0.97337091 56.722298 + 4350 0.80479321 2.1403244 0.97384674 57.212574 + 4400 0.79505512 2.224463 0.97434415 56.561877 + 4450 0.78346648 2.3347865 0.97478611 56.681362 + 4500 0.79811284 2.259123 0.97510069 57.365929 + 4550 0.80015561 2.2345254 0.97523653 57.34799 + 4600 0.79648318 2.2651869 0.97525975 57.502318 + 4650 0.80524865 2.1943025 0.97507638 57.702488 + 4700 0.80397778 2.1758629 0.97478268 57.162107 + 4750 0.78914913 2.2470191 
0.9744625 56.849565 + 4800 0.79324889 2.2028993 0.97408817 57.572344 + 4850 0.78993209 2.181763 0.97373372 57.683552 + 4900 0.79041263 2.1604768 0.97348692 56.922312 + 4950 0.79741332 2.1105901 0.97332545 57.488932 + 5000 0.7891178 2.163416 0.97328963 57.365252 +Loop time of 33.6467 on 1 procs for 5000 steps with 5120 atoms + +Performance: 51357.258 tau/day, 148.603 timesteps/s +99.7% CPU use with 1 MPI tasks x 1 OpenMP threads + +MPI task timing breakdown: +Section | min time | avg time | max time |%varavg| %total +--------------------------------------------------------------- +Pair | 24.699 | 24.699 | 24.699 | 0.0 | 73.41 +Neigh | 2.8894 | 2.8894 | 2.8894 | 0.0 | 8.59 +Comm | 0.34907 | 0.34907 | 0.34907 | 0.0 | 1.04 +Output | 0.0056 | 0.0056 | 0.0056 | 0.0 | 0.02 +Modify | 5.5718 | 5.5718 | 5.5718 | 0.0 | 16.56 +Other | | 0.1319 | | | 0.39 + +Nlocal: 5120 ave 5120 max 5120 min +Histogram: 1 0 0 0 0 0 0 0 0 0 +Nghost: 7594 ave 7594 max 7594 min +Histogram: 1 0 0 0 0 0 0 0 0 0 +Neighs: 323081 ave 323081 max 323081 min +Histogram: 1 0 0 0 0 0 0 0 0 0 + +Total # of neighbors = 323081 +Ave neighs/atom = 63.1018 +Neighbor list builds = 248 +Dangerous builds = 0 + +Please see the log.cite file for references relevant to this simulation + +Total wall time: 0:00:33 diff --git a/examples/USER/misc/rhok/log.22Sep2017.crystal.g++.4 b/examples/USER/misc/rhok/log.22Sep2017.crystal.g++.4 new file mode 100644 index 0000000000000000000000000000000000000000..cec9c69aff92e969c916400c4190e78160929e23 --- /dev/null +++ b/examples/USER/misc/rhok/log.22Sep2017.crystal.g++.4 @@ -0,0 +1,187 @@ +LAMMPS (22 Sep 2017) + using 1 OpenMP thread(s) per MPI task +units lj +dimension 3 +boundary p p p +atom_style atomic + +# truncated and shifted LJ potential +pair_style lj/cut 2.5 +pair_modify shift yes +lattice fcc 0.9731 +Lattice spacing in x,y,z = 1.6019 1.6019 1.6019 +region my_box block 0 8.0 0 8.0 0 20.0 +create_box 1 my_box +Created orthogonal box = (0 0 0) to (12.8152 12.8152 32.0379) + 
1 by 1 by 4 MPI processor grid +region particles block 0 8.0 0 8.0 0 20.0 +create_atoms 1 region particles +Created 5120 atoms +pair_coeff 1 1 1.0 1.0 2.5 +pair_modify tail no +pair_modify shift yes +mass 1 1.0 +velocity all create 1.6 1 mom yes rot yes + +# simulation parameters +neighbor 0.6 bin +timestep 0.004 +run_style verlet +fix ensemble all npt temp 0.8 0.8 4.0 aniso 2.185 2.185 8.0 pchain 32 + +# computing long-range order (no bias is added since k=0) +fix bias all rhok 16 0 0 0.0 0.0 + +# output +thermo 50 +thermo_style custom step temp press density f_bias[3] +# dump dumpXYZ all xyz 2000 traj.xyz + +# NOTE: this is cut short to 5000 steps for demonstration purposes +# run 100000 +run 5000 +Neighbor list info ... + update every 1 steps, delay 10 steps, check yes + max neighbors/atom: 2000, page size: 100000 + master list distance cutoff = 3.1 + ghost atom cutoff = 3.1 + binsize = 1.55, bins = 9 9 21 + 1 neighbor lists, perpetual/occasional/extra = 1 0 0 + (1) pair lj/cut, perpetual + attributes: half, newton on + pair build: half/bin/atomonly/newton + stencil: half/bin/3d/newton + bin: standard +Per MPI rank memory allocation (min/avg/max) = 3.23 | 3.23 | 3.23 Mbytes +Step Temp Press Density f_bias[3] + 0 1.6 -2.7568106 0.9731 71.554175 + 50 0.78457786 3.1029192 0.97362639 54.327705 + 100 0.85528971 2.4670259 0.97213457 55.189308 + 150 0.85241818 2.3210306 0.9698027 56.138125 + 200 0.82301385 2.3448692 0.96708227 55.735326 + 250 0.83076383 2.0890816 0.96425763 55.320625 + 300 0.81602823 2.0118796 0.96173925 54.095736 + 350 0.81084006 1.9122192 0.95979392 54.526429 + 400 0.80776593 1.8502174 0.95869117 54.434901 + 450 0.80694697 1.8435873 0.95851085 53.20809 + 500 0.81384248 1.8111331 0.95917305 53.419395 + 550 0.81027072 1.9222272 0.96056019 54.36723 + 600 0.81199582 2.0291945 0.96248486 54.888582 + 650 0.82507964 2.0706462 0.96467227 55.807137 + 700 0.832562 2.1471442 0.9668913 56.721267 + 750 0.83358138 2.2674672 0.968984 56.723838 + 800 0.83477542 
2.3658275 0.97072603 56.234689 + 850 0.84722921 2.3506233 0.97189674 56.262424 + 900 0.83526965 2.4532068 0.97248856 56.219103 + 950 0.83174583 2.4763958 0.97249527 56.409813 + 1000 0.83022557 2.4334341 0.97194093 55.890858 + 1050 0.83208978 2.3478416 0.97092452 54.934691 + 1100 0.82789545 2.272404 0.9696152 54.90894 + 1150 0.82678617 2.1798046 0.96819776 54.927782 + 1200 0.8088841 2.1960256 0.96687735 54.914327 + 1250 0.81512784 2.0736261 0.96579008 53.927291 + 1300 0.81271067 2.0297138 0.96504188 54.289698 + 1350 0.8201767 1.9493976 0.96464115 55.342131 + 1400 0.80880489 2.0016987 0.96468463 55.757758 + 1450 0.8114196 2.0282699 0.96514115 55.865676 + 1500 0.81085664 2.0838361 0.96591869 56.553425 + 1550 0.81257075 2.1283157 0.96694549 56.921544 + 1600 0.82617645 2.1017986 0.96817075 56.858808 + 1650 0.82616141 2.1885582 0.96941073 56.717917 + 1700 0.81634174 2.2996967 0.97047447 56.453745 + 1750 0.82447573 2.2924266 0.97128663 56.916813 + 1800 0.83610432 2.236456 0.97178453 56.400752 + 1850 0.824792 2.3103491 0.97197318 55.891368 + 1900 0.82298989 2.3059287 0.97181084 55.680562 + 1950 0.82098545 2.2801009 0.97138609 55.754404 + 2000 0.81812031 2.2480166 0.97078591 55.801371 + 2050 0.82822262 2.1208887 0.97004108 55.687001 + 2100 0.79768162 2.2711186 0.9693017 55.459852 + 2150 0.81817874 2.0680317 0.96860202 56.514744 + 2200 0.80707412 2.1112032 0.96810521 55.504308 + 2250 0.81650921 2.0077757 0.96781164 55.635717 + 2300 0.80634656 2.066186 0.96777181 56.051088 + 2350 0.80893174 2.0619084 0.96799042 56.548711 + 2400 0.82453783 1.9585503 0.96846727 56.695111 + 2450 0.81517275 2.0752617 0.96921631 56.614046 + 2500 0.80969622 2.1624476 0.9701054 56.574846 + 2550 0.80861922 2.2089505 0.97100787 57.072334 + 2600 0.81468888 2.2293754 0.97192875 56.879416 + 2650 0.82061239 2.2245462 0.97269723 55.442015 + 2700 0.81687473 2.2792015 0.97319852 54.420301 + 2750 0.81416567 2.2982988 0.97340467 54.469427 + 2800 0.81978563 2.2418723 0.97332803 55.965451 + 2850 0.82069759 
2.1988948 0.97302752 56.686807 + 2900 0.80631184 2.2684466 0.97261407 56.585682 + 2950 0.81759744 2.1312328 0.97207888 56.812431 + 3000 0.80748056 2.152676 0.97151807 57.178849 + 3050 0.80789237 2.118162 0.97103728 57.433724 + 3100 0.79882523 2.1414744 0.97070338 57.34686 + 3150 0.79803949 2.1359043 0.97052875 57.382544 + 3200 0.79170386 2.1548392 0.97049349 56.465806 + 3250 0.78848813 2.1990144 0.97067557 55.929088 + 3300 0.79820555 2.1304609 0.97101444 55.624487 + 3350 0.79250565 2.1971235 0.97149233 55.933615 + 3400 0.80584844 2.1417239 0.97206083 55.85922 + 3450 0.80685744 2.1640501 0.97266047 55.135963 + 3500 0.80751888 2.1858277 0.97318703 55.407581 + 3550 0.79882754 2.2796452 0.97363149 55.392366 + 3600 0.80219171 2.2715765 0.97392571 55.867887 + 3650 0.79061794 2.3492866 0.97410985 56.0192 + 3700 0.8058483 2.2327904 0.97411924 56.491303 + 3750 0.79460746 2.2941868 0.97397764 55.929912 + 3800 0.80447478 2.2018009 0.97367627 55.663208 + 3850 0.80355335 2.17638 0.97333164 55.637261 + 3900 0.80388417 2.1531434 0.9729647 56.03794 + 3950 0.79557409 2.1853318 0.9726503 56.132348 + 4000 0.79547396 2.1457051 0.97235244 55.552675 + 4050 0.8058384 2.0637678 0.97213346 56.185416 + 4100 0.7976931 2.1028246 0.97208255 56.050347 + 4150 0.79555522 2.115473 0.97216375 56.868136 + 4200 0.79324134 2.1510383 0.97246129 56.462635 + 4250 0.80788167 2.0534887 0.97287821 55.650788 + 4300 0.79389865 2.2019815 0.97337765 55.596846 + 4350 0.79786309 2.1851119 0.97389825 57.000921 + 4400 0.79986518 2.1997541 0.97443778 57.551564 + 4450 0.8063901 2.1893874 0.97493151 57.236138 + 4500 0.80005802 2.250364 0.97533075 57.341358 + 4550 0.79707443 2.2995576 0.97557554 57.338713 + 4600 0.79869949 2.2807889 0.97563277 57.084504 + 4650 0.79694427 2.2673215 0.97544638 57.025663 + 4700 0.79023986 2.2884131 0.97511483 57.131188 + 4750 0.79566823 2.2215519 0.97464304 57.045676 + 4800 0.78936986 2.2268037 0.97410626 57.384178 + 4850 0.79025913 2.1836718 0.973616 57.78438 + 4900 0.80138424 2.0657609 
0.9732124 57.888266 + 4950 0.77853735 2.207944 0.97296347 57.312213 + 5000 0.79115984 2.1035893 0.97285578 57.109472 +Loop time of 9.53489 on 4 procs for 5000 steps with 5120 atoms + +Performance: 181229.223 tau/day, 524.390 timesteps/s +99.1% CPU use with 4 MPI tasks x 1 OpenMP threads + +MPI task timing breakdown: +Section | min time | avg time | max time |%varavg| %total +--------------------------------------------------------------- +Pair | 6.312 | 6.4238 | 6.5139 | 3.1 | 67.37 +Neigh | 0.72062 | 0.73538 | 0.74531 | 1.1 | 7.71 +Comm | 0.52697 | 0.64152 | 0.78688 | 14.1 | 6.73 +Output | 0.0028393 | 0.0029888 | 0.0033851 | 0.4 | 0.03 +Modify | 1.6249 | 1.669 | 1.7253 | 2.9 | 17.50 +Other | | 0.06221 | | | 0.65 + +Nlocal: 1280 ave 1289 max 1266 min +Histogram: 1 0 0 0 0 1 0 0 0 2 +Nghost: 3346.25 ave 3379 max 3331 min +Histogram: 1 2 0 0 0 0 0 0 0 1 +Neighs: 80701.8 ave 81534 max 79755 min +Histogram: 1 0 1 0 0 0 0 0 1 1 + +Total # of neighbors = 322807 +Ave neighs/atom = 63.0482 +Neighbor list builds = 248 +Dangerous builds = 0 + +Please see the log.cite file for references relevant to this simulation + +Total wall time: 0:00:09 diff --git a/examples/USER/misc/rhok/log.22Sep2017.pinning.g++.1 b/examples/USER/misc/rhok/log.22Sep2017.pinning.g++.1 new file mode 100644 index 0000000000000000000000000000000000000000..c2aaa9a58118475f45c0c2887eec58ac8588a631 --- /dev/null +++ b/examples/USER/misc/rhok/log.22Sep2017.pinning.g++.1 @@ -0,0 +1,186 @@ +LAMMPS (22 Sep 2017) + using 1 OpenMP thread(s) per MPI task +units lj +dimension 3 +boundary p p p +atom_style atomic + +# truncated and shifted LJ potential +pair_style lj/cut 2.5 +pair_modify shift yes +read_data data.halfhalf + orthogonal box = (0 0 0) to (12.8152 12.8152 34) + 1 by 1 by 1 MPI processor grid + reading atoms ... + 5120 atoms + reading velocities ... 
+ 5120 velocities +pair_coeff 1 1 1.0 1.0 2.5 +mass 1 1.0 + +# simulation parameters +neighbor 0.6 bin +timestep 0.004 +run_style verlet + +velocity all create 0.8 1 mom yes rot yes +fix ensemble all npt temp 0.8 0.8 4.0 z 2.185 2.185 8.0 +fix 100 all momentum 100 linear 1 1 1 + +# harmonic rho_k bias-field +# nx ny nz k a +fix bias all rhok 16 0 0 4.0 26.00 + +# output U_bias rho_k_RE rho_k_IM |rho_k| +thermo_style custom step temp pzz pe lz f_bias f_bias[1] f_bias[2] f_bias[3] +thermo 50 +dump dumpXYZ all xyz 500 traj.xyz + +# NOTE: run reduced for demonstration purposes +# run 50000 +run 5000 +Neighbor list info ... + update every 1 steps, delay 10 steps, check yes + max neighbors/atom: 2000, page size: 100000 + master list distance cutoff = 3.1 + ghost atom cutoff = 3.1 + binsize = 1.55, bins = 9 9 22 + 1 neighbor lists, perpetual/occasional/extra = 1 0 0 + (1) pair lj/cut, perpetual + attributes: half, newton on + pair build: half/bin/atomonly/newton + stencil: half/bin/3d/newton + bin: standard +Per MPI rank memory allocation (min/avg/max) = 5.723 | 5.723 | 5.723 Mbytes +Step Temp Pzz PotEng Lz f_bias f_bias[1] f_bias[2] f_bias[3] + 0 0.8 5.1566801 -4.8556711 34 179.52419 35.473155 -0.2832763 35.474286 + 50 1.072533 3.8158392 -5.2704532 34.024206 0.010596224 25.927135 -0.063106738 25.927212 + 100 1.1093231 3.6703116 -5.3380965 34.094814 1.8552612 26.958236 -0.51403326 26.963136 + 150 1.1080721 3.8202601 -5.3568368 34.207473 0.39188605 26.420755 -1.0759575 26.442655 + 200 1.1435287 3.3445987 -5.4365298 34.354119 3.0758718 27.239465 -0.19115251 27.240135 + 250 1.1203046 3.4669456 -5.4293867 34.511473 0.8543814 26.652785 -0.20818214 26.653598 + 300 1.1012709 3.4583154 -5.4281803 34.664509 2.4837156 27.097674 0.9518628 27.114387 + 350 1.0439632 3.8953869 -5.368619 34.810399 0.55385719 26.518391 0.64525272 26.52624 + 400 1.0083878 4.0523864 -5.3418278 34.957669 0.34806057 26.404011 0.83368604 26.417169 + 450 0.9675244 4.310087 -5.3089468 35.114208 0.7921285 
26.607512 1.077889 26.629336 + 500 0.94605476 4.1050097 -5.3062273 35.284018 0.87757754 26.639125 1.1140858 26.662411 + 550 0.92662323 3.9299063 -5.3062927 35.458565 1.3746441 26.773494 1.7256603 26.829049 + 600 0.89723165 3.7683555 -5.289725 35.629881 0.46692943 26.372973 2.4135502 26.483182 + 650 0.90612566 3.1098837 -5.3267851 35.788537 0.032662126 25.918784 3.2982102 26.127793 + 700 0.9152508 2.6527976 -5.3597799 35.923343 0.014621588 25.834591 3.6093492 26.085503 + 750 0.90156356 2.3474851 -5.3545938 36.031813 0.75225637 26.307204 4.0247215 26.613293 + 800 0.89748513 1.9825103 -5.3610202 36.113888 0.33402511 26.261326 2.7858039 26.408672 + 850 0.89496343 1.8906342 -5.3673514 36.169424 0.85431557 26.534648 2.5150347 26.653573 + 900 0.89463983 1.5654217 -5.3753283 36.20181 1.5689239 26.764737 2.5474794 26.885699 + 950 0.88663832 1.4399476 -5.3703322 36.209971 0.044436903 25.818418 1.2963356 25.850941 + 1000 0.87407208 1.485718 -5.3572665 36.195386 1.4405611 26.828072 1.0520795 26.848693 + 1050 0.87580489 1.163155 -5.3647269 36.160279 0.15319559 26.234791 1.4845964 26.276763 + 1100 0.86978111 1.3743181 -5.3594907 36.104958 1.1313537 25.19895 1.5711793 25.247885 + 1150 0.86987861 1.3212927 -5.3628503 36.035486 0.039865678 25.841762 0.93898962 25.858816 + 1200 0.87142486 1.3293818 -5.3676854 35.954411 0.16827389 25.70952 -0.14639427 25.709936 + 1250 0.87582265 1.3203803 -5.3764058 35.86575 0.25946652 25.639682 0.082696867 25.639815 + 1300 0.87371627 1.4680294 -5.375151 35.772824 0.17697069 25.701417 0.2397926 25.702535 + 1350 0.88617453 1.5923057 -5.3954912 35.681046 0.00049155526 25.973634 -0.74521794 25.984323 + 1400 0.87809636 1.5821707 -5.3850722 35.594706 0.024050814 26.107395 -0.34393685 26.10966 + 1450 0.87912192 1.7820174 -5.3885842 35.514722 0.20999914 25.667238 -0.66933655 25.675964 + 1500 0.88293618 2.0295275 -5.3963602 35.443445 0.60232374 25.376395 -1.9501461 25.451218 + 1550 0.90012542 1.9476472 -5.4249456 35.382791 0.4488038 26.448928 -1.1452474 
26.473711 + 1600 0.89155063 2.2462603 -5.4153432 35.332095 0.039621687 26.138157 -0.36825239 26.140751 + 1650 0.8942624 2.343747 -5.4233433 35.294954 0.0089980332 26.064277 0.38189192 26.067075 + 1700 0.90047841 2.451289 -5.4376312 35.27234 0.86985171 26.646438 0.83408084 26.659489 + 1750 0.87586052 2.6381221 -5.4067182 35.264564 6.346204 27.652722 2.6699692 27.78132 + 1800 0.87392582 2.6338176 -5.4109056 35.270073 0.046414129 26.016188 2.6651053 26.152339 + 1850 0.86540415 2.5434301 -5.4058587 35.285902 0.054615472 26.074279 2.1799787 26.165251 + 1900 0.87043082 2.5776772 -5.4216997 35.309062 0.68978148 26.38648 3.2614091 26.587274 + 1950 0.86281992 2.3107762 -5.4188978 35.338501 0.0072672577 25.736893 3.2375012 25.93972 + 2000 0.85905576 2.2894047 -5.4215995 35.36787 0.095633435 26.072085 2.7685848 26.21867 + 2050 0.85793751 2.2382039 -5.4279351 35.395213 0.13602344 25.598457 2.6881027 25.739209 + 2100 0.85585253 2.0765811 -5.4324511 35.418877 0.0059888115 25.754128 3.1436222 25.945279 + 2150 0.86701057 1.8449875 -5.4562208 35.436124 0.097328618 25.413697 4.3268293 25.7794 + 2200 0.85168154 1.9024923 -5.4395776 35.44246 0.20764576 25.094788 5.4406104 25.677784 + 2250 0.8429719 1.870335 -5.4320586 35.438363 0.34419961 24.998478 5.4475709 25.585151 + 2300 0.84176891 1.7100228 -5.4351472 35.422863 0.76036958 24.697018 5.8629967 25.383409 + 2350 0.84601588 1.8539039 -5.4456629 35.395979 0.38437531 25.647986 6.4163366 26.438392 + 2400 0.84637647 1.6299091 -5.4498948 35.36125 0.074236719 24.995872 7.8269968 26.192661 + 2450 0.85650449 1.6828907 -5.4683101 35.316669 0.3671827 25.280669 7.7040329 26.428476 + 2500 0.84963707 1.7305222 -5.4605394 35.265508 0.1406965 25.236741 7.2780025 26.265232 + 2550 0.84084365 1.8758368 -5.4497083 35.208725 0.33937687 24.544376 7.2334512 25.588067 + 2600 0.85317342 1.7781674 -5.4702734 35.149747 0.60378248 24.046307 8.3370138 25.450554 + 2650 0.85487644 2.0065374 -5.4747643 35.090431 0.22483651 24.937101 8.4669004 26.335288 + 2700 
0.84550083 1.9363031 -5.4628401 35.034349 0.43442577 24.250196 7.9943738 25.533939 + 2750 0.85843419 2.0473138 -5.484528 34.980671 0.45959294 24.17438 8.179356 25.520629 + 2800 0.86047607 2.0754522 -5.4899966 34.932466 0.00038123477 24.619856 8.3153434 25.986194 + 2850 0.86375793 2.2751324 -5.4977459 34.892337 0.0016455263 24.927259 7.289789 25.971316 + 2900 0.84438986 2.3790377 -5.4721407 34.863512 1.2372354 25.819445 7.132603 26.786523 + 2950 0.8551438 2.2721926 -5.4925958 34.84473 1.5405388 25.956466 6.976385 26.87765 + 3000 0.83737707 2.4009609 -5.4707188 34.834171 0.28507766 25.643879 6.1778846 26.377543 + 3050 0.84923235 2.4187994 -5.4938573 34.830836 0.036512025 25.139252 7.1457857 26.135115 + 3100 0.83872396 2.3811576 -5.4838787 34.833673 0.246984 24.21358 8.4588719 25.648586 + 3150 0.83957817 2.3901421 -5.4913118 34.84163 0.20477984 24.309852 10.088243 26.319984 + 3200 0.84283033 2.17292 -5.5025459 34.853975 1.3367154 24.581685 10.72011 26.817531 + 3250 0.84002379 2.1247709 -5.5044955 34.866106 0.11434509 24.463842 9.4874246 26.239108 + 3300 0.83311101 2.1492058 -5.5000847 34.875625 0.0053284993 23.815298 10.560222 26.051616 + 3350 0.83216701 1.9587594 -5.5043446 34.881623 0.58985562 23.934253 11.475462 26.543073 + 3400 0.82396039 2.1914951 -5.4971506 34.881199 0.098206955 23.393402 10.82936 25.778407 + 3450 0.83483253 1.9783612 -5.5182844 34.877327 7.6571212e-05 23.675355 10.761012 26.006188 + 3500 0.82712062 1.9718522 -5.5111818 34.869214 0.014836125 23.314122 11.312845 25.913872 + 3550 0.8342927 1.9114357 -5.5259968 34.855179 1.4050442 22.442758 11.377192 25.161834 + 3600 0.82631637 1.9836457 -5.5176244 34.835738 0.084637609 23.413286 10.824194 25.794285 + 3650 0.82425697 1.9218541 -5.5178548 34.811901 0.11000071 22.788707 12.022258 25.765478 + 3700 0.82491437 1.9624493 -5.521738 34.782417 0.034984027 23.011433 12.384217 26.132257 + 3750 0.82758167 2.0856442 -5.5283493 34.748872 0.001362163 23.030662 12.122144 26.026098 + 3800 0.81891108 1.9858824 
-5.5177774 34.714618 0.17075993 23.21344 12.345683 26.292199 + 3850 0.83392227 2.1631514 -5.5426333 34.681146 0.82106473 22.510204 11.678329 25.359272 + 3900 0.82230654 2.0017132 -5.5276756 34.650221 0.48735732 23.444809 12.339117 26.493638 + 3950 0.81929288 2.1749936 -5.5256673 34.61976 0.089219805 23.540062 11.527925 26.211211 + 4000 0.83415169 2.0446791 -5.5506187 34.591266 0.15593937 23.742282 11.26508 26.279231 + 4050 0.82362522 2.1998083 -5.5375157 34.563164 0.25405351 23.913834 11.081011 26.356408 + 4100 0.82589505 2.3074345 -5.543718 34.537763 0.080213125 24.03253 10.435108 26.200266 + 4150 0.83855297 2.2424199 -5.5658171 34.517758 0.62913338 23.974257 8.5079223 25.439138 + 4200 0.82522111 2.2622619 -5.5493275 34.502472 1.8756517 25.754617 7.9996898 26.968414 + 4250 0.82083124 2.4135193 -5.5464932 34.4919 1.1217436 25.8944 6.7070444 26.748914 + 4300 0.83059704 2.1375109 -5.5653245 34.487366 0.53623038 26.05979 4.9072346 26.517798 + 4350 0.82755047 2.1159821 -5.5650889 34.484506 0.10017723 25.405936 4.3532342 25.776195 + 4400 0.83192877 2.180851 -5.5759565 34.480909 0.053664012 25.993034 2.9844338 26.163805 + 4450 0.81860572 2.2333381 -5.5602138 34.477183 0.037864077 25.792233 1.9038859 25.862406 + 4500 0.82821762 2.1142023 -5.5788682 34.474784 0.088221344 26.20329 0.59417897 26.210025 + 4550 0.8205154 2.0896984 -5.5715531 34.472405 0.016076192 26.083166 -0.58187024 26.089655 + 4600 0.81294948 2.2274108 -5.5642678 34.469014 0.033774986 25.869616 0.14951307 25.870048 + 4650 0.80890532 2.1556346 -5.5622407 34.465277 0.67402048 25.413229 -0.56341819 25.419474 + 4700 0.82070227 1.9852605 -5.583747 34.460206 0.052623237 26.158394 -0.44673492 26.162209 + 4750 0.81451857 2.1097726 -5.5779782 34.451438 0.12221733 25.733718 -0.9911436 25.752798 + 4800 0.81300453 2.0211325 -5.5790076 34.439504 0.34536082 26.358606 -1.7335167 26.415548 + 4850 0.82035497 1.9489595 -5.5929886 34.424097 0.70899626 26.575865 -1.0191012 26.595397 + 4900 0.8127066 2.1312269 -5.584271 
34.405998 0.087959314 26.185217 -1.1329105 26.209713 + 4950 0.81252621 2.1094866 -5.5866296 34.387869 0.79067667 26.564722 -1.8456354 26.628759 + 5000 0.80575936 2.1875995 -5.579054 34.370679 0.031787364 26.027557 -2.2666774 26.12607 +Loop time of 32.2397 on 1 procs for 5000 steps with 5120 atoms + +Performance: 53598.557 tau/day, 155.088 timesteps/s +99.4% CPU use with 1 MPI tasks x 1 OpenMP threads + +MPI task timing breakdown: +Section | min time | avg time | max time |%varavg| %total +--------------------------------------------------------------- +Pair | 22.967 | 22.967 | 22.967 | 0.0 | 71.24 +Neigh | 2.9914 | 2.9914 | 2.9914 | 0.0 | 9.28 +Comm | 0.37485 | 0.37485 | 0.37485 | 0.0 | 1.16 +Output | 0.064337 | 0.064337 | 0.064337 | 0.0 | 0.20 +Modify | 5.7143 | 5.7143 | 5.7143 | 0.0 | 17.72 +Other | | 0.1281 | | | 0.40 + +Nlocal: 5120 ave 5120 max 5120 min +Histogram: 1 0 0 0 0 0 0 0 0 0 +Nghost: 7962 ave 7962 max 7962 min +Histogram: 1 0 0 0 0 0 0 0 0 0 +Neighs: 296101 ave 296101 max 296101 min +Histogram: 1 0 0 0 0 0 0 0 0 0 + +Total # of neighbors = 296101 +Ave neighs/atom = 57.8322 +Neighbor list builds = 283 +Dangerous builds = 0 + +Please see the log.cite file for references relevant to this simulation + +Total wall time: 0:00:32 diff --git a/examples/USER/misc/rhok/log.22Sep2017.pinning.g++.4 b/examples/USER/misc/rhok/log.22Sep2017.pinning.g++.4 new file mode 100644 index 0000000000000000000000000000000000000000..c7a77fb9cca5d5c9b190946d6a47459ffd048952 --- /dev/null +++ b/examples/USER/misc/rhok/log.22Sep2017.pinning.g++.4 @@ -0,0 +1,186 @@ +LAMMPS (22 Sep 2017) + using 1 OpenMP thread(s) per MPI task +units lj +dimension 3 +boundary p p p +atom_style atomic + +# truncated and shifted LJ potential +pair_style lj/cut 2.5 +pair_modify shift yes +read_data data.halfhalf + orthogonal box = (0 0 0) to (12.8152 12.8152 34) + 1 by 1 by 4 MPI processor grid + reading atoms ... + 5120 atoms + reading velocities ... 
+ 5120 velocities +pair_coeff 1 1 1.0 1.0 2.5 +mass 1 1.0 + +# simulation parameters +neighbor 0.6 bin +timestep 0.004 +run_style verlet + +velocity all create 0.8 1 mom yes rot yes +fix ensemble all npt temp 0.8 0.8 4.0 z 2.185 2.185 8.0 +fix 100 all momentum 100 linear 1 1 1 + +# harmonic rho_k bias-field +# nx ny nz k a +fix bias all rhok 16 0 0 4.0 26.00 + +# output U_bias rho_k_RE rho_k_IM |rho_k| +thermo_style custom step temp pzz pe lz f_bias f_bias[1] f_bias[2] f_bias[3] +thermo 50 +dump dumpXYZ all xyz 500 traj.xyz + +# NOTE: run reduced for demonstration purposes +# run 50000 +run 5000 +Neighbor list info ... + update every 1 steps, delay 10 steps, check yes + max neighbors/atom: 2000, page size: 100000 + master list distance cutoff = 3.1 + ghost atom cutoff = 3.1 + binsize = 1.55, bins = 9 9 22 + 1 neighbor lists, perpetual/occasional/extra = 1 0 0 + (1) pair lj/cut, perpetual + attributes: half, newton on + pair build: half/bin/atomonly/newton + stencil: half/bin/3d/newton + bin: standard +Per MPI rank memory allocation (min/avg/max) = 4.023 | 4.027 | 4.03 Mbytes +Step Temp Pzz PotEng Lz f_bias f_bias[1] f_bias[2] f_bias[3] + 0 0.8 5.1872229 -4.8361269 34 152.02869 34.711006 -0.72709593 34.71862 + 50 1.0819371 3.9250728 -5.2655842 34.02563 0.51385908 26.505979 0.2187864 26.506882 + 100 1.1182271 3.5397251 -5.3331401 34.100753 2.1059904 27.025883 -0.12127124 27.026156 + 150 1.1121434 3.7845571 -5.3440494 34.213993 1.7206575 26.894862 -1.3261751 26.927539 + 200 1.1446439 3.4114364 -5.4199119 34.358914 2.383615 27.054401 -1.4211151 27.091699 + 250 1.1115073 3.5603047 -5.3988013 34.517397 0.60819391 26.51404 -1.4089688 26.55145 + 300 1.0828478 3.7411116 -5.3842818 34.673948 0.73987465 26.528178 -2.062382 26.608225 + 350 1.0342597 3.9206217 -5.3384367 34.825105 0.99965014 26.657737 -1.6211152 26.706983 + 400 1.0064356 3.9929044 -5.324003 34.97579 0.41927007 26.402623 -1.7087432 26.457859 + 450 0.96799277 4.2764255 -5.2947175 35.133839 0.77739461 26.503753 
-2.5217998 26.623456 + 500 0.94691076 4.1007962 -5.2922124 35.301893 0.17015805 26.212252 -2.0421698 26.291683 + 550 0.93675297 3.7404088 -5.3056917 35.474301 0.56247039 26.335926 -3.205722 26.530316 + 600 0.92707577 3.5439822 -5.3176094 35.641282 0.04054693 25.679461 -3.0301039 25.857615 + 650 0.91828226 3.1833648 -5.3278237 35.794766 0.8427054 26.4003 -3.6331211 26.649117 + 700 0.9084826 2.8374306 -5.3327944 35.928138 1.5635222 26.605971 -3.8575939 26.884173 + 750 0.91219559 2.46172 -5.3548299 36.039156 6.3772911 27.350725 -4.8971146 27.785678 + 800 0.90000337 2.2187716 -5.3499181 36.126451 4.9080838 27.085156 -5.1291678 27.566538 + 850 0.9003432 1.8634244 -5.3614648 36.189019 0.0081092188 25.497333 -5.4038153 26.063676 + 900 0.89741573 1.5697398 -5.3660799 36.226074 0.011155479 25.312724 -6.2574069 26.074684 + 950 0.88871454 1.4427205 -5.3604669 36.237407 0.3287078 25.659237 -6.232896 26.405406 + 1000 0.88606353 1.3088636 -5.3626576 36.226015 0.30068168 24.554896 -7.2832017 25.612262 + 1050 0.88527541 1.3194263 -5.3666773 36.19311 0.10646314 24.514921 -7.9419424 25.76928 + 1100 0.87522001 1.2852124 -5.3556811 36.143056 0.13675329 24.865981 -8.446822 26.261489 + 1150 0.8805978 1.246973 -5.3671716 36.0781 0.00043275463 24.187039 -9.4985495 25.98529 + 1200 0.85711495 1.376588 -5.3346243 36.002427 0.47623639 23.691349 -9.4648541 25.512026 + 1250 0.88116805 1.3562001 -5.3731036 35.919289 0.32797055 23.322103 -10.54448 25.595049 + 1300 0.87178482 1.5046564 -5.3610798 35.831278 0.17704849 24.190231 -10.314689 26.29753 + 1350 0.87022621 1.6830825 -5.3603618 35.743318 0.0052854997 23.731157 -10.747465 26.051408 + 1400 0.89019669 1.6144812 -5.3921986 35.659687 1.4152796 22.8393 -10.551347 25.158787 + 1450 0.88852819 1.7587964 -5.3918592 35.580319 0.63560961 23.599033 -12.195 26.563742 + 1500 0.89029085 1.8772498 -5.3966098 35.509232 0.20895386 23.055083 -12.703366 26.323229 + 1550 0.88639722 2.2284824 -5.3933288 35.449043 0.44413965 22.448774 -12.156068 25.528757 + 1600 
0.88816451 2.2167704 -5.3994757 35.401661 0.12210235 23.108351 -12.44643 26.247085 + 1650 0.89154791 2.3397824 -5.4086923 35.365815 0.4820208 23.090699 -12.984179 26.490928 + 1700 0.88518032 2.5351236 -5.4041601 35.343757 0.080806002 22.749825 -12.99762 26.201005 + 1750 0.86848721 2.5527491 -5.3851928 35.336433 0.045102165 22.357111 -13.564328 26.15017 + 1800 0.88501061 2.5215825 -5.4169341 35.340849 0.27488483 22.086584 -14.408273 26.370732 + 1850 0.8716061 2.5809558 -5.4045854 35.355038 0.042909785 21.270956 -14.695278 25.853525 + 1900 0.85672517 2.4836326 -5.3902797 35.375469 0.72877764 21.639909 -15.474764 26.603646 + 1950 0.85133731 2.3141629 -5.3902573 35.398523 0.0016908803 21.106617 -15.132733 25.970924 + 2000 0.86152109 2.1562002 -5.4132601 35.419851 0.371016 21.325237 -15.614625 26.430706 + 2050 0.86243551 2.019931 -5.4220349 35.436069 0.017935421 20.4131 -16.255418 26.094698 + 2100 0.87417672 1.8083823 -5.4464117 35.445091 0.18429432 19.75625 -17.365705 26.303558 + 2150 0.85872128 1.7608768 -5.4293103 35.44341 0.91209166 20.149648 -17.480387 26.675312 + 2200 0.86615373 1.8372778 -5.4458315 35.430616 0.10151993 18.559234 -17.885469 25.7747 + 2250 0.85053605 1.7198437 -5.4272104 35.408688 0.96154548 17.200861 -18.562206 25.306622 + 2300 0.85400281 1.7939644 -5.4364682 35.377708 0.12283263 18.759325 -18.358539 26.247823 + 2350 0.85495278 1.5856029 -5.4417321 35.337987 0.20564329 18.967923 -18.248149 26.320658 + 2400 0.84606771 1.7782708 -5.4315646 35.287411 0.10063977 19.185527 -17.878215 26.224321 + 2450 0.85210051 1.8190391 -5.4432116 35.232321 0.69988647 19.268861 -18.325448 26.59156 + 2500 0.85304715 1.7466204 -5.4470889 35.175245 0.0048314937 18.09176 -18.74157 26.04915 + 2550 0.85401123 1.8601945 -5.4509309 35.115748 0.99467901 17.170045 -18.574587 25.294777 + 2600 0.85778606 1.974012 -5.4586742 35.058013 0.0026599702 17.438966 -19.333395 26.036469 + 2650 0.8521239 2.0606329 -5.4526006 35.003616 0.091056354 17.16363 -19.244738 25.786627 + 2700 
0.85918482 2.0766792 -5.4658947 34.954171 0.89590606 15.77108 -19.822153 25.330707 + 2750 0.85786577 2.225549 -5.4667773 34.911468 0.26577575 15.769018 -21.128817 26.364538 + 2800 0.86764664 2.2325018 -5.4849414 34.877604 0.47167555 14.950515 -20.675229 25.514369 + 2850 0.85209564 2.3434319 -5.465734 34.852715 2.7350296 13.51553 -20.829996 24.830592 + 2900 0.85757283 2.3512971 -5.4786051 34.836138 0.14816492 14.06033 -21.545946 25.727819 + 2950 0.86098926 2.3480431 -5.4890615 34.826408 0.26401534 13.381395 -22.714827 26.363329 + 3000 0.85413421 2.3243973 -5.4844129 34.823242 0.024244334 12.739486 -22.538687 25.889899 + 3050 0.85015323 2.5479266 -5.4844303 34.825228 0.4463147 12.990582 -21.975063 25.527605 + 3100 0.8530523 2.3643505 -5.495343 34.834883 0.12144265 12.844293 -22.321989 25.753583 + 3150 0.85098478 2.2521299 -5.4990526 34.848419 0.33194916 12.747856 -23.126671 26.4074 + 3200 0.84391449 2.2650057 -5.495222 34.862626 0.031888328 12.788845 -22.782174 26.12627 + 3250 0.84807155 2.1715388 -5.5080873 34.877548 0.082426694 13.316219 -22.09441 25.796989 + 3300 0.83028242 2.242889 -5.4878846 34.89175 1.1334975 14.326678 -22.593363 26.752827 + 3350 0.82924001 2.0324002 -5.4924558 34.903232 0.35473989 14.354166 -22.181868 26.421153 + 3400 0.83032841 2.0003371 -5.4997142 34.908733 0.041677437 14.528378 -21.735998 26.144356 + 3450 0.82908891 1.8683902 -5.5029185 34.907936 0.02365857 15.069507 -21.053887 25.891237 + 3500 0.82842914 1.9165344 -5.5064218 34.898681 0.17663531 15.27043 -20.674834 25.702817 + 3550 0.82735822 1.98221 -5.5088197 34.88272 1.5607134 14.915228 -20.208431 25.116622 + 3600 0.82642915 1.8422766 -5.5110752 34.8611 1.1861112 15.312314 -20.051953 25.229899 + 3650 0.82556781 1.9351408 -5.5130349 34.833406 1.018872 16.152478 -19.454871 25.286252 + 3700 0.82360651 1.9791184 -5.5128431 34.802021 0.14080727 16.907104 -19.401616 25.734663 + 3750 0.83017793 1.9855734 -5.5253254 34.768644 0.15311334 16.969506 -19.331958 25.723311 + 3800 0.82362926 2.1029656 
-5.5179624 34.734178 0.10807487 17.892584 -18.542426 25.76754 + 3850 0.82313508 2.0781681 -5.5196175 34.70093 0.13343085 19.072706 -17.28778 25.741707 + 3900 0.83643385 2.0570262 -5.5421224 34.669761 0.00022792038 19.551677 -17.1548 26.010675 + 3950 0.82346174 2.0842322 -5.5252757 34.640849 0.0093759386 20.892792 -15.590263 26.068469 + 4000 0.83485868 2.1196669 -5.5451736 34.612396 0.31198053 21.630258 -15.126984 26.394956 + 4050 0.82729429 2.2033274 -5.5365945 34.585721 0.53752252 21.283533 -14.011497 25.481578 + 4100 0.82040242 2.1757309 -5.5292269 34.562271 0.36031984 22.047609 -12.961927 25.575548 + 4150 0.81932521 2.285666 -5.5307807 34.542102 0.84343149 22.486289 -11.70555 25.350604 + 4200 0.83819319 2.231174 -5.5625532 34.526447 0.47190752 23.311855 -12.57189 26.485751 + 4250 0.82542274 2.1874789 -5.5472057 34.513795 0.70518398 23.411553 -12.614639 26.593795 + 4300 0.81971158 2.241167 -5.5424504 34.503969 0.26707612 23.089805 -12.727793 26.365429 + 4350 0.83255377 2.1295532 -5.5657895 34.496326 0.072548591 23.003138 -12.52181 26.190458 + 4400 0.8128474 2.3327845 -5.5402264 34.490126 0.0013023434 23.020811 -12.029795 25.974482 + 4450 0.82013491 2.3069915 -5.5554953 34.488039 0.041123896 23.632908 -11.178674 26.143394 + 4500 0.81411544 2.2247193 -5.5509183 34.488014 0.54440601 23.010678 -10.938506 25.478269 + 4550 0.82814624 2.1142779 -5.5763482 34.487885 0.1518945 23.696817 -11.351972 26.275585 + 4600 0.82929492 2.090881 -5.5823492 34.486698 0.0045520899 23.538527 -10.929741 25.952292 + 4650 0.81061417 1.9818043 -5.5584018 34.484038 0.012526806 23.993543 -10.219174 26.079142 + 4700 0.81816105 1.9605811 -5.5735005 34.476764 1.2079835 25.151166 -9.1888856 26.777169 + 4750 0.81657042 2.0064313 -5.5744795 34.465784 1.2045017 25.487486 -8.2063886 26.776048 + 4800 0.81789335 2.0838696 -5.5796632 34.451996 0.27642542 24.647157 -7.023095 25.62823 + 4850 0.80649339 1.9892413 -5.5654796 34.436067 0.024697945 25.09823 -6.3492244 25.888874 + 4900 0.81673441 2.0125635 
-5.5835037 34.416236 0.0011188576 25.446818 -5.2182483 25.976348 + 4950 0.82250033 1.9770391 -5.5946082 34.394723 0.72696707 26.37002 -3.5122842 26.602896 + 5000 0.80762758 2.075517 -5.5746076 34.371696 0.12796344 26.102184 -2.8094827 26.252946 +Loop time of 10.3394 on 4 procs for 5000 steps with 5120 atoms + +Performance: 167127.370 tau/day, 483.586 timesteps/s +99.0% CPU use with 4 MPI tasks x 1 OpenMP threads + +MPI task timing breakdown: +Section | min time | avg time | max time |%varavg| %total +--------------------------------------------------------------- +Pair | 5.029 | 6.0128 | 7.1918 | 35.0 | 58.15 +Neigh | 0.65673 | 0.75825 | 0.88597 | 10.3 | 7.33 +Comm | 0.43982 | 1.5284 | 2.4112 | 60.5 | 14.78 +Output | 0.022835 | 0.023039 | 0.023453 | 0.2 | 0.22 +Modify | 1.7294 | 1.9472 | 2.5687 | 25.7 | 18.83 +Other | | 0.06978 | | | 0.67 + +Nlocal: 1280 ave 1404 max 1214 min +Histogram: 2 0 0 1 0 0 0 0 0 1 +Nghost: 3521.25 ave 3581 max 3426 min +Histogram: 1 0 0 0 0 0 1 1 0 1 +Neighs: 73872.2 ave 87973 max 64161 min +Histogram: 1 1 0 0 1 0 0 0 0 1 + +Total # of neighbors = 295489 +Ave neighs/atom = 57.7127 +Neighbor list builds = 278 +Dangerous builds = 0 + +Please see the log.cite file for references relevant to this simulation + +Total wall time: 0:00:10 diff --git a/examples/USER/misc/rhok/log.22Sep2017.setup.g++.1 b/examples/USER/misc/rhok/log.22Sep2017.setup.g++.1 new file mode 100644 index 0000000000000000000000000000000000000000..8606d4ed944137c289d272ad94a0eb47ad5bee86 --- /dev/null +++ b/examples/USER/misc/rhok/log.22Sep2017.setup.g++.1 @@ -0,0 +1,141 @@ +LAMMPS (22 Sep 2017) + using 1 OpenMP thread(s) per MPI task +units lj +dimension 3 +boundary p p p +atom_style atomic + +# truncated and shifted LJ potential +pair_style lj/cut 2.5 +pair_modify shift yes + +# fcc lattice +lattice fcc 0.9731 +Lattice spacing in x,y,z = 1.6019 1.6019 1.6019 +region my_box block 0 8.0 0 8.0 0 20.0 +create_box 1 my_box +Created orthogonal box = (0 0 0) to (12.8152 12.8152 
32.0379) + 1 by 1 by 1 MPI processor grid +region particles block 0 8.0 0 8.0 0 20.0 +create_atoms 1 region particles +Created 5120 atoms +pair_coeff 1 1 1.0 1.0 2.5 +mass 1 1.0 +change_box all z final 0.0 34 remap units box + orthogonal box = (0 0 0) to (12.8152 12.8152 34) + +# select particles in one side of the elongated box +region left plane 0 0 10 0 0 1 +group left region left +2688 atoms in group left + +velocity left create 6.0 1 mom yes rot yes + +# simulation parameters +neighbor 0.6 bin +timestep 0.004 +run_style verlet +fix ensemble left nve # Note: only move particle in left-hand side +fix langevin left langevin 3.0 0.8 100.0 2017 + +# outout +thermo_style custom step temp pzz pe lz +thermo 100 +# dump dumpXYZ all xyz 100 traj.xyz + +# run reduced for demonstration purposes +# run 10000 +run 5000 +Neighbor list info ... + update every 1 steps, delay 10 steps, check yes + max neighbors/atom: 2000, page size: 100000 + master list distance cutoff = 3.1 + ghost atom cutoff = 3.1 + binsize = 1.55, bins = 9 9 22 + 1 neighbor lists, perpetual/occasional/extra = 1 0 0 + (1) pair lj/cut, perpetual + attributes: half, newton on + pair build: half/bin/atomonly/newton + stencil: half/bin/3d/newton + bin: standard +Per MPI rank memory allocation (min/avg/max) = 4.524 | 4.524 | 4.524 Mbytes +Step Temp Pzz PotEng Lz + 0 3.1494433 -3.4735106 -6.8707307 34 + 100 1.7727555 6.5330255 -4.8035477 34 + 200 1.7462368 7.0070325 -4.7646426 34 + 300 1.7564888 6.6190123 -4.7894637 34 + 400 1.7641186 6.609684 -4.8064772 34 + 500 1.7383511 6.7304936 -4.7708095 34 + 600 1.731708 6.8574656 -4.7612918 34 + 700 1.7332167 6.6530919 -4.7670014 34 + 800 1.7487537 6.5644963 -4.7907458 34 + 900 1.7353648 6.7115188 -4.7772149 34 + 1000 1.728878 6.4175719 -4.7797216 34 + 1100 1.7471488 6.5346083 -4.813376 34 + 1200 1.7188149 6.2502104 -4.7822235 34 + 1300 1.7151194 6.792534 -4.7781701 34 + 1400 1.7406603 6.6639592 -4.8170174 34 + 1500 1.7090537 6.4677579 -4.770701 34 + 1600 1.7014954 
6.2853535 -4.7679742 34 + 1700 1.7064354 6.4352857 -4.7812978 34 + 1800 1.7169971 6.5808758 -4.799426 34 + 1900 1.6822712 6.3746758 -4.7522464 34 + 2000 1.7126546 6.534969 -4.8091595 34 + 2100 1.7086108 6.4679932 -4.8146664 34 + 2200 1.6974952 6.3802129 -4.8052505 34 + 2300 1.6868035 6.4009243 -4.7935769 34 + 2400 1.7107125 6.2318869 -4.8358765 34 + 2500 1.660241 6.4891487 -4.7661183 34 + 2600 1.6801816 6.1988356 -4.8024291 34 + 2700 1.6940298 6.1328187 -4.8290053 34 + 2800 1.6755061 6.4150693 -4.8145473 34 + 2900 1.6749928 6.4248792 -4.8213509 34 + 3000 1.6310737 6.6491291 -4.7673027 34 + 3100 1.6559915 6.2726719 -4.8109181 34 + 3200 1.6574579 5.7132029 -4.8189484 34 + 3300 1.6816136 5.7697439 -4.8652811 34 + 3400 1.6489483 6.4463349 -4.8247812 34 + 3500 1.6557974 5.9763333 -4.8383712 34 + 3600 1.6215459 6.2806534 -4.7954657 34 + 3700 1.6484987 6.0671609 -4.8470777 34 + 3800 1.6473922 5.8688108 -4.8555351 34 + 3900 1.6435957 5.930425 -4.8562076 34 + 4000 1.6514434 6.1962122 -4.872998 34 + 4100 1.6138337 6.4808124 -4.8219373 34 + 4200 1.6215239 5.9467966 -4.8412146 34 + 4300 1.6129295 5.9377323 -4.8414596 34 + 4400 1.6020549 6.1104301 -4.8395939 34 + 4500 1.6047738 6.0816222 -4.8538151 34 + 4600 1.6053565 6.183466 -4.8686817 34 + 4700 1.6088152 5.7416542 -4.894114 34 + 4800 1.5954309 5.694319 -4.8840198 34 + 4900 1.5582564 6.1199614 -4.8429998 34 + 5000 1.5786672 5.8813574 -4.8907344 34 +Loop time of 28.3867 on 1 procs for 5000 steps with 5120 atoms + +Performance: 60873.483 tau/day, 176.139 timesteps/s +99.4% CPU use with 1 MPI tasks x 1 OpenMP threads + +MPI task timing breakdown: +Section | min time | avg time | max time |%varavg| %total +--------------------------------------------------------------- +Pair | 22.269 | 22.269 | 22.269 | 0.0 | 78.45 +Neigh | 4.7222 | 4.7222 | 4.7222 | 0.0 | 16.64 +Comm | 0.40821 | 0.40821 | 0.40821 | 0.0 | 1.44 +Output | 0.0042329 | 0.0042329 | 0.0042329 | 0.0 | 0.01 +Modify | 0.88231 | 0.88231 | 0.88231 | 0.0 | 3.11 +Other | | 
0.1005 | | | 0.35 + +Nlocal: 5120 ave 5120 max 5120 min +Histogram: 1 0 0 0 0 0 0 0 0 0 +Nghost: 7768 ave 7768 max 7768 min +Histogram: 1 0 0 0 0 0 0 0 0 0 +Neighs: 297167 ave 297167 max 297167 min +Histogram: 1 0 0 0 0 0 0 0 0 0 + +Total # of neighbors = 297167 +Ave neighs/atom = 58.0404 +Neighbor list builds = 474 +Dangerous builds = 246 +write_data data.halfhalf +Total wall time: 0:00:28 diff --git a/examples/USER/misc/rhok/log.22Sep2017.setup.g++.4 b/examples/USER/misc/rhok/log.22Sep2017.setup.g++.4 new file mode 100644 index 0000000000000000000000000000000000000000..14088f7c95cfd220b3895a8d3861a70ec093c849 --- /dev/null +++ b/examples/USER/misc/rhok/log.22Sep2017.setup.g++.4 @@ -0,0 +1,141 @@ +LAMMPS (22 Sep 2017) + using 1 OpenMP thread(s) per MPI task +units lj +dimension 3 +boundary p p p +atom_style atomic + +# truncated and shifted LJ potential +pair_style lj/cut 2.5 +pair_modify shift yes + +# fcc lattice +lattice fcc 0.9731 +Lattice spacing in x,y,z = 1.6019 1.6019 1.6019 +region my_box block 0 8.0 0 8.0 0 20.0 +create_box 1 my_box +Created orthogonal box = (0 0 0) to (12.8152 12.8152 32.0379) + 1 by 1 by 4 MPI processor grid +region particles block 0 8.0 0 8.0 0 20.0 +create_atoms 1 region particles +Created 5120 atoms +pair_coeff 1 1 1.0 1.0 2.5 +mass 1 1.0 +change_box all z final 0.0 34 remap units box + orthogonal box = (0 0 0) to (12.8152 12.8152 34) + +# select particles in one side of the elongated box +region left plane 0 0 10 0 0 1 +group left region left +2688 atoms in group left + +velocity left create 6.0 1 mom yes rot yes + +# simulation parameters +neighbor 0.6 bin +timestep 0.004 +run_style verlet +fix ensemble left nve # Note: only move particle in left-hand side +fix langevin left langevin 3.0 0.8 100.0 2017 + +# outout +thermo_style custom step temp pzz pe lz +thermo 100 +# dump dumpXYZ all xyz 100 traj.xyz + +# run reduced for demonstration purposes +# run 10000 +run 5000 +Neighbor list info ... 
+ update every 1 steps, delay 10 steps, check yes + max neighbors/atom: 2000, page size: 100000 + master list distance cutoff = 3.1 + ghost atom cutoff = 3.1 + binsize = 1.55, bins = 9 9 22 + 1 neighbor lists, perpetual/occasional/extra = 1 0 0 + (1) pair lj/cut, perpetual + attributes: half, newton on + pair build: half/bin/atomonly/newton + stencil: half/bin/3d/newton + bin: standard +Per MPI rank memory allocation (min/avg/max) = 3.23 | 3.23 | 3.23 Mbytes +Step Temp Pzz PotEng Lz + 0 3.1494433 -3.4735106 -6.8707307 34 + 100 1.7914373 6.4805818 -4.8420353 34 + 200 1.740256 6.6108149 -4.7672571 34 + 300 1.7663827 6.5188941 -4.8103672 34 + 400 1.7440644 6.5156543 -4.7769467 34 + 500 1.7471724 6.5208992 -4.7843928 34 + 600 1.7320106 6.6557835 -4.7654637 34 + 700 1.6839043 6.7689759 -4.7045352 34 + 800 1.7216746 6.66436 -4.7601673 34 + 900 1.7342542 6.3242367 -4.7790803 34 + 1000 1.7338566 6.5803438 -4.7854529 34 + 1100 1.7328856 6.3846366 -4.7902625 34 + 1200 1.7546906 6.5048137 -4.8213443 34 + 1300 1.7163891 6.3903221 -4.7665145 34 + 1400 1.7011627 6.5782672 -4.7517875 34 + 1500 1.7105234 6.5811813 -4.7677748 34 + 1600 1.7334403 6.5032837 -4.8067749 34 + 1700 1.7252102 6.5443871 -4.8058994 34 + 1800 1.721958 6.3378188 -4.8150073 34 + 1900 1.6797892 6.6780506 -4.7538618 34 + 2000 1.7001774 6.3578192 -4.7894018 34 + 2100 1.7127784 6.3219105 -4.8161059 34 + 2200 1.696825 6.536793 -4.7946902 34 + 2300 1.6704578 6.7186933 -4.7609628 34 + 2400 1.6772498 6.3432817 -4.7778471 34 + 2500 1.7073862 6.2153226 -4.8299181 34 + 2600 1.6951557 6.4397257 -4.8156787 34 + 2700 1.6845984 6.0123544 -4.8136864 34 + 2800 1.6550565 6.2489392 -4.7829639 34 + 2900 1.6892315 6.158499 -4.8423004 34 + 3000 1.6814436 6.07976 -4.8400696 34 + 3100 1.6387025 6.330166 -4.7878978 34 + 3200 1.6747855 6.0767043 -4.8481995 34 + 3300 1.6508768 6.2749233 -4.8181888 34 + 3400 1.6426364 6.3934935 -4.8223824 34 + 3500 1.6576512 6.0638185 -4.8559078 34 + 3600 1.6444173 6.1376573 -4.8463113 34 + 3700 
1.6480039 5.9943705 -4.8601776 34 + 3800 1.6467212 6.0556591 -4.8722719 34 + 3900 1.6271804 6.116738 -4.8547278 34 + 4000 1.6158134 5.9089534 -4.8477829 34 + 4100 1.6388157 5.9890465 -4.8920284 34 + 4200 1.6182368 6.0639887 -4.8724963 34 + 4300 1.647633 5.6333906 -4.9267536 34 + 4400 1.5856411 6.2675475 -4.8471239 34 + 4500 1.5773417 6.1789163 -4.8469057 34 + 4600 1.6181445 5.7988068 -4.922419 34 + 4700 1.5876712 5.7398111 -4.8853849 34 + 4800 1.5708353 6.2204997 -4.8718872 34 + 4900 1.5514708 5.9782256 -4.8523812 34 + 5000 1.553347 5.9286523 -4.86582 34 +Loop time of 8.10259 on 4 procs for 5000 steps with 5120 atoms + +Performance: 213265.164 tau/day, 617.087 timesteps/s +99.2% CPU use with 4 MPI tasks x 1 OpenMP threads + +MPI task timing breakdown: +Section | min time | avg time | max time |%varavg| %total +--------------------------------------------------------------- +Pair | 5.2964 | 5.6236 | 5.8982 | 9.0 | 69.40 +Neigh | 1.0562 | 1.1907 | 1.3257 | 8.7 | 14.70 +Comm | 0.43963 | 0.98786 | 1.5968 | 42.5 | 12.19 +Output | 0.0023124 | 0.004741 | 0.0090873 | 4.0 | 0.06 +Modify | 0.018652 | 0.22213 | 0.39884 | 36.4 | 2.74 +Other | | 0.07357 | | | 0.91 + +Nlocal: 1280 ave 1337 max 1211 min +Histogram: 1 0 0 0 0 1 1 0 0 1 +Nghost: 3416.25 ave 3549 max 3297 min +Histogram: 2 0 0 0 0 0 0 0 1 1 +Neighs: 74269.8 ave 77932 max 69612 min +Histogram: 1 0 0 0 0 1 0 1 0 1 + +Total # of neighbors = 297079 +Ave neighs/atom = 58.0232 +Neighbor list builds = 474 +Dangerous builds = 247 +write_data data.halfhalf +Total wall time: 0:00:08 diff --git a/examples/USER/uef/README b/examples/USER/uef/README new file mode 100644 index 0000000000000000000000000000000000000000..aaf53fa2a115a38ec297f70e0db67b6503f2a180 --- /dev/null +++ b/examples/USER/uef/README @@ -0,0 +1,19 @@ +This directory contains two short example scripts for the USER-UEF +package. 
+ + +nvt_uniaxial: + A simulation of a WCA fluid under constant volume uniaxial strain + with stretching in the z direction, compression in the x and y + directions. The xx, yy and zz components of the pressure tensor are + written to the log file. The simulation conditions are the same + as those used in the paper by Thomas Hunt [http://arxiv.org/abs/1310.3905] + and should yield a zz pressure near 5 and xx/yy pressures near 10. + +npt_biaxial: + A simulation of a WCA fluid under stress-controlled biaxial strain + with stretching in the x and y directions, compression in the z + direction. The zz stress is controlled using the ext keyword in fix + npt/uef. The xx, yy and zz components of the pressure tensor are + written to the log file. The simulation conditions should yield + xx/yy pressures near 5 and zz pressure near 10. diff --git a/examples/USER/uef/npt_biaxial/data.wca b/examples/USER/uef/npt_biaxial/data.wca new file mode 100644 index 0000000000000000000000000000000000000000..889ba4d2f2a8d2cb3892bda144544f23140ccc03 --- /dev/null +++ b/examples/USER/uef/npt_biaxial/data.wca @@ -0,0 +1,1022 @@ +LAMMPS data file via write_data, version 14 May 2016, timestep = 400000 + +500 atoms +1 atom types + +0.0000000000000000e+00 8.3979809569125372e+00 xlo xhi +0.0000000000000000e+00 8.3979809569125372e+00 ylo yhi +0.0000000000000000e+00 8.3979809569125372e+00 zlo zhi + +Masses + +1 1 + +Pair Coeffs # lj/cut + +1 1 1 + +Atoms # atomic + +7 1 2.4137737201272674e-01 2.0017433398687118e-01 3.6157393311703534e-02 2 1 2 +398 1 1.6739594898193126e+00 3.0313452824803544e-01 5.7986723161362197e-01 -3 0 1 +173 1 6.1370793327804556e+00 4.4978954867119525e-01 3.6568951377817088e-01 -4 1 0 +462 1 7.8754716784931862e+00 5.2908038473333074e-01 7.9185633743762940e-01 1 1 -2 +383 1 6.5373096080170212e-01 1.5337525983981986e+00 6.3208419126059423e-01 2 0 1 +288 1 1.9354124990672374e+00 1.3318105136786291e+00 3.6211635210084403e-01 -1 1 0 +303 1 2.9661308460819318e+00 1.1444058564802859e+00 
6.5604777151148241e-01 -2 2 1 +491 1 7.1514675802966758e+00 1.3588685826107376e+00 1.2875068928885325e-01 -1 0 0 +187 1 3.0670386025081497e-01 2.4850915964494620e+00 7.0944157374329464e-02 0 -1 0 +163 1 1.2805212773629451e+00 2.4034621328433090e+00 2.0184086197146742e-01 0 1 -1 +345 1 2.5952244948945173e+00 2.3405536448220743e+00 3.0019149048630317e-01 -3 2 1 +447 1 3.9059163101716741e+00 1.6849179478858272e+00 6.0306488750926446e-01 -2 3 -4 +108 1 4.9215629854759335e+00 2.5174843149522088e+00 6.2779912940916158e-02 1 0 -1 +427 1 6.3978861821382305e+00 1.9201774263431104e+00 4.3981049730412797e-01 -1 -2 -3 +96 1 7.4061541304605534e+00 2.4828308207907996e+00 2.4705543772216432e-01 0 -3 2 +494 1 8.0759482343563711e+00 1.6914241739458744e+00 6.8482420189075921e-01 -3 1 2 +360 1 3.1886448226656765e+00 3.1126224743956139e+00 5.4052217066940123e-01 1 1 -2 +179 1 4.2399500260508116e+00 2.8640916432630559e+00 7.5278462735962870e-01 -1 1 1 +136 1 6.2603021059339365e+00 3.2638426060411403e+00 9.9163685662983045e-02 -2 0 3 +311 1 8.0699058291845773e+00 3.1228722160017734e+00 7.5458759573094458e-01 -3 -3 3 +242 1 8.1363443818268044e-02 3.8061968817045986e+00 5.4644298839992535e-02 1 1 1 +451 1 1.9291602479953753e+00 3.5896469823727863e+00 2.3008537852985023e-01 -1 0 0 +118 1 5.3566912079946780e+00 3.5234509548370374e+00 6.7801317822012042e-01 1 -3 -1 +200 1 6.7588369512940338e-01 6.1719413314115790e-01 9.9275606042642373e-01 -1 2 3 +23 1 2.1873327028525060e+00 4.2754803645009956e-02 1.4143560341384132e+00 -3 -1 0 +30 1 3.9592898204338112e+00 6.8093268022015185e-01 1.2987004742604558e+00 -1 -2 -2 +26 1 5.1175076813830467e+00 5.1869033157649591e-01 9.5797839381311178e-01 0 1 2 +452 1 6.5582763761917411e+00 3.4514602671261341e-01 1.2368598815410630e+00 -2 0 -2 +439 1 8.3865135868155445e+00 1.4825845267600596e-01 1.6737363681796917e+00 -2 3 1 +492 1 1.6645816560197648e+00 9.7557398318995125e-01 1.3973497820249918e+00 -2 3 1 +287 1 2.8845951500519784e+00 9.2974386224238703e-01 
1.6104516406361071e+00 2 -1 2 +87 1 5.9282888423658395e+00 1.2716232081523247e+00 1.1744299163086194e+00 -1 2 -1 +191 1 7.0465887128985001e+00 1.3447129135151838e+00 1.1378718594147066e+00 -1 2 0 +316 1 6.6785856436835267e-01 2.4243200282222617e+00 1.1253836626025919e+00 0 1 0 +176 1 1.8165460632802826e+00 2.1437252128100086e+00 1.0868234266569741e+00 1 1 1 +244 1 3.5041229236927576e+00 2.3924032537771125e+00 1.3551249499971583e+00 -3 1 4 +463 1 4.9238071200835165e+00 1.7488964460038467e+00 8.7393010597649756e-01 -2 2 1 +101 1 4.7502017837518187e+00 2.5070270251466997e+00 1.6042073679153410e+00 -3 0 -2 +456 1 6.7262574842655534e+00 2.1937608587635533e+00 1.5819520420856528e+00 0 2 3 +90 1 7.8106680805872131e+00 2.3465270292028992e+00 1.4066520475037838e+00 0 -4 1 +457 1 2.2543678933961773e+00 3.1652213402916889e+00 1.1451988834668292e+00 -1 0 0 +344 1 5.9751963550424136e+00 2.8740523385222234e+00 1.2530570351392452e+00 0 1 -1 +399 1 8.2525481582297566e-01 3.4796551803028120e+00 1.0452539946382762e+00 1 -1 -1 +472 1 2.5500022667255768e+00 4.1144011701979659e+00 1.1937493716055980e+00 0 1 2 +364 1 3.2074279852156709e+00 3.3909867375667604e+00 1.5492265188845586e+00 -1 0 0 +333 1 4.2063776055408209e+00 3.7436012461483292e+00 1.2983951160694676e+00 -1 1 1 +223 1 6.0720407506223024e+00 3.9437056464141231e+00 1.5634726007362729e+00 0 0 1 +277 1 6.8850289844945918e+00 3.5744109988378070e+00 1.1843729982426427e+00 2 0 0 +487 1 8.0236797280148657e+00 4.0759691449476652e+00 1.1816920447826709e+00 -1 0 5 +202 1 1.1425010515906946e+00 1.1629395296284512e-02 2.5086432758529211e+00 -1 -2 0 +139 1 2.3147555111337756e+00 1.8665688332261610e-01 2.4220734387693037e+00 -5 0 1 +218 1 3.5367098631556342e+00 6.2760250051756761e-01 2.4478818232869410e+00 1 0 -2 +10 1 4.3729774450168737e+00 1.6632191047636544e-01 2.1079853667170236e+00 -2 -1 -2 +64 1 5.7303792911825742e+00 2.9006680684615282e-01 1.9605629224377070e+00 -2 1 2 +40 1 6.5824557178778531e-01 8.5028950299011019e-01 
2.1496168105059708e+00 2 1 1 +328 1 1.5589817713112594e-01 1.6225523918451032e+00 1.7310231111691350e+00 3 0 0 +292 1 2.1314993621742819e+00 1.2260233186264009e+00 2.3652294967955245e+00 0 0 -1 +245 1 4.6666032401598603e+00 1.5042368206958292e+00 1.9142905720694421e+00 -3 1 -1 +148 1 6.7720428171922631e+00 8.7008110148972428e-01 2.0934267451930806e+00 0 0 1 +435 1 7.7341388550057273e+00 9.1983544373491155e-01 1.8223856537522831e+00 -2 -1 1 +301 1 2.9295611364078922e-01 2.4942786265027763e+00 2.2355337509297120e+00 -2 -3 -2 +476 1 1.3648021789963285e+00 1.8605609454926342e+00 2.0851101905225424e+00 1 0 -1 +310 1 2.6428836469132526e+00 2.1636738846129742e+00 1.7827293416008847e+00 3 2 0 +137 1 3.6368685754799719e+00 1.7198335009724290e+00 2.1014306554853008e+00 1 -1 1 +434 1 5.6400362255303138e+00 2.0264029757942144e+00 1.7648993119896872e+00 0 -1 0 +184 1 7.6228520959849160e+00 2.1704001606029473e+00 2.4690779972373509e+00 0 1 -1 +5 1 1.5547914352159820e+00 3.0246432147284117e+00 1.9341058666944422e+00 5 1 -2 +41 1 3.5394530649909939e+00 2.8672493763709368e+00 2.3721754433370239e+00 0 0 0 +410 1 5.2622202291807252e+00 3.3532222678989076e+00 2.0249715425869179e+00 2 1 1 +418 1 6.2986243250817040e+00 2.9042203242504363e+00 2.3450580832445986e+00 -1 2 0 +34 1 6.5845359986575269e-01 3.5330771386953552e+00 2.1708302646106170e+00 -2 3 1 +67 1 2.2321109286615104e+00 3.8391986294826608e+00 2.3598318070524229e+00 1 -1 0 +390 1 4.4001972293445766e+00 3.7784400898230768e+00 2.3530190506208175e+00 1 2 -1 +125 1 6.8584035481270371e+00 3.7859398002695177e+00 2.2262509649550242e+00 -3 1 3 +105 1 8.0142836210079444e+00 3.4253500628703644e+00 1.9822510084209746e+00 1 0 -2 +274 1 4.5713678557713822e-01 5.4244335571632307e-01 3.0849174937154351e+00 1 -2 1 +314 1 1.8447059547300777e+00 1.1692544876557610e-01 3.3199499683474132e+00 2 0 -1 +408 1 3.1814603728989130e+00 7.4387930556111925e-02 3.3119410401147560e+00 -1 1 1 +142 1 5.0629800765951494e+00 4.5529434814892644e-01 
2.8994044435235393e+00 2 -1 -3 +25 1 6.1307890615815195e+00 4.8707110399724851e-01 2.9156227033318936e+00 0 2 -4 +458 1 7.5149810474983081e+00 4.9072663758191898e-01 2.7754564040841219e+00 2 -4 0 +165 1 1.3117326379233891e+00 1.1343392736952256e+00 3.1076574691841947e+00 -2 2 2 +421 1 2.7633861789985827e+00 9.5478806243591052e-01 3.1957876340691875e+00 -1 -3 1 +151 1 4.7220243778881930e+00 1.4282004346636548e+00 3.2642378377455432e+00 0 0 -1 +162 1 5.6468847674841811e+00 1.3364063405496600e+00 2.5903751818453160e+00 4 -4 -2 +120 1 8.2129099591176686e+00 1.3575499019485984e+00 2.6634845511723606e+00 0 0 0 +448 1 8.8805142887530297e-01 2.2694079305496020e+00 3.1897834611367313e+00 -1 1 3 +50 1 2.9687835700409062e+00 1.9135157390691884e+00 2.8548074734572992e+00 -3 0 1 +443 1 4.0462687470728396e+00 2.3330394967344139e+00 3.1667340148022216e+00 0 1 -2 +482 1 4.9427462157614270e+00 2.1611442624383619e+00 2.6370759110090005e+00 0 3 3 +407 1 5.7753726348779013e+00 2.4435329312939671e+00 3.1756094964019836e+00 0 1 -1 +100 1 6.6610073695947598e+00 1.9476592422501362e+00 2.6046914024931409e+00 -2 -2 1 +99 1 3.5098260053604374e-01 3.2489377905726768e+00 3.1253385231740438e+00 -1 1 -1 +107 1 1.3942178307183000e+00 3.1914627339242005e+00 2.9813408440396350e+00 2 0 1 +480 1 2.3066257454835091e+00 2.6794898513136354e+00 2.7495660819550110e+00 -1 -2 -1 +115 1 3.1214377398204638e+00 2.9223083977660864e+00 3.3233156913824242e+00 1 2 1 +278 1 5.0520729931158854e+00 3.1424127338010743e+00 3.1175324674801042e+00 0 0 -1 +206 1 7.7081484276756580e+00 3.0580419350207482e+00 3.0362913717240096e+00 0 -1 -4 +4 1 1.3000434256419220e+00 4.1733384323360649e+00 2.6659576195319934e+00 1 0 2 +214 1 3.2669228431804429e+00 3.8002115979716740e+00 2.6408106015701289e+00 0 0 0 +65 1 4.0280349657536760e+00 3.3861374196314542e+00 3.2331032594455227e+00 2 1 2 +490 1 5.8571557116011554e+00 3.8735942685941813e+00 2.8613897619661586e+00 -1 -2 0 +230 1 2.4828380364158602e+00 5.5335473677805791e-01 
4.1950137944148906e+00 2 -1 3 +234 1 4.2752320357614213e+00 1.4212800762204394e-01 3.7635975156407624e+00 -2 1 -4 +92 1 5.6957646578006917e+00 2.5374658514218495e-02 3.8304068113906884e+00 0 -1 0 +103 1 7.0230616578295741e+00 4.6755655210711161e-01 3.8662999072020967e+00 1 -2 1 +129 1 8.1671299320274162e+00 5.7067862198193264e-01 4.0702212708530077e+00 -1 -2 0 +483 1 3.2862260723228903e-01 1.4557716141563948e+00 3.6577409481728278e+00 -3 4 -1 +110 1 1.5047240722693447e+00 1.0396158231062973e+00 4.0843832165787424e+00 -2 -1 0 +126 1 3.7388270640303456e+00 1.1962750537841655e+00 3.4335639532998909e+00 3 0 0 +276 1 5.6087889039130960e+00 1.1358153696317190e+00 3.7428282467743927e+00 0 2 1 +294 1 6.5127919732673050e+00 1.4345197299837358e+00 3.4448769173070444e+00 -2 0 2 +417 1 7.5734653423068021e+00 1.3756634588591667e+00 3.5078056411981438e+00 -1 1 0 +102 1 1.1107713360470239e+00 2.0167473232388695e+00 4.1576658165713738e+00 -2 0 0 +228 1 2.0072330566273600e+00 1.8105875233272237e+00 3.4619908126078429e+00 2 0 -1 +2 1 3.2101113937010530e+00 2.0183489554560774e+00 3.8822820800419566e+00 0 1 -1 +222 1 4.2730523271288350e+00 1.9092752363315002e+00 4.1553216684001049e+00 1 -1 0 +332 1 6.5042309397343736e+00 2.2214607151080448e+00 4.1057034120156182e+00 0 1 3 +275 1 7.5099659540449100e+00 2.2790896507455778e+00 4.1566328430638730e+00 -1 0 -1 +243 1 8.2736142057938107e+00 2.3393256795577462e+00 3.4990413844829384e+00 0 0 -2 +394 1 5.8512440497757878e-01 2.8807282965119669e+00 3.9612614891397042e+00 1 3 1 +325 1 2.1875714466797338e+00 2.8726101452768167e+00 3.8147636089102748e+00 0 2 -1 +468 1 5.6059790271973755e+00 2.9975764588671621e+00 3.9816014902843078e+00 0 1 0 +113 1 6.5849924400139104e+00 3.1808065990344776e+00 3.4367361163256929e+00 0 0 1 +47 1 1.0081753047752569e+00 3.9556172416934308e+00 3.8502376136806813e+00 1 -1 0 +474 1 1.9694475511677239e+00 3.9381337754040748e+00 3.4201230948176762e+00 4 1 -1 +343 1 3.0370114219017328e+00 3.9465351732349148e+00 
3.7436387890882474e+00 0 -2 3 +257 1 6.0639634839200358e+00 3.9545122043987475e+00 3.7922755780058259e+00 1 3 -3 +422 1 7.3939236737263112e+00 3.9358351587357783e+00 3.4932390476416106e+00 -2 -1 -3 +317 1 3.5159353631335000e+00 5.3479576975233323e-01 4.4869163379906354e+00 -1 -2 -4 +322 1 5.5733359556605757e+00 4.8698103039818835e-01 4.7704783768556309e+00 1 6 -1 +453 1 7.3567123647601029e+00 8.3432787155128474e-02 4.8780449077720851e+00 -2 3 2 +82 1 5.8224871123849942e-01 1.1896491837492689e+00 4.7267276175254507e+00 3 0 -1 +402 1 2.4848673528490579e+00 1.6438582837634208e+00 4.4663750233867230e+00 0 2 3 +201 1 3.5370299676438193e+00 1.4874347892452509e+00 4.7752714735806165e+00 0 -2 -1 +159 1 4.7745775925936407e+00 9.0511891230699659e-01 4.2787903105333260e+00 0 -2 2 +239 1 6.3670232447984816e+00 1.2120460051587267e+00 4.4601763672019308e+00 0 1 -1 +340 1 7.6240687426290750e+00 1.3432987598476136e+00 4.6741720105351368e+00 -2 1 1 +246 1 6.4819140516389595e-02 2.1918155429312383e+00 4.6033031341679633e+00 0 2 1 +268 1 5.2454607296226170e+00 2.0016193334579642e+00 4.2366338410742728e+00 -2 2 0 +238 1 1.7670560071066166e+00 3.0774892421151585e+00 4.7759677543328642e+00 0 0 0 +183 1 2.7500175098986750e+00 2.8066272725663279e+00 4.6976729622871014e+00 -2 0 0 +393 1 3.6570205199594930e+00 3.1994600133365143e+00 4.2220986255821451e+00 1 2 -1 +177 1 4.6077861627416805e+00 2.8922144055644265e+00 4.5492372075807843e+00 0 -1 0 +97 1 2.5306051464072796e+00 3.9363146816844674e+00 4.9489842130852137e+00 2 0 1 +16 1 5.1924745493168265e+00 3.7357318418579575e+00 4.8717896643693246e+00 0 -2 -1 +15 1 6.1086100243278070e+00 4.1952083415168335e+00 4.9407045230806066e+00 -1 0 -4 +467 1 6.6872865628091098e+00 3.3657369472951393e+00 4.6943068780100674e+00 1 2 2 +404 1 8.0052336596808171e+00 3.5358005023997046e+00 4.5274309747477952e+00 2 2 -1 +190 1 9.4380374464947103e-01 4.4583992222855645e-01 5.4653306453144710e+00 1 0 1 +429 1 1.8525920421435695e+00 3.5762708549602490e-01 
5.0946475568663319e+00 3 0 2 +356 1 3.9097878727975877e+00 6.1716052733039095e-01 5.4734744838905147e+00 -3 1 -1 +459 1 5.0297951477539122e+00 6.4682655744305417e-01 5.6718176557063691e+00 0 1 1 +140 1 6.3845220800541655e+00 6.6200714540119532e-01 5.4418985717149972e+00 2 -1 -2 +121 1 6.3608122327251349e-01 1.6422337064097718e+00 5.7379184381145940e+00 -2 -1 -1 +76 1 2.6863089369401427e+00 9.2826845731889596e-01 5.1620994403726908e+00 -1 -1 -1 +24 1 4.4731219011860990e+00 1.5605471103773427e+00 5.2523372789875760e+00 0 -1 -1 +215 1 5.5638567276602870e+00 1.5540980050800413e+00 5.2788517723209116e+00 2 1 0 +406 1 7.1965017439495353e+00 1.2927814031197866e+00 5.6667263284723859e+00 0 2 -2 +495 1 8.1400977173034548e+00 9.1998286287268760e-01 5.5427200454151757e+00 6 1 0 +256 1 1.5858394354874155e+00 1.8927406862702940e+00 5.2157829401895270e+00 4 0 1 +348 1 2.8144079358075680e+00 2.0457885486401644e+00 5.4765038200952452e+00 1 -1 2 +18 1 3.7959072614405218e+00 2.4151932066273840e+00 5.1090004037855792e+00 0 -2 -3 +259 1 4.8058181463022738e+00 2.5068639859502841e+00 5.5173228168937900e+00 0 0 1 +261 1 5.9067727879570278e+00 2.4818772803689844e+00 5.0470051254431221e+00 0 0 -3 +433 1 6.6238775496086628e+00 2.4655070126810084e+00 5.8390299036823690e+00 -4 1 0 +119 1 7.4961811313748150e+00 2.2560858298720374e+00 5.2761246161627531e+00 0 -2 0 +181 1 8.7803165796510541e-01 2.5368621724963174e+00 5.1009676967579534e+00 3 -4 -2 +392 1 1.3563447389359846e+00 3.2004762283973394e+00 5.8237816659365569e+00 0 1 -1 +436 1 2.3946011665804514e+00 3.0278168308484168e+00 5.6997814720410966e+00 1 1 2 +210 1 5.9632298308600022e+00 3.3253508577291475e+00 5.7908436280268685e+00 0 -1 -2 +409 1 8.2954742717592467e+00 2.8784369153928258e+00 5.3508660402584347e+00 0 0 0 +437 1 7.8285811502936531e-01 3.5972247646914322e+00 5.1173576054548029e+00 -1 2 1 +114 1 3.5350435499222215e+00 3.3621891050940294e+00 5.4926280066777995e+00 0 -1 1 +431 1 7.2810459589983303e+00 4.1806346853520422e+00 
5.1699487852752508e+00 0 -1 2 +296 1 2.7493301442805773e-01 9.8739001637521445e-02 6.5256735912844297e+00 2 0 -1 +477 1 2.7650505314326872e+00 5.9323041848923519e-01 6.1880566096904213e+00 -1 0 -2 +199 1 3.8449072993955937e+00 6.8021783868606100e-01 6.5252139926754094e+00 1 0 -1 +31 1 6.8196091485194961e+00 6.5962784856610523e-01 6.6694400071986637e+00 0 -2 -1 +149 1 7.6325509159351759e+00 1.3582393886225264e-01 5.8995743775761014e+00 0 1 -2 +216 1 9.7752025904362638e-01 1.4078822836813889e+00 6.6728311339234487e+00 2 0 1 +425 1 1.7005586383875275e+00 1.1280428057946255e+00 5.8993957526091192e+00 -2 5 2 +224 1 2.8675797989920913e+00 1.6348520503423174e+00 6.4743030869420952e+00 1 0 0 +352 1 3.7461129077349202e+00 1.6467836683144792e+00 5.9711819974315681e+00 0 2 1 +289 1 5.0071286046398447e+00 1.3773263732616476e+00 6.4710057868740503e+00 2 -1 2 +455 1 6.2266605172975309e+00 1.4566180649712972e+00 6.2025029141131824e+00 0 1 2 +298 1 7.2166245583854813e+00 1.5995071342823830e+00 6.6916596114412981e+00 -2 2 -2 +42 1 8.3616236989495309e+00 1.1193553859054726e+00 6.6834990220304640e+00 1 -1 -1 +80 1 1.7994758316879240e+00 2.0602535563487749e+00 6.2756102858385763e+00 1 -1 -2 +127 1 4.3098326176630284e+00 2.3959981453352577e+00 6.4129162374884245e+00 -3 2 -4 +232 1 5.4258898148163945e+00 2.3715461809208245e+00 6.2879862147301759e+00 1 1 -4 +368 1 8.2199446060636472e+00 2.1052670796140989e+00 6.3974575157449376e+00 -1 -1 -2 +486 1 6.9787179588597281e-01 2.5321347977480562e+00 6.5634477651369929e+00 1 -2 1 +209 1 1.7697882729109665e-01 3.3153947644324067e+00 6.2408024968147000e+00 -1 0 -2 +152 1 3.1591516689848946e+00 2.6297173021869003e+00 6.2970081587154914e+00 -1 3 2 +353 1 3.7775468094107074e+00 3.3545010039524996e+00 6.5652719772105916e+00 0 -2 1 +361 1 7.1568303860427589e+00 3.3402954049017803e+00 6.0487812936573579e+00 1 1 0 +391 1 1.8477885811255761e+00 3.9829271272845177e+00 6.5960694610186286e+00 2 0 -1 +464 1 2.8125899145281190e+00 3.9218837233739201e+00 
6.1411786897515759e+00 -2 2 1 +497 1 4.5524588331963729e+00 3.4967999912980696e+00 5.9321819433594642e+00 1 -2 2 +498 1 2.0830532473234906e+00 1.1220036849985102e-01 7.1674325217309276e+00 3 3 -1 +264 1 3.1008573853993013e+00 7.6076625369238260e-01 7.2316131354301971e+00 1 1 2 +265 1 4.6342157774659736e+00 7.4471207241976944e-01 7.1556952519725225e+00 0 -1 1 +350 1 5.7838647346068255e+00 5.9645078928256690e-01 6.9763870948006925e+00 2 -2 0 +144 1 1.9122165150526358e+00 1.1329756208528616e+00 6.9185463020768152e+00 0 1 1 +17 1 3.7256587791934250e+00 1.6333740129131904e+00 7.1341698423474043e+00 0 0 0 +441 1 3.6115685474483350e-01 1.9101453221098965e+00 7.5303824824165133e+00 0 1 2 +266 1 1.4762191689510862e+00 2.4662450761248356e+00 7.5398542897976331e+00 -3 -2 -2 +375 1 2.3309589830172550e+00 2.0436669163885055e+00 7.1892097709618481e+00 0 1 0 +351 1 4.7143585284219309e+00 1.8434829864646978e+00 7.2780594229535307e+00 1 0 1 +254 1 5.9170265466448875e+00 1.9140788620193201e+00 7.1554204752492074e+00 0 1 0 +29 1 1.5191420610560982e-01 2.9876711650320327e+00 7.5404622831951533e+00 1 -2 0 +198 1 1.9302038195524280e+00 2.9894398797282253e+00 6.7310853131736357e+00 2 -1 -2 +197 1 3.3083005500744571e+00 2.6144599213348498e+00 7.4059907643060248e+00 1 0 2 +192 1 4.3810729888763209e+00 2.8557380429470860e+00 7.4007735378272006e+00 1 0 1 +220 1 5.1223945377780948e+00 3.3376038243952149e+00 6.7920010375935584e+00 0 0 1 +432 1 6.2458841824934863e+00 2.9482647398415351e+00 6.8353911959966540e+00 0 2 -1 +20 1 7.5153792083913968e+00 2.5459516140888447e+00 6.9548679851000408e+00 3 -1 -2 +63 1 1.0659006163620912e+00 3.4796280956085210e+00 7.1592343773906313e+00 0 1 -2 +446 1 7.1139268453604680e+00 4.0629892995134789e+00 7.0800362551664415e+00 1 -3 -2 +172 1 8.3244437231535144e+00 4.0795393733923691e+00 7.1857426258997332e+00 2 0 -1 +405 1 2.0158180111850474e-01 7.3000938410316307e-01 7.6191994505829879e+00 0 -3 0 +323 1 1.4017230037752237e+00 4.6050949606146502e-01 
7.9006469000953414e+00 2 1 -1 +428 1 2.4287753885012338e+00 4.6438480937363963e-01 8.3560508824421493e+00 3 0 2 +280 1 3.6502596381287908e+00 7.8205464112991063e-01 8.2256571665042131e+00 3 1 -1 +54 1 5.2270258856938900e+00 5.3851997974995880e-01 8.1047016704593808e+00 -1 0 -5 +397 1 7.4044521194737944e+00 1.4880482327788007e-02 8.2278445496777337e+00 1 0 2 +279 1 1.2356833818326960e+00 1.5625251472088779e+00 7.9109615213928013e+00 0 4 -2 +134 1 2.5628613498785486e+00 1.5781156241932268e+00 7.9994516338459301e+00 0 1 -1 +236 1 4.5573192582583912e+00 1.4401963680745278e+00 8.1580516205465621e+00 0 3 -3 +493 1 6.1321864263285528e+00 9.9745261592364498e-01 7.9044637123188437e+00 0 3 -3 +346 1 7.4631508711766701e+00 8.7980227698742530e-01 7.6161632460447608e+00 2 -1 -1 +295 1 3.6674081936683844e+00 2.3602153939048316e+00 8.3119852957626765e+00 0 1 -2 +164 1 5.5460188609614756e+00 1.8259166833804881e+00 8.1712445391317772e+00 -2 -2 -1 +354 1 6.6872220974400829e+00 2.0300640265600358e+00 7.8561422683464270e+00 1 2 0 +449 1 7.9034553200360103e+00 1.8534144914629234e+00 8.0299472682703943e+00 -1 -3 0 +185 1 9.5448193278219684e-01 3.3578879558028460e+00 8.1917731250955708e+00 1 1 -1 +229 1 2.5099351180061946e+00 3.0097254013180406e+00 7.8296711975898541e+00 -2 2 -1 +135 1 5.5895093348111047e+00 2.7786209311728598e+00 7.5930433616327404e+00 -1 0 -2 +211 1 7.0529928021063730e+00 3.1919477500830000e+00 7.6370125321552074e+00 -2 2 1 +81 1 1.8872269150105381e+00 3.9351057043865243e+00 7.6163648141042426e+00 0 -1 1 +37 1 3.6614705509244851e+00 3.4915765263742262e+00 7.7267865376074960e+00 2 0 1 +355 1 4.5831332879360058e+00 3.8667909487066860e+00 8.3349240075214972e+00 1 -2 0 +141 1 5.4975686789962985e+00 3.7534665361901731e+00 7.8536976598839008e+00 1 3 1 +122 1 7.2999342247943373e+00 3.9738648524411286e+00 8.3865006440763370e+00 0 -1 -2 +327 1 3.2503689184711210e-01 4.6592524224991747e+00 8.2659471361477532e-01 0 -2 2 +386 1 1.1858359917362140e+00 4.2742921253409367e+00 
2.8183822072239956e-01 -1 -2 5 +59 1 1.8702404230468521e+00 4.9717730076471947e+00 7.6874981917086671e-01 0 2 -3 +57 1 2.8167286953460633e+00 4.2569068178507328e+00 1.1953882254793591e-01 1 -1 1 +489 1 3.6302378764516323e+00 4.4521177967574310e+00 6.6729235313037527e-01 1 1 5 +326 1 4.9856357064009993e+00 4.5330987087989216e+00 5.2454407858907726e-01 -1 2 2 +359 1 5.9913033298564331e+00 4.3362327743167306e+00 3.7835915829420080e-01 2 -1 -1 +349 1 7.2426723526689933e+00 4.7181533524380885e+00 7.2707643667494415e-01 0 1 -2 +247 1 8.6433343008963215e-01 5.6823007167579762e+00 6.5224250416194052e-01 1 -3 2 +365 1 3.8233209759419231e+00 5.6939269324333672e+00 5.9607489711922135e-01 -1 -1 0 +382 1 5.1137068164970731e+00 5.5805604939667059e+00 1.8276059551769425e-01 -1 -2 1 +130 1 8.2482820181805554e+00 5.5968590423898927e+00 4.4653409437071251e-01 -1 -1 2 +471 1 1.7119665678072469e+00 5.8953695416344436e+00 4.5930053740843135e-02 -2 0 1 +21 1 2.5641267140454049e+00 6.3080369711057624e+00 5.3936073769936865e-01 1 -2 0 +499 1 4.7164689695839286e+00 6.6465500795405017e+00 1.6611623106065540e-01 -1 -1 1 +253 1 5.9043290983731245e+00 6.1331552003521068e+00 7.5404520106110218e-01 3 -1 0 +302 1 6.9564884663410282e+00 6.1640675729464434e+00 5.7033134187391044e-01 0 -1 -1 +324 1 8.1992983060509594e+00 6.5972265856779417e+00 7.7494747996129210e-01 1 -3 2 +154 1 1.8996589089571863e+00 7.1193783454022102e+00 5.1404963792269565e-01 -3 -1 -1 +284 1 3.0007790559640597e+00 7.2198187648711993e+00 2.3642486802169203e-02 0 0 0 +161 1 3.8133207311484072e+00 7.3201299304675667e+00 5.0202507839899535e-01 -2 -4 -2 +112 1 5.6724794675417822e+00 7.1979165692397187e+00 7.3233205047939343e-01 -1 -3 -2 +39 1 6.4178033104766934e+00 6.8891409921259603e+00 1.5315709015991064e-01 0 0 2 +269 1 7.4906317471214443e+00 6.7375050108085421e+00 4.8287801879068511e-02 3 1 0 +470 1 2.2822847309207428e-01 7.8917014242385868e+00 7.7675485207454809e-01 0 0 1 +84 1 1.3672512040847244e+00 7.9379711016796204e+00 
1.6230344562139010e-02 1 0 -3 +381 1 2.6740213254054948e+00 8.0161072175846630e+00 5.8246256296805565e-01 -1 -1 1 +416 1 3.6213693754100857e+00 8.3576521057616233e+00 6.2331560400527297e-01 2 2 -1 +182 1 5.1660049409085866e+00 8.0423641799361949e+00 3.0675573970032322e-01 -2 0 -1 +347 1 6.5429973279348026e+00 7.8912499362162034e+00 4.9818784876300565e-01 -1 1 -2 +380 1 7.4990421428201417e+00 7.6756533739549013e+00 7.5184655726615468e-01 -3 0 0 +19 1 1.4643514526638863e+00 4.2228380130315317e+00 1.3843336624933156e+00 0 3 -1 +430 1 4.3057981012626918e+00 5.0291601875436083e+00 1.2152759600723888e+00 1 1 -2 +212 1 5.0477378389079224e+00 4.2727344176247888e+00 1.5457805058503180e+00 -2 -1 2 +479 1 7.9063585867776194e+00 4.9679041912501303e+00 1.6188677256489217e+00 1 3 -1 +217 1 2.5218672599222897e-01 5.8414577361250837e+00 1.5087288813722908e+00 2 1 2 +147 1 1.1545026869668784e+00 5.3345315151941692e+00 1.5114087897016817e+00 1 2 -1 +13 1 2.8751716646412713e+00 5.2058560720828826e+00 1.0454367061984737e+00 -2 1 -1 +170 1 5.1473087472466297e+00 5.7979187654554183e+00 1.4646868436401976e+00 1 2 0 +204 1 6.2817592200581442e+00 5.1972845711964553e+00 8.9956483663207842e-01 -1 -1 -1 +306 1 8.3865328816009488e-01 6.6369969836196416e+00 9.5403861420405511e-01 1 -1 3 +414 1 1.6815920771923107e+00 6.1626367729687450e+00 1.1744973520180511e+00 0 2 0 +79 1 3.3223472300560521e+00 6.2059256452802716e+00 1.3962300133956274e+00 0 0 0 +116 1 7.6213827835593815e+00 5.9109006810695996e+00 1.2032264335528675e+00 -2 -1 3 +231 1 2.7502476364406934e+00 7.3933926752228123e+00 1.4247539952768773e+00 3 -1 1 +171 1 4.7071880050572981e+00 6.8064285791523735e+00 1.2807974821888473e+00 0 -2 1 +273 1 7.0329000376929018e+00 6.8584131720717227e+00 1.6693583798609657e+00 -2 1 -1 +260 1 8.0496857982994889e+00 7.4547112671714686e+00 1.6374112984134654e+00 3 -1 -2 +43 1 1.0723712760276345e+00 8.0330657119523838e+00 1.4245919245059160e+00 -2 1 0 +208 1 4.3910624755767662e+00 7.9590156044378260e+00 
1.1638330407357564e+00 -2 -2 -1 +226 1 5.5077902922207063e+00 8.0548948337249193e+00 1.2764179622713128e+00 0 0 2 +370 1 6.4142184578798505e+00 7.6748965104033502e+00 1.5118878610721513e+00 1 1 1 +85 1 4.9493682223295465e-01 4.4925581352788218e+00 1.7962077237252332e+00 2 0 0 +213 1 2.1575495906041722e+00 4.9142597626853277e+00 1.8811900528366003e+00 0 3 0 +299 1 3.3659976969419567e+00 4.3662606017243588e+00 1.7255850309353928e+00 3 -1 -2 +249 1 4.7082373488231886e+00 4.9470082900514569e+00 2.3877787475724284e+00 0 0 1 +258 1 5.7775488222451887e+00 4.8696821219175135e+00 2.2927307255193869e+00 -1 0 -1 +388 1 6.8398302583746604e+00 4.7642336931962808e+00 1.8670074890749995e+00 1 -4 -3 +91 1 7.7108281252011066e+00 4.3462922152329373e+00 2.4115614404022634e+00 2 -2 1 +415 1 1.3354264256961873e-01 5.2683808667768846e+00 2.4940378524658144e+00 0 3 0 +27 1 2.0564207334277329e+00 5.6999752852325889e+00 2.4901436865643838e+00 0 -2 -1 +465 1 3.6077777180133985e+00 5.3106225786683270e+00 2.2984375934854913e+00 1 4 2 +72 1 1.2115862831734683e+00 6.3271145377400213e+00 2.3088380865765914e+00 -1 -2 -1 +58 1 2.4730215910430200e+00 6.4108021967014599e+00 1.8872921058165837e+00 2 -2 2 +88 1 4.2468496124355051e+00 6.0000736368680991e+00 1.9689248186008255e+00 1 0 -1 +6 1 5.2824888171486872e+00 6.1177755165021708e+00 2.4648490036232666e+00 0 -3 1 +75 1 6.5540148621238368e+00 5.8837981825070473e+00 1.8049347572360399e+00 -3 0 4 +22 1 7.8390642986715990e+00 6.6390504342942007e+00 2.2941937725662940e+00 0 1 0 +128 1 5.0900588055916374e-01 6.9175984002222233e+00 1.8531623061329514e+00 -1 0 1 +193 1 1.7616411201473283e+00 7.3777929885216569e+00 1.8819221158557660e+00 -2 0 0 +389 1 3.8054852300865960e+00 7.1908689074118346e+00 1.6897648139570383e+00 1 -1 -1 +241 1 3.3304867419800805e+00 6.7201719117843943e+00 2.3881392654044515e+00 0 1 0 +14 1 5.6091745306383807e+00 6.8869789388403264e+00 1.7385824045568810e+00 0 -1 0 +307 1 2.6028552221612093e+00 7.5983757451277896e+00 
2.5134014261731092e+00 -1 1 0 +401 1 3.3924470446908370e+00 8.2185878004411617e+00 1.7224400376656741e+00 -3 -2 2 +248 1 4.8372436612670020e+00 7.6440956091999794e+00 2.0947200376368853e+00 2 -3 0 +335 1 7.0823280457514208e+00 7.5905134700352388e+00 2.4694687185743178e+00 -1 0 0 +281 1 7.4000691269637358e+00 8.3890728034516613e+00 1.8965880690997261e+00 -1 -1 0 +33 1 2.6215904333511340e+00 4.6944190932346643e+00 2.8031190571610427e+00 1 1 1 +221 1 3.7745777664099962e+00 4.5666560881588492e+00 3.0592511133135365e+00 0 -1 1 +251 1 6.7715334943605834e+00 4.6401458096684580e+00 2.8510683358464712e+00 1 -2 -2 +138 1 1.1922779528936784e+00 5.1973855619084013e+00 2.5689556972218521e+00 0 1 0 +73 1 4.3331751132449305e+00 5.7904710852531096e+00 2.9618985356431899e+00 0 -2 2 +237 1 6.2300560736724488e+00 5.7475979505901522e+00 2.7248929404434143e+00 3 0 0 +77 1 7.3913602469000059e+00 5.6780386160747609e+00 2.6234944353061729e+00 -1 -1 3 +339 1 2.9546703425028309e-01 6.2132195219954411e+00 2.7867557714215443e+00 0 0 2 +69 1 1.3562182919504069e+00 6.0134582221288868e+00 3.2785943115156346e+00 0 -2 1 +150 1 2.9668876558472732e+00 5.9472670316673391e+00 2.8936146954206632e+00 3 -2 -1 +219 1 1.1608118939764323e+00 7.3228681938207032e+00 2.7126116817865631e+00 0 -3 -1 +9 1 2.0627854045350777e+00 6.7454186793237145e+00 2.9360002216641958e+00 0 -2 -2 +3 1 2.9445094497077151e+00 7.1384526732484925e+00 3.3587771406211751e+00 2 -1 -2 +93 1 3.8294061315771235e+00 7.5378392781893275e+00 2.8380550678413003e+00 -1 2 0 +68 1 4.5306997862577427e+00 6.7390306077267583e+00 2.7235217150491495e+00 1 1 3 +35 1 5.4701910601523354e+00 6.9880160912814793e+00 3.3190052755169512e+00 0 0 -2 +174 1 6.2873908725084267e+00 6.7805871501360224e+00 2.7219816088319484e+00 -1 0 2 +369 1 1.7965435829527715e-01 8.0388194202620209e+00 2.5940286664394834e+00 -3 -2 1 +469 1 4.9516921171039039e+00 7.8826764349301754e+00 3.0530014528334970e+00 0 -2 -1 +270 1 5.8608285132223106e+00 7.6946678024150144e+00 
2.6778659829007898e+00 0 -1 -2 +196 1 7.6768577919393455e-01 4.8792664640921366e+00 3.5903070116215088e+00 3 -1 1 +235 1 4.2285310787746502e+00 4.2638690385284841e+00 4.1560966227762339e+00 1 1 1 +104 1 4.9767847616154004e+00 4.2048445393860883e+00 3.3962447019771562e+00 -1 -2 4 +363 1 5.8883238331527643e+00 4.9482373301020921e+00 3.4294316309792547e+00 0 0 3 +440 1 8.3695482926311691e+00 4.2129368246316421e+00 3.4018022112738615e+00 -1 2 1 +271 1 1.8521235523587734e+00 5.0753345126075313e+00 3.3807703470063983e+00 0 2 1 +385 1 3.2904276665596646e+00 5.1796485809465160e+00 3.7980367644038786e+00 -3 -1 0 +367 1 4.4116421156217953e+00 5.2602223360914149e+00 3.9453911592241386e+00 1 1 0 +8 1 5.3112651984610038e+00 5.7540585999932050e+00 3.4831459566385630e+00 3 1 0 +445 1 6.8973470747727381e+00 5.7304789961834581e+00 3.7043310817118127e+00 -2 -1 2 +95 1 8.3390772303693836e+00 5.5440620917552232e+00 3.7707087770664116e+00 -2 1 -3 +227 1 2.6907357337748437e+00 5.9773212885609901e+00 3.9377639031978284e+00 -1 -2 0 +45 1 3.9484717591019058e+00 6.4426795460159951e+00 3.6915638730283460e+00 0 1 -1 +62 1 6.2921228111843934e+00 6.5338531152740655e+00 3.8634903896998787e+00 1 0 -3 +309 1 7.4708902093515182e+00 6.5908652825436720e+00 3.6007172393059794e+00 0 -2 0 +70 1 1.1631280783050528e-01 7.0230287218135894e+00 3.4578008409773004e+00 -1 -2 2 +285 1 1.4425208756199777e+00 7.4192404188139784e+00 3.7903676842933867e+00 0 1 -1 +387 1 2.1212416378368082e+00 6.7926691438538640e+00 4.1965603113934069e+00 1 -2 -1 +484 1 4.6356188966878253e+00 7.3783035166039177e+00 4.0565969174155132e+00 0 -1 -1 +98 1 6.8865210518165370e+00 7.3140090488700187e+00 4.1918733358514384e+00 0 -2 -2 +282 1 6.6401301602074470e-01 7.9757076330778913e+00 3.4568068256572881e+00 0 0 2 +203 1 2.5721741898113022e+00 7.9369081377447577e+00 4.0156309667060688e+00 0 3 0 +189 1 3.6912207664687133e+00 7.6806375475121094e+00 3.9059717431750585e+00 0 -1 0 +267 1 6.5713653654298607e+00 7.8128923355065503e+00 
3.4078724648171090e+00 -3 -3 2 +155 1 7.9603563078893247e+00 8.1399268391964696e+00 3.4070072411074963e+00 -2 -3 -1 +313 1 1.7289301235151016e+00 4.3022360027322257e+00 4.5514630316827649e+00 0 1 -2 +250 1 3.3811553057719541e+00 4.4820133651244189e+00 4.8062617116575304e+00 1 1 1 +49 1 5.2294045670482578e+00 4.6507731504297771e+00 4.3588420415784839e+00 -1 2 -1 +111 1 6.9650608689197728e+00 4.6666276346705935e+00 4.3879356756979719e+00 1 1 -1 +124 1 8.0933085752880096e+00 4.6382186065764790e+00 4.3125441053619298e+00 1 1 -1 +488 1 5.0933114935797996e-01 5.1359886989747769e+00 4.5948583401263194e+00 -3 1 -2 +48 1 1.5851307073885004e+00 5.5767207395220701e+00 4.2311661806996650e+00 2 -2 1 +123 1 2.5153837170534041e+00 5.0458650845269890e+00 4.5131929409242613e+00 -1 -1 -1 +423 1 5.8208012531410862e+00 5.4948336589690365e+00 4.5916227902397422e+00 2 1 2 +329 1 7.4993364070942770e+00 5.4866100873090584e+00 4.5194723854672301e+00 -2 -2 -2 +334 1 7.1205767430238509e-01 6.1834005542640256e+00 4.2284519502384050e+00 0 1 -1 +146 1 1.3925739327604780e+00 6.6428737862846399e+00 4.8806888882719424e+00 0 1 -2 +500 1 2.2236367185474757e+00 6.0145074663436873e+00 4.9428276989368829e+00 0 1 1 +46 1 3.3290620592760245e+00 5.9295614805296450e+00 4.9789176350871935e+00 1 -1 -3 +374 1 4.7036984817853460e+00 6.2326093994181564e+00 4.3534332596232463e+00 -2 3 2 +52 1 6.8604388860085521e+00 6.3207993870641257e+00 4.9959828655023273e+00 0 -1 -1 +466 1 8.0663702260053096e+00 6.4797694103942982e+00 4.3885769098322607e+00 0 -1 -1 +272 1 6.3857634591010626e-01 7.4306626388140797e+00 4.4685908728843007e+00 -1 0 -1 +331 1 3.1923815943971330e+00 6.8406621847783482e+00 4.3673687027144910e+00 -1 -2 0 +12 1 5.5841339297795631e+00 7.2882403606371549e+00 4.4466343965097535e+00 4 3 0 +178 1 7.5912726284672463e+00 7.0457277620611389e+00 5.0302483408547527e+00 1 -2 0 +376 1 3.5187241629329796e-01 8.3977706803073371e+00 4.6278412560289910e+00 1 -5 2 +117 1 1.3379826357324891e+00 8.3239351364630743e+00 
4.2945291322530599e+00 3 0 1 +419 1 1.8896223988418746e+00 7.6393579746671740e+00 4.8168734901107717e+00 0 1 2 +379 1 3.9801436864751545e+00 7.7556819889833708e+00 4.9922836416209275e+00 -1 -2 0 +53 1 4.7488025467298804e+00 8.2113497019933241e+00 4.7088252949914393e+00 -3 0 -1 +28 1 6.4645893658779787e+00 8.2802587137451660e+00 4.5908064174413035e+00 -1 0 -2 +300 1 8.0497419370896441e+00 7.6196002867575432e+00 4.2909429383935933e+00 0 3 0 +83 1 1.5533997706565803e+00 4.2394383012204173e+00 5.5726999917011435e+00 0 0 -1 +304 1 2.5788446900543724e+00 5.0166498820006939e+00 5.6119573167673691e+00 0 -1 0 +51 1 4.2443804124641717e+00 4.2352637746580450e+00 5.1927633388220640e+00 -2 -3 1 +180 1 8.3861058182094546e+00 4.2682396578102644e+00 5.2739914655295381e+00 2 -2 0 +60 1 1.6015559581031933e+00 5.1463292330916399e+00 5.1677022459932900e+00 3 -3 0 +255 1 3.9377408204864559e+00 5.5446289337866919e+00 5.6880007643189954e+00 -1 1 -1 +336 1 4.8593163697287496e+00 5.2245778091909907e+00 5.0488368666183758e+00 0 -1 0 +74 1 5.8562368106306470e+00 5.1343487602136690e+00 5.4844996621256241e+00 -1 -1 -1 +11 1 6.8757956599703460e+00 5.1962144116573894e+00 5.2833696751528123e+00 1 -1 1 +106 1 7.8813740148695439e+00 5.1326569274050362e+00 5.5073989898629776e+00 -2 0 1 +342 1 6.9861903942379322e-01 6.2397324279420259e+00 5.5554818983126264e+00 1 2 1 +366 1 1.5648860489287066e+00 5.8898927909510999e+00 5.8373116466465662e+00 -2 1 1 +153 1 2.7314006683241487e+00 6.0747252624615822e+00 5.8658792109688322e+00 2 0 0 +384 1 3.7649825100367740e+00 6.6140228235604797e+00 5.7068894568556274e+00 3 0 -1 +444 1 5.5919895424131791e+00 6.4007885907393334e+00 5.0609268438363237e+00 0 0 1 +169 1 8.1101285040334883e+00 6.0263607659074259e+00 5.3262646023669920e+00 2 -3 2 +312 1 1.5049751572049341e-01 7.0899457593186419e+00 5.3993285616214672e+00 -4 -1 2 +131 1 2.1405833530478784e+00 6.8932472731161107e+00 5.7016479425245992e+00 -1 -2 -5 +290 1 2.9965099079972810e+00 7.1703954961127128e+00 
5.3705836189910157e+00 3 2 0 +240 1 4.7341951239220323e+00 7.1100411829110195e+00 5.1242936547927025e+00 -1 -4 5 +403 1 1.1346681749692125e+00 7.6627110490212882e+00 5.5800616996766177e+00 -3 2 3 +438 1 2.6845809734218404e+00 8.2016169024289791e+00 5.0687325841194584e+00 0 1 1 +420 1 4.5890023008209200e+00 8.1137042645946345e+00 5.6817522441295134e+00 -1 -3 -3 +175 1 5.5503185230040479e+00 7.9962207684468503e+00 5.3390442156703726e+00 -1 -2 1 +305 1 6.7862386693055941e+00 7.6108734242812508e+00 5.4748415197806564e+00 -1 0 0 +341 1 7.4912815420537060e-01 4.3291768420078673e+00 6.1635194432999105e+00 -3 0 0 +291 1 3.3895079643780628e+00 4.7034241504608403e+00 6.1615817441829774e+00 -1 0 1 +372 1 4.6184126650372983e+00 4.6292373343220516e+00 6.3125880555070495e+00 1 -1 -2 +293 1 5.5228203863122962e+00 4.2760181362826257e+00 5.8887719440294752e+00 2 2 2 +461 1 6.6292898049431850e+00 4.3535966489510214e+00 5.9130834363303126e+00 0 -1 -1 +263 1 7.7259268091094802e+00 4.4631499768470926e+00 6.3337285974251110e+00 -3 0 0 +473 1 5.8703225638324330e-01 5.3945075957215893e+00 6.1889975824903258e+00 1 -2 0 +166 1 1.6857123760309740e+00 5.0889285259544463e+00 6.6009674185523135e+00 2 2 2 +132 1 6.2122393744706681e+00 5.4126385838431412e+00 6.6911128610756689e+00 -1 2 1 +32 1 7.1447545314606753e+00 5.2817185569927805e+00 6.2574893275776935e+00 -1 1 0 +475 1 3.1727782520787395e-01 6.3991068893590928e+00 6.5410008153956287e+00 -1 2 0 +283 1 3.2886765713157047e+00 6.4577609617474216e+00 6.6450211825759427e+00 -2 5 -3 +205 1 4.9096514414971573e+00 6.2573093659948649e+00 5.9509143564095108e+00 1 1 0 +158 1 6.0709604726562620e+00 6.0618280221451721e+00 5.9241072976562341e+00 2 -3 1 +1 1 6.8533858129590133e+00 6.5562074315723597e+00 6.5648993855676077e+00 -2 -1 -2 +485 1 3.8125590702509465e-01 7.4616693598979804e+00 6.3609885403345947e+00 0 0 1 +378 1 1.3081995382782592e+00 6.9194359283210023e+00 6.2966360730695206e+00 -1 1 2 +71 1 4.2456384497919455e+00 6.8188407430282236e+00 
6.6601493363974615e+00 -1 -4 -4 +320 1 5.9836892193800049e+00 7.1597807912498039e+00 5.9293654675383980e+00 -1 -1 -4 +481 1 7.6691355446374123e+00 7.2329848679784536e+00 6.1548343866270958e+00 2 0 -4 +318 1 1.3774303149915195e+00 8.3818831122591373e+00 6.3319633954681702e+00 -1 3 3 +496 1 2.5904412000483283e+00 7.6460489519926949e+00 6.2257027413180230e+00 0 1 0 +319 1 3.7413059213345305e+00 7.5724262762451815e+00 6.0123092822785358e+00 -2 0 0 +338 1 5.3628717868866635e+00 8.0629328638364353e+00 6.4489666989888104e+00 0 0 2 +358 1 6.2532288322758314e+00 8.3160028104100370e+00 6.1106737733333700e+00 1 1 3 +225 1 7.1188694406828850e+00 8.0966599202958953e+00 6.7037213079900431e+00 0 0 1 +55 1 1.0372861368831470e+00 4.4696141893250934e+00 7.4049774120079697e+00 -1 2 -1 +362 1 2.7501402171762614e+00 4.4083355662417896e+00 7.3996418874276850e+00 1 -1 0 +36 1 3.7396318187704796e+00 4.4801126663955504e+00 7.1074085083223073e+00 2 0 -2 +133 1 4.7860445539287522e+00 4.2902205437459102e+00 7.3711360387569274e+00 -1 -3 -2 +412 1 5.8687821857033606e+00 4.2035753083199188e+00 6.8227572622514323e+00 -4 3 0 +89 1 9.1212528791654535e-02 5.3554487915969240e+00 7.1088625783964092e+00 0 -2 0 +38 1 1.2571311964388701e+00 5.7525434192639207e+00 7.3210178579367868e+00 -1 -5 -2 +337 1 2.7511266822038212e+00 5.4161467707559394e+00 6.9980055112843935e+00 1 -2 -1 +233 1 3.8756258886529467e+00 5.7508116329978627e+00 7.0533607828281815e+00 0 0 -1 +442 1 4.9084237493694953e+00 5.5326539633780083e+00 6.7666717819135362e+00 -4 -4 1 +143 1 5.5776680237268401e+00 5.1621486864820874e+00 7.4635021710959082e+00 1 2 -2 +194 1 7.1453664245542576e+00 5.0984438205910134e+00 7.3333812213368574e+00 0 2 0 +308 1 7.5489760640996861e+00 5.8433222820659871e+00 6.9295207313931844e+00 0 0 0 +286 1 2.1326326019163346e+00 6.2556170719743678e+00 6.7379074391662934e+00 2 0 -2 +61 1 5.6654689088882648e+00 6.2935208603774795e+00 6.9534505775907345e+00 -2 1 -1 +426 1 7.2244687144599562e-01 7.0806885112202815e+00 
7.2261712082790313e+00 -1 1 -2 +450 1 1.7213783340257280e+00 7.5184636193370302e+00 6.9767252413839369e+00 4 1 1 +188 1 2.8213470931093725e+00 7.2342621084044723e+00 7.1806231926796817e+00 0 -2 -1 +94 1 3.8026924767351562e+00 7.3932541518378949e+00 7.4596688811177305e+00 -1 2 -2 +252 1 5.1481924139065534e+00 7.2239227196376481e+00 7.0808756518162852e+00 -2 1 -1 +321 1 6.2155610611385042e+00 7.5199920009866501e+00 7.0964075868161212e+00 1 -1 -3 +156 1 7.9944514874782193e+00 7.0252703915440122e+00 7.2541499279788821e+00 -2 1 0 +167 1 5.6917482122629082e-01 8.1417190335213725e+00 7.4962901784513347e+00 2 -2 -1 +330 1 3.3138446283251373e+00 8.2146300363312132e+00 6.8886408760800295e+00 1 -2 0 +195 1 4.3712446295554876e+00 7.8995454090647614e+00 6.7366600234979277e+00 -3 -1 -1 +78 1 5.1856313345157918e+00 8.2127013524100576e+00 7.4620544007464273e+00 0 -1 1 +157 1 7.9011697675935482e+00 8.1764884669087454e+00 7.4266272279794077e+00 -1 0 -1 +373 1 6.7132530681439118e-01 5.0292807223694753e+00 8.1874843860841438e+00 1 2 -2 +56 1 1.7078519119356448e+00 4.9344764637762015e+00 8.1280520422148399e+00 0 2 0 +315 1 3.6234275175752244e+00 4.9451657793378052e+00 8.0476764469560074e+00 -1 -2 0 +478 1 6.4485922344872577e+00 4.3096951707056732e+00 7.7984645181472576e+00 -3 -1 1 +109 1 8.0579925616842267e+00 4.7034410927383776e+00 8.0870676538969590e+00 -1 -2 0 +357 1 2.6884169416542560e+00 5.4427652520296803e+00 8.2033924998655703e+00 0 3 1 +454 1 4.5709826050185614e+00 5.3780110248528246e+00 7.7764192595368709e+00 1 -1 2 +86 1 6.2280039286639228e+00 5.4780735688869227e+00 8.2809905303020894e+00 -1 1 0 +262 1 7.2349855745224341e+00 5.4460551342647063e+00 8.3066978094955939e+00 0 -2 2 +160 1 6.0572932237795518e-01 6.1031638741150802e+00 8.0606222325454144e+00 2 -1 1 +168 1 2.6926304594044645e+00 6.3106981827881823e+00 7.6217004925278191e+00 -1 -3 0 +145 1 3.6817206885980216e+00 6.4794531849637744e+00 8.1206460236595284e+00 -1 -1 -2 +400 1 4.6165015313151514e+00 6.4768264075412638e+00 
7.5742703650137084e+00 1 -2 -2 +395 1 5.5951136272170219e+00 6.3298964082146654e+00 7.9312370644766386e+00 0 0 -1 +297 1 6.6597873155694378e+00 6.3692836044578955e+00 7.6364845321455510e+00 1 -4 0 +207 1 8.0115395128671594e+00 5.9375044868831406e+00 7.8466324008781454e+00 -1 1 3 +460 1 6.6754980612335257e-01 7.0521823363262239e+00 8.3451452216672077e+00 -2 1 -3 +44 1 1.5985553198249884e+00 6.8250382119954756e+00 7.7469635466673052e+00 0 1 0 +377 1 4.7397084186898368e+00 7.4631435688157008e+00 8.0338331164055763e+00 0 -1 0 +411 1 5.8132963527118724e+00 7.4646663474419555e+00 8.0261071780587709e+00 2 -1 1 +396 1 7.0242838781762282e+00 7.3479893628886632e+00 7.6603703429086574e+00 2 3 0 +424 1 8.1516255787718581e+00 7.5530513248894016e+00 8.2349155971796755e+00 -2 0 0 +66 1 2.2362583327280716e+00 7.7449451931440727e+00 7.8847519662235594e+00 -1 -2 0 +371 1 3.1656541571472099e+00 8.2451918974189500e+00 8.0415029551628514e+00 1 -1 1 +186 1 4.1675976318493086e+00 8.3247658596031009e+00 7.8802494669419030e+00 1 -1 -1 +413 1 6.4945159878598346e+00 8.3677431053238660e+00 7.6981134933049891e+00 -2 -1 -2 + +Velocities + +7 -1.5197672199477208e+00 -7.1031250708487148e-01 -4.0950627961412567e-01 +398 9.4644999179644840e-01 -8.0422358764146151e-01 -1.1023964746841350e+00 +173 4.8084218242595870e-03 -1.9383808513915850e+00 9.3439099328992314e-01 +462 -1.6390116924948674e+00 -1.2080683544699562e+00 7.6685759600965364e-01 +383 1.2945118204202577e+00 -1.8370931432093225e+00 1.1758972656982776e-01 +288 1.2113220352829337e+00 4.8786819623543370e-01 7.1439798712590263e-01 +303 3.6232856211831610e-01 -3.7927260062054236e-01 -3.0467891118546553e-01 +491 1.6817821327211264e+00 6.3153250223994289e-02 -5.4827183875981267e-01 +187 5.6164532616475686e-02 1.3250338697636479e+00 1.6291294556417517e+00 +163 1.3950518510134631e+00 -1.7090097072259662e+00 -6.0029200570353536e-01 +345 -8.4635427449528189e-01 1.3013642733988193e-01 -3.7582537476409439e-01 +447 -5.5306523194731427e-01 
-1.4583063079290524e+00 4.8086237208854654e-01 +108 -3.1879632824306825e-01 -4.9387838912491844e-01 -3.1046215530949173e-01 +427 2.4456665882970424e-01 1.3737057789182878e+00 -1.7474218101951866e-01 +96 -2.8217287015277487e-01 3.9138367367476778e-01 -4.6114700958040086e-02 +494 3.0303709765292064e-01 -9.2861341821366045e-02 -3.6692009603190118e-01 +360 -4.2709052063514102e-01 -1.5167804301477015e+00 -2.6902074926466563e-01 +179 -1.2403873183879999e+00 1.1368400311662936e+00 5.6653449328883521e-01 +136 -1.5859168963178476e+00 1.0728794555657983e+00 5.0876417522892092e-01 +311 -1.4232560247874224e-01 -8.2802934475265932e-01 1.0289469027582252e+00 +242 -1.3753994053365912e+00 3.0823812109914095e-01 1.2519475129774249e-01 +451 1.0684545083916284e-01 -1.3647250068831975e+00 1.2292458701523420e-01 +118 6.5344809858683794e-01 -6.0529931820910909e-01 -6.6821572295625731e-01 +200 -9.1497459004230530e-01 -7.2783581485393822e-01 -8.5265666689286446e-01 +23 1.0121027757280427e+00 -5.8690115135784995e-02 -1.4283745590726701e-01 +30 -1.2406061152582815e+00 3.7490020292637088e-01 -1.4457935973251330e+00 +26 -9.9581994312158117e-01 -1.2441026541932486e-01 -3.6541440712112250e-01 +452 -9.8588039362152369e-01 1.0706468502992672e+00 1.6282162085617651e+00 +439 -1.2386761163593085e+00 4.3106305328993799e-01 5.0410115305724934e-01 +492 2.7094066326642935e-01 6.6318883302565457e-01 1.5999014287193769e-01 +287 -1.7254693201510321e-01 1.1189096817152451e-01 4.1489954379924110e-01 +87 -1.1416496451717975e+00 -7.8732701356097490e-01 -5.1553483379997289e-01 +191 1.2771897324335735e+00 -1.7496443253093807e+00 5.0507874089360993e-01 +316 9.1440987552186193e-01 5.4700363415289210e-01 9.8622452705158947e-01 +176 1.8575029150961730e+00 4.1139607157872565e-01 -7.4590595351664335e-01 +244 -4.6943526746297531e-01 -4.0598167955478592e-01 1.2148844546685273e+00 +463 -6.0334643189530614e-02 9.0710610807033173e-02 -1.1159129056326442e+00 +101 -4.6185710842519001e-01 -1.0745733134156132e+00 
-8.4095960640252343e-01 +456 7.3559492687866046e-01 8.4585098605759870e-02 9.9419285892741016e-02 +90 -6.9201778873218364e-02 4.2798282427303436e-01 4.4566581747153294e-01 +457 -2.4132420839469965e-01 8.4836564798364844e-01 1.0955942184355154e+00 +344 6.5538410556645377e-01 1.0527852545027427e+00 6.8154568558028386e-02 +399 -8.3144103937947644e-01 1.9062875610231711e-01 -5.6419101062513155e-01 +472 7.2667243545881360e-02 4.9535613585667015e-01 8.3691870718219108e-01 +364 -2.9526170592863293e-01 -8.7862728527604539e-01 8.2689159018968139e-01 +333 1.7241045239522232e-01 1.0435495829586168e-01 -4.6109306717747689e-01 +223 3.7919285550573256e-01 -1.0672385659469772e+00 3.6984147519214111e-01 +277 -8.8028901289995187e-01 5.8075379122029680e-01 -5.0555689035944396e-01 +487 1.5229361655153211e+00 -4.3334638823410909e-01 -5.2025315373818937e-01 +202 1.1890248672755632e+00 -1.3103428543183313e+00 1.3877359082264291e+00 +139 -3.8593619396281981e-01 -4.1419081865118412e-01 4.8341157555249414e-01 +218 -1.2190042149414249e+00 8.6603581070857749e-01 5.3180226853912571e-01 +10 4.0346230429792890e-01 8.8022045895227152e-01 3.0611723210999553e-01 +64 1.0687583963535543e+00 -2.5590555538042992e-01 8.2852453062456832e-01 +40 -1.0543471069223134e+00 -7.9810717548135568e-01 1.4389073785681245e-01 +328 3.2979243925573831e-02 3.1076559402174242e-01 -1.6410904905031269e+00 +292 -6.3224783054646483e-01 -4.3704038413109603e-01 -2.2759444134328080e+00 +245 -4.7869651996764956e-01 1.4231085014444791e-01 -1.0109564296885869e+00 +148 -7.1011989912373741e-01 -6.8437374624235792e-01 -4.7084060341785217e-01 +435 -2.3710180935788738e-01 7.3150957873543554e-01 1.9569288343148944e-01 +301 -7.3626400316656140e-02 -1.2518892197053937e+00 6.4938262930099341e-02 +476 4.4522311443074564e-01 8.4413304640745324e-01 6.9180275295562699e-01 +310 -4.4579106235383259e-01 -1.4870237388818175e-01 -1.7519046471808755e+00 +137 1.0191269790481958e+00 -1.1871217736355175e+00 6.0687187776921336e-01 +434 
-8.0977924479936902e-01 8.2445677967490594e-01 -7.2982619204505017e-01 +184 9.3329968718567657e-01 -7.8960961760367476e-01 -3.2387020805541877e-01 +5 -7.6591643318776403e-01 4.4567507028100856e-01 -5.1504861374485367e-01 +41 -6.0827814801341495e-01 1.0335818250945215e+00 -1.4262577781586459e+00 +410 5.1617643329833041e-01 -1.2505433325731774e-01 6.5548767709779598e-01 +418 7.4452116863445517e-01 5.5176595694257291e-01 4.8433486134467341e-01 +34 -1.0960797951083094e+00 -9.5559899011366922e-01 4.1467430632337771e-01 +67 -1.4762168791195980e+00 7.3902009632630203e-02 -9.8563314451155759e-01 +390 1.1040922756330935e+00 -1.0608530270906485e-01 -8.5480860400114822e-01 +125 1.1001683944330445e+00 2.5213959563273414e-01 -7.0907876564322847e-01 +105 9.3642884637919491e-02 3.2241122070134182e-01 3.4267503174724429e-01 +274 1.4805711826308454e+00 1.9591264275732104e-01 4.6208005550476272e-01 +314 3.3724005635036658e-01 -4.9228354871213487e-01 -5.9105855919176131e-01 +408 4.1789097671328662e-01 2.2054753884758851e-01 9.9409268338014201e-01 +142 1.3529751978153088e+00 8.8819247946851643e-01 -2.9699787225611667e-02 +25 -3.6747378202427655e-01 -1.6653943785151797e+00 2.1594786196965424e-01 +458 -7.3004682150577682e-01 -2.5761460079892318e-01 -7.5492049253862104e-01 +165 -2.6776300065898789e-01 -3.1428309412647404e-02 7.6464836175889062e-01 +421 -2.2112566002008222e-01 -8.0033416193571816e-01 7.4057822704117926e-01 +151 1.6510010513742961e+00 6.2880202489514836e-01 1.3109463507573997e+00 +162 8.6466591751463506e-01 1.2003096922919751e+00 1.6720745976940152e+00 +120 7.1544273833415639e-01 7.8605685992147967e-01 1.5756692842267452e+00 +448 9.4818399052933544e-01 2.0373333697562079e-01 1.1821449360296239e-01 +50 -7.4255028295716818e-01 3.2131856627302258e-01 7.0344624031236247e-01 +443 5.1965861871733798e-01 -1.0469002555669342e+00 4.3373559100868195e-02 +482 1.9661775056161916e+00 1.8911734358361203e+00 7.2949567046678088e-01 +407 1.8582263860937767e-01 -1.3051257268383373e-01 
5.0987931207045945e-01 +100 -3.7775697318477114e-01 6.0919017017007682e-01 7.1809182324625254e-01 +99 -1.2684617496938173e+00 7.3671717137305515e-01 -8.1216571664258830e-01 +107 -2.8776644836089277e-01 5.0874100972210130e-02 -4.8277793530428115e-01 +480 -1.8051331679926971e-01 8.6563042329278228e-01 -5.8352716569019136e-01 +115 -1.0589943552297505e+00 -5.2550981762149152e-01 -1.7244688387853080e-01 +278 1.5768829958501078e+00 1.4428098110470169e+00 8.8216722790329116e-01 +206 -1.3653407453215169e+00 5.7341428096126357e-01 2.2629859809279146e-01 +4 4.2054817038258185e-01 -2.9390858515089308e-01 -1.7875237254421117e+00 +214 -1.0426854223671351e+00 -9.9898728143962490e-01 -1.9850479848636837e-01 +65 4.1792586017239108e-01 8.0186431881714215e-01 -2.0107060384389590e+00 +490 6.7355251805377303e-01 -2.0698786089395388e-01 -3.1564646838967253e-01 +230 -1.3855973935201399e-01 1.6134941051796734e+00 2.3723214232474421e-01 +234 1.9442134326447673e+00 -1.3805634063343805e-01 -9.8210270774821284e-01 +92 1.8063255781217027e-01 9.3716955232457000e-01 -1.9187798219061411e-01 +103 3.9559605419305116e-01 -8.7523159162324915e-01 -8.6993815784441930e-01 +129 1.2253789920819727e+00 2.8171983713246046e-01 -1.2180024131333584e+00 +483 9.6349992375661320e-01 -1.6348662648160852e+00 8.3712630375667085e-01 +110 8.0468512955471783e-01 -1.2623668571621947e+00 1.4617362498240638e+00 +126 -2.5741602043303030e-01 -2.5870568275485217e-01 -1.0282926023825125e+00 +276 7.0448925295125497e-01 -1.4486777483518087e-01 4.0589927727544634e-01 +294 4.9277474123671205e-01 1.0788251906341033e+00 1.0720476037391911e+00 +417 1.2134126298835348e+00 6.9341902173441838e-01 -6.5915650495314448e-01 +102 -9.4530772564542864e-01 3.5633646308084904e-01 2.1645216652503570e-02 +228 -3.1477177829390662e-01 -7.3015136189998553e-02 -1.3550661516185267e+00 +2 -1.5167590438599203e+00 3.2942906513575698e-01 -3.9430403043809092e-01 +222 -3.4853423639550507e-01 4.3402827204271471e-01 -8.9055344775375656e-01 +332 
7.6671266929186610e-01 -4.2789189576138797e-01 2.2589706911206284e-01 +275 1.0091776783462640e+00 -2.3875723103898955e-01 9.0625273137588058e-01 +243 -2.4008164703199580e-01 2.4602229249214927e-01 -9.2542387373274426e-01 +394 -1.5496683613302659e-01 -1.4731622236607285e+00 4.3712483588631723e-02 +325 -9.9716991278750156e-01 9.0079916710328489e-01 6.1656781470423368e-02 +468 1.1367478364774437e+00 3.7268550263787792e-01 -1.9334998928228118e+00 +113 -6.1637943298580211e-01 -1.4259739417714792e-01 8.5880390205968704e-01 +47 1.4522907038923785e+00 -3.8138661441744320e-01 8.8902241399148096e-01 +474 -6.5326939744671464e-01 -2.9584216080544323e-01 -1.6703076347144621e-01 +343 -2.0865957540293453e-01 4.2320136734663144e-01 -5.2417372636564219e-01 +257 -1.6598412047016537e+00 3.9614126419385248e-01 6.2190463287430520e-01 +422 9.8621254947118736e-02 -4.5653326655140536e-01 -1.6959752095102018e-01 +317 8.1554926779727022e-01 -9.3952932101149422e-01 4.1213701483558629e-01 +322 9.7692194337623842e-02 7.2221211185210599e-03 1.4983319825497168e+00 +453 1.0112117753398511e+00 -1.6043121320019077e-01 -1.5109389287188097e+00 +82 -1.8245270960375951e-01 -1.7227940931260219e-01 9.4914460423273084e-01 +402 -1.3974163926710530e+00 1.0238276956317611e+00 -2.7116721725643461e-01 +201 3.8154565319070316e-01 6.1162789639306392e-01 5.3616806651343329e-01 +159 -5.5623509138788352e-01 -7.3506001159846490e-01 2.9669217335717424e-01 +239 -1.1703971851987414e+00 -2.5624748584715568e-01 1.9987978519039629e-01 +340 -1.1651635328367398e+00 1.4923285973929103e+00 -5.0638764373602618e-01 +246 4.6916366638116123e-02 3.7471219463751682e-01 -8.5595777185358435e-01 +268 3.9776335718715750e-01 -4.1326407010466332e-01 6.4132644428253849e-01 +238 -1.0102989663900468e+00 2.5145560602393102e-01 1.6537809300909093e-01 +183 9.6649446422314420e-02 -3.1320299584600791e-01 8.9787784698607254e-01 +393 -8.1709715097020719e-01 6.5173585509788723e-01 1.3340272254613436e-01 +177 9.2862946550951486e-01 
-7.4734865012317631e-01 2.6370187436425798e-02 +97 4.0324698775550633e-01 -6.5060630846020162e-01 4.3381661619662121e-01 +16 9.2225588537666714e-01 5.0761227093429218e-01 2.8224038915514177e-01 +15 7.4631799122735776e-01 -5.1735298399747642e-01 1.0267699639319139e+00 +467 2.9233724948217432e-01 -1.7768548355681282e-01 9.7602334864839035e-01 +404 -1.5591392530118275e+00 6.9331308976828065e-01 -1.9156738913640683e-02 +190 -2.3293475783192552e-01 1.9804103372721136e+00 1.4400561196029799e-01 +429 -3.1240905856412571e-01 5.3399975662794008e-01 -8.3672459106253427e-01 +356 1.1813499076138427e-03 4.5712322524667531e-01 7.2569550209608535e-02 +459 -8.2536401218028255e-01 -5.1587534016495484e-01 -3.2429633054579987e-01 +140 1.4169132709847332e+00 -1.6948863471126016e-01 -5.4590188862129563e-01 +121 -8.0623019357608017e-02 4.3956025735145637e-01 -4.1230464603429395e-01 +76 1.2149513180720075e+00 3.0664283974167705e-01 -6.2206166881984115e-01 +24 -9.0953792569391767e-01 2.7208504534230199e-01 -5.7968255809907288e-01 +215 -3.6635366137979941e-01 4.9083544993599626e-01 5.4540399677247620e-01 +406 -2.8711421371139068e-01 9.8028410382686626e-01 -1.5688211456737056e+00 +495 -2.6064405757207798e-01 1.0014422650657693e+00 1.2195244915445839e-01 +256 5.1502433217852350e-01 2.1618101498582329e+00 -5.5671498574760148e-01 +348 1.5303758850601952e+00 1.3538700733088898e+00 -1.0404602147830582e+00 +18 8.7092721144608365e-01 8.8296303804909981e-01 4.7076145062566271e-01 +259 1.2579194534410862e+00 1.1275351076124240e+00 1.3255628670657002e-01 +261 -1.9457509303164215e+00 -5.7013962206489510e-01 6.5787967104563000e-02 +433 9.5143226323949670e-01 -1.9787864106930605e-01 2.1852616176467912e+00 +119 5.2387728877999162e-01 -7.5008900728698324e-01 1.0156553043596699e+00 +181 3.6547537534957092e-01 4.4856602347738611e-01 1.9502787843157898e-02 +392 1.8883706922750969e+00 1.0836617564317459e+00 6.8587165903540392e-01 +436 -7.5639783907079583e-01 -6.1811736982444421e-01 -2.2826839230281426e-01 
+210 -4.5965909404278121e-01 -3.3285948977797364e-01 -2.8815683118847368e-01 +409 -8.9630394160430427e-01 1.2498156320615575e-01 -5.9486610361558201e-01 +437 -1.3458244785393264e-01 1.1611540390359226e-01 3.6707571520967769e-01 +114 -1.1780626363780731e+00 1.1424256338126497e+00 8.1898562672334474e-01 +431 3.9295632541408759e-01 -5.0466339464883803e-01 7.4701651109545830e-01 +296 -2.9562979314792992e-01 9.3794387166245374e-01 4.3821394647736267e-01 +477 -1.0332993738194893e+00 5.9732959339716152e-01 -1.6990304505644747e+00 +199 -1.0627414724922077e+00 3.6527741326574337e-01 -5.0076911469807539e-01 +31 1.1836258822972918e+00 -1.1805527709262840e+00 -1.0148440310730218e-01 +149 -9.7152762187442065e-01 -8.2689424175864179e-01 -9.8308725322137946e-01 +216 -2.6635793686817422e-01 -1.1249304826477751e+00 5.2675272099922754e-01 +425 -2.8287490221194594e-01 4.1086346332975782e-01 -5.6774387340250076e-01 +224 -6.3636066584070272e-01 3.8927738472590379e-01 -1.2043299828812322e+00 +352 -6.1634031766701480e-01 1.9344774717790295e-01 7.8374630494614250e-01 +289 -9.8440193523267117e-01 1.4471012647779624e+00 4.3922631035860088e-01 +455 -1.1942412759322272e+00 1.1300600257912083e+00 -4.7958784865825908e-02 +298 -8.4955866984285322e-01 1.8131854984076790e-01 -1.9010434504555735e+00 +42 1.1523609145851870e+00 1.3198909771653919e+00 -1.1943568765046571e+00 +80 2.0928302112510155e+00 -1.9281727715628849e-01 3.5879525261694012e-01 +127 1.0275071129063864e+00 -7.8560029105042173e-01 8.4680312545585235e-01 +232 3.2836195573764693e-01 1.5921754951459595e+00 -1.4889040346123832e+00 +368 -1.0143580013676627e+00 -6.3286772474382025e-01 5.9003271203631313e-01 +486 3.0052735920053437e-02 -7.5917073216074904e-01 -4.0707609210832352e-01 +209 8.3084043466989521e-01 -1.2819860426930157e-01 9.8450910112172252e-01 +152 1.4018503697498572e+00 -1.3364969077798206e+00 5.1589841760235022e-01 +353 5.3822763779176630e-01 -4.8322987719532096e-01 3.5634065895494110e-01 +361 6.8264929410893807e-02 
-1.3682995245096907e+00 -4.9273501999153174e-01 +391 6.7065636846264776e-01 -2.7904582963633018e-01 -6.5303896626046076e-01 +464 7.4567031841775322e-01 1.8046977002555891e+00 -3.9159771733015164e-01 +497 -1.3856699033357778e+00 -9.3599081481125368e-01 1.8204759707532270e-01 +498 1.0568629143757213e+00 -6.8137997517450560e-02 -3.7037143049187105e-01 +264 -7.7894167957263005e-01 2.7383401106980504e-01 7.5098074183721675e-01 +265 1.9614235016832940e-01 -4.2322665725562197e-01 -1.6236881765469688e-01 +350 -6.3256317352651559e-01 -5.9660876051471468e-02 5.2317095757618959e-01 +144 -2.0800773378554721e-01 -1.5156540919629388e-01 -1.8814379829548375e-01 +17 8.2103760580927843e-01 -1.1053143150308307e+00 1.1843881143235824e-01 +441 9.9191589104231870e-01 -1.9479579195707514e-01 1.8130903870970541e+00 +266 -9.4957121369000017e-02 8.6997989048119007e-02 -1.5903074475142429e-01 +375 8.1973242957273296e-01 1.0075441532082390e+00 6.6277118207442265e-01 +351 -1.0694047025049973e+00 1.2185457298237026e-01 7.2731979237547140e-01 +254 -1.4178061472233576e+00 1.0044925984667965e+00 5.9999609355847139e-01 +29 4.7548776272319854e-01 1.5001558567759110e-01 7.4645778473548241e-01 +198 5.7218364285113010e-01 2.9196622753924861e-01 -1.2231490460337893e+00 +197 -1.4204377171882598e-01 2.1322055429678972e+00 3.5360172437812271e-01 +192 -4.3230000337254881e-01 8.1753260459826260e-01 -1.3961239427992276e+00 +220 9.2338173032465304e-02 7.2444278115958327e-01 4.3240465996750244e-02 +432 3.3837153168712325e-01 1.9757543957217949e-01 -3.2138599665961831e-01 +20 7.2972771576502227e-01 -7.1923547937920451e-01 3.8462300862783037e-02 +63 7.3459759072459618e-01 -2.1393390343408181e-01 1.4606085672716668e-01 +446 9.5489174587921177e-01 -2.2284752482645742e-01 -7.3147917056258960e-01 +172 -2.3233356067616504e+00 5.9696639792861073e-01 1.0311292046090428e+00 +405 -1.1779705391325133e+00 2.7846640825986735e-01 9.6440591359561623e-01 +323 1.2758508066094825e+00 1.1576865878083721e-02 1.3415986450967126e+00 
+428 4.8021126796596170e-01 4.8003440312842205e-01 -4.1979524804611819e-02 +280 1.1723141734791545e+00 8.4290272518839332e-02 1.1240738308596074e+00 +54 8.2234236592018339e-02 6.9377897211693754e-02 4.6415665343815465e-01 +397 -2.4516753222359439e-01 -3.8647076314136586e-01 -3.1938187998240442e-01 +279 4.7475740365710301e-01 4.2033019401821775e-02 1.0791025172609856e+00 +134 4.6986570792330234e-01 7.5759398359663410e-01 1.3748348906115773e+00 +236 -1.4338534270949166e+00 6.5242359063738475e-01 -4.6879001422000049e-01 +493 -9.5795155221589912e-01 -9.3131490174346532e-01 5.2848598027407112e-01 +346 9.4017276609564226e-01 -1.4423054432512394e+00 -3.4883627652283825e-02 +295 -1.0211326704350645e+00 -1.5688283678435537e+00 2.2737881805456114e-01 +164 4.2933435120637192e-01 1.2908919636558716e+00 -1.8559649323965055e-01 +354 3.1232180266110654e-01 -3.3884174823706786e-01 2.0012570054806617e-01 +449 -4.1956545717722271e-01 8.5033185910749320e-01 -1.6923286514167504e+00 +185 -1.9847768794898153e-02 1.6093024328157031e+00 -3.4278049008935008e-01 +229 3.3354615877618454e-01 -2.1967770508754222e-01 -6.8288738508757751e-01 +135 -3.8598562184690355e-01 2.3459455181853697e-01 1.4591795655915993e-01 +211 -1.1858788850370010e+00 1.3960363241794649e+00 1.1390210005059993e+00 +81 1.3604301094625607e-02 1.2711357052370638e+00 1.3840699383718491e+00 +37 3.5025606454523878e-01 -1.0316562604143946e+00 2.4944393037279400e+00 +355 -4.2713554752038259e-02 1.6441819966987897e-01 -8.2550044716409909e-01 +141 8.1569462514471880e-01 -6.8558748209733145e-01 7.0575287639668483e-01 +122 3.6274405429300222e-01 -4.1985688325391463e-01 2.5223429679090231e-01 +327 5.2689568302324030e-01 -7.1530570005039407e-01 8.1591540939993761e-03 +386 5.2540176252260506e-01 -8.0891002493580211e-01 -2.1080798818740659e-01 +59 3.8008676468324715e-01 1.2302042558959829e+00 -1.2893140123247515e+00 +57 -1.2756375464672689e+00 7.7885421584605408e-01 9.3218921154320622e-01 +489 -4.8772350194378145e-02 
1.1774319824237183e+00 -1.3413963962772402e+00 +326 -8.2073133946066312e-01 -1.2685275916429064e+00 -1.0253535003668284e+00 +359 -1.8991447869255085e+00 -1.2154594584519982e+00 -1.7832247863578221e-01 +349 6.4100123949028309e-02 -3.2723074072385872e-01 -1.5565659217165477e+00 +247 4.5734576435732799e-01 4.9511064545143885e-01 -1.5945768728559153e+00 +365 -5.7917383718454207e-01 -2.3494837692508375e+00 -1.5004840205862926e+00 +382 -1.0813649655944142e+00 4.1831946020927230e-01 2.9306887264755938e-01 +130 -5.7488571729114812e-01 -9.6638702807104448e-01 -1.0278487271582966e+00 +471 1.1085411185196428e+00 -5.2404573748396321e-01 9.9734211160337760e-01 +21 -2.3901903994255250e-01 9.3578484419611557e-02 -7.3719660959071831e-01 +499 -4.9906801820206043e-02 4.0499214176262377e-01 -2.9371307883695258e-01 +253 -7.8928864460108283e-01 3.3838305995790624e-01 7.1275694444227405e-02 +302 1.5254343639867396e-01 -2.7881674166920268e-02 9.3627888140226589e-01 +324 -1.1159492478288651e+00 7.0632546920242867e-01 2.5394407536700253e-01 +154 4.8736880051344117e-01 -1.3177717741071062e+00 -7.9660620460177323e-01 +284 1.7716036485389025e-01 -3.6539348914929748e-01 -4.6941135291661629e-01 +161 -2.7479275242316997e-01 7.6982156884799513e-01 -5.7088165723862649e-01 +112 2.7023059482372042e-01 2.0487127767495705e-01 -4.9350658197380731e-01 +39 -1.6840134362063963e-01 -2.0596641915022665e-01 1.2622422574022685e+00 +269 -1.4448376822412348e-01 4.2047192520573229e-01 -1.5751711248887881e+00 +470 -9.0198936075875380e-01 -1.0035762705605118e-01 1.1469701649179411e+00 +84 -3.7165102348228596e-01 1.5718083932531206e+00 1.8715295090631173e-01 +381 6.6740577340943863e-01 -1.9432536761932276e-01 -1.2757754863002870e+00 +416 7.2118119300841677e-01 8.6813375765446710e-01 -8.2350198387929519e-01 +182 3.3694648494832763e-01 -6.5830649973844163e-02 7.0076361524788766e-01 +347 -2.4949061953800500e-01 -1.1432286442922379e+00 -4.5757988288631407e-01 +380 -4.7143472726668367e-01 -2.9180051952103941e-01 
6.0581680882348110e-01 +19 7.3007216948068054e-02 9.4947888518620716e-01 3.4627576178512598e-01 +430 -2.2320112834958328e-01 -3.8559102989803251e-01 1.0427665572858709e+00 +212 2.0170777345036919e-01 -9.9332505765821755e-01 1.0550787824425254e+00 +479 -7.7910620356048310e-01 -3.4759422268929079e-02 -5.8756871125828891e-01 +217 -7.0999315500035393e-04 -1.4874035841198494e-01 -1.1389898361525996e+00 +147 3.3953282916732841e-01 1.0503362153417393e+00 8.8554338624429496e-01 +13 1.4952012032076418e+00 -1.3492024227588162e-01 8.6834761707295621e-02 +170 -7.5230462228731629e-02 1.9875829136564169e-01 3.8512887113920530e-01 +204 -3.1459152727076800e-01 -7.2066080044499392e-01 -7.5277710665622377e-01 +306 8.0529112138128323e-01 5.8633344712571456e-01 -1.8256367575393391e+00 +414 -1.7678147014996259e+00 -7.1706970804552028e-01 6.0614094017430153e-01 +79 1.6303375369704216e+00 -9.6446702691566244e-01 1.7014099837302679e+00 +116 -1.7231587507489068e+00 -2.2374070614774770e+00 1.0442058418221465e+00 +231 -9.2546402309298947e-01 3.2157716287919585e-01 -6.2797312032694141e-01 +171 1.4243635122212095e+00 -3.4034411127116748e-01 2.1060050313379719e+00 +273 1.0777993506600285e+00 -1.7160435918146213e+00 1.0194335249558832e+00 +260 1.4544392536719453e+00 9.3765863648494707e-01 1.9642995061847701e+00 +43 -1.1326443879098980e+00 1.4846909715454837e-01 1.6874301063514463e+00 +208 -1.1234489793683757e+00 -1.0408587064067447e-01 -4.8386417041968832e-01 +226 6.0458834484750512e-01 -4.5998597100022531e-01 -4.1420588246526679e-01 +370 -2.5264443636161833e-02 2.2888129565845829e-02 -7.5123334633027034e-02 +85 8.9124431006375637e-01 1.2150234901900336e+00 -1.3794080039677503e+00 +213 6.8750004752856542e-01 1.7643443473184617e-01 -7.6039731517372366e-01 +299 1.1166507011383160e+00 -6.9342398354435130e-03 4.0031150060326554e-01 +249 1.6676259701674601e+00 -1.2916011404415280e+00 8.3081388251616040e-02 +258 -3.3338543070706111e-01 -5.9677208925291603e-01 1.5474505568531705e-01 +388 
3.3338877926510010e-02 3.6534261356710357e-01 -9.3163626937108668e-01 +91 -1.0436539497197659e+00 -3.6206368573074071e-01 -3.9406149981573330e-01 +415 1.8022253921012549e-01 -2.5361248238828202e+00 7.8761564433010600e-01 +27 1.6681493439613866e+00 -6.3832374162264238e-01 5.5825898158707521e-01 +465 -3.3883292906124979e-01 -1.9199928938047977e+00 -1.8916637827626086e-02 +72 -7.0256791239588934e-01 -1.3631374858756479e-01 1.1539466869530803e+00 +58 -9.7431680335045678e-02 -1.0635351953490649e-01 -4.5168665276313896e-01 +88 5.2937320884954953e-01 -1.1749454649084838e+00 1.2744645330784524e-01 +6 -1.1593774469007843e+00 5.1413980407296589e-01 1.2944389470552302e+00 +75 5.7374690805841033e-01 7.8433587872715163e-02 -3.9368611836917572e-01 +22 -9.6996881290452630e-01 1.2965746977075312e+00 -1.5394575986278033e-01 +128 -6.4070276272082638e-01 2.2708380115440285e-01 -1.6142126045669323e+00 +193 5.7443781554989515e-01 -1.2458447329459237e+00 -9.3796775534017041e-01 +389 6.7276417854677451e-01 -2.9571653479618748e-01 -2.4423872850138906e-01 +241 -1.4102001294916395e-02 -9.9059498112109945e-01 -8.4827835518272399e-01 +14 1.0515231184651945e+00 4.5617222115757200e-01 -5.2911246318135385e-01 +307 5.0028759749137040e-03 -5.0699437336696396e-01 -2.5790615969461578e-01 +401 3.9207473498457289e-02 -9.2058641117610335e-02 -1.3689269955592848e-01 +248 1.5980225324156891e-01 -4.1600725109692227e-01 4.1253240562945254e-01 +335 5.6526042010051392e-01 -1.0563866873458263e+00 2.2963431232103770e-01 +281 4.7335967523683825e-01 -9.8764893385752195e-01 1.3275577965292436e+00 +33 9.5713349527393091e-02 -6.7578402785937589e-02 1.2308478723868863e+00 +221 -1.2080117435798665e+00 -2.2142477880517361e-01 -7.4060132361069042e-01 +251 5.1352069062594478e-02 -4.7173153686079722e-01 1.3645828375870801e+00 +138 -9.8931419992230929e-01 -5.6403083699472267e-01 4.8437720371021081e-01 +73 -8.7238230525929461e-01 2.2619668651197614e-01 -3.2904255863286136e-01 +237 1.5966476021876643e+00 
-1.0734521639966819e+00 -5.7008225474518526e-01 +77 -6.5731386120913948e-01 -1.3672574981662913e-01 3.4397972232069346e-01 +339 6.8164227902965080e-01 -8.7654828372118265e-01 -9.2289632523091991e-01 +69 -1.0710495228647499e+00 -2.2798359247984892e-01 -1.4017813863798740e+00 +150 1.4728554201464288e+00 1.6873571351133543e-01 -4.9418642333439719e-02 +219 3.5347912349947574e-01 1.9808724875609746e-01 -5.0625815831733678e-01 +9 6.3426493841071574e-01 2.5658018877632760e-01 4.7953937763858034e-01 +3 -1.2144659411870629e+00 8.8297859772359033e-01 -2.6732917639533704e-01 +93 3.3822789154655764e-01 8.1143107440094409e-01 -2.7263548606070248e-01 +68 1.6351351046383883e-01 5.8445604848527588e-01 1.2062643207337951e+00 +35 1.6550671586466750e-02 1.6790869037398232e-01 2.5947900830558479e-02 +174 1.4403688502275100e+00 1.4476094634256031e+00 -1.0038411182890872e+00 +369 -7.9303772394565375e-01 -1.1303432398125628e+00 -1.5457440975201582e+00 +469 -5.5239153419630294e-01 -4.7075634498045837e-01 5.0539291079888571e-01 +270 5.0351525849727374e-01 1.3716919044917866e+00 2.8157430421583601e-01 +196 -2.2101193763774998e-01 9.4257812877032954e-01 -1.2881024224381614e+00 +235 -3.7410168233289759e-01 1.9712488864630789e-01 4.3505985658493368e-01 +104 1.0770774810232915e+00 -7.7420116923795979e-01 6.4885799952777634e-01 +363 6.7513909705285891e-01 2.0186757605257277e-01 6.2260156794788402e-01 +440 -2.7563401138205812e-01 4.0196589304635166e-01 6.8696509786942431e-02 +271 3.5486339796858996e-01 -1.0482634846478769e+00 1.0693870635001128e+00 +385 2.0653637543135839e-01 1.6251902525248125e+00 -1.0240800582063148e+00 +367 1.2090890730479817e+00 7.7304993739281713e-01 -9.1196492155951403e-01 +8 1.5650406952874099e-01 1.4761853226272020e-01 7.1667065222651671e-01 +445 -8.1712119890107360e-01 5.0169175756271378e-01 6.3236875667481124e-02 +95 7.1388432342661989e-01 -2.6058252368101411e-02 -1.0159428151424379e+00 +227 -2.1897046900910261e-01 2.4492515107534649e-01 2.2679621024062299e-01 +45 
5.0120245022687604e-01 -1.1380921868404339e+00 7.8232214243213327e-01 +62 5.7973338245234629e-01 -7.9117819757367913e-01 -5.4327640325878901e-01 +309 5.3715909018265540e-01 -1.7844975395461696e-01 -1.5521878040430610e+00 +70 -1.3649193486754410e-03 -1.5242800010205164e+00 1.6225001176581824e+00 +285 -1.5639729724408871e+00 1.4626366106935738e+00 -1.2286649953661322e+00 +387 -1.3226131489884976e+00 -1.0695489941103238e+00 -1.4911056977700106e+00 +484 -1.5087954881419843e+00 -2.3407968100714568e-01 8.9471716161806181e-01 +98 3.0624993702155973e-01 -1.5934008419723134e-01 -1.0700290072035785e+00 +282 -1.1678279208732874e+00 8.8816716100523774e-01 -3.7594379985472404e-01 +203 -1.2339287833422024e+00 -1.0851718286318821e+00 -3.5581216293468138e-02 +189 -2.5647410293058537e-01 -3.6970804452537526e-01 -2.7009745024499721e-01 +267 -2.3687566231483211e-02 5.4572876490146272e-01 1.2664988400691950e+00 +155 1.7966821085178111e-01 -1.4254017034472058e-01 -1.0614594824352175e+00 +313 -4.0833555690731410e-02 -4.8193546027639250e-01 -4.9602679807507655e-02 +250 4.3395781720710452e-01 9.1543705256636054e-01 -6.1780594432179192e-01 +49 -5.8496932778818223e-01 2.0845066341835794e-01 -4.5660091846264866e-01 +111 -4.5577164923909808e-01 6.7687347516731056e-01 1.6901306317755600e+00 +124 2.3017035971304120e-01 1.2116381200880159e-02 -1.0715278739263430e+00 +488 1.1047027896127255e+00 -1.2726762145955062e+00 7.7913777050672761e-01 +48 -4.0669146183799126e-01 -1.0181831306551499e+00 6.3005463620771884e-01 +123 -1.2387342693749222e-01 -4.5460712390743935e-01 -5.6241811130968811e-01 +423 1.5051226831159246e+00 -1.0959205280256621e+00 3.5512567969767117e-01 +329 1.5695087401093555e-01 -4.0810545929224507e-01 1.5430948869435961e+00 +334 -1.0908322951892016e+00 -5.4577269465148259e-01 -6.7521769754195848e-01 +146 4.4361993166914260e-01 4.1962079818012665e-01 -3.9049661615798353e-01 +500 -3.0319085884539942e-01 7.6039240701443311e-01 2.0836797720615632e+00 +46 -3.9431430053902639e-01 
3.7259615958614579e-02 -1.6080049457594121e+00 +374 9.2250275913940816e-01 -5.4107808677407720e-01 6.5542912197165987e-02 +52 7.3216124676172201e-01 9.4572980131871298e-01 -1.2696251833719743e+00 +466 1.5044833940669236e+00 -1.8483350803538756e+00 9.5057050040256286e-01 +272 -4.0011978947412463e-01 2.0429884556820119e-01 5.4991808303431688e-01 +331 5.7814259377466726e-01 1.7044411336854475e+00 -7.2986308719737536e-01 +12 6.8240059668147224e-01 8.7756006768431916e-01 -1.7546792723746987e+00 +178 -1.1529369201979585e+00 -3.9576843637993819e-01 4.1654001054549866e-01 +376 -1.3678465511836193e+00 -1.5404423718061540e+00 -8.9187926122069050e-01 +117 -1.3804354656475164e+00 -7.9075263667417661e-01 -3.6776627245391275e-01 +419 1.9706999335859632e-01 -1.1815264434314889e+00 1.6458873301525292e-01 +379 2.6784097653047051e-01 3.8806722276897926e-01 4.0603233211791162e-01 +53 1.0521966172519035e+00 5.1164934479578683e-01 9.8577725800730365e-01 +28 1.0129347774638686e+00 1.1058165761432928e+00 -3.9034581257989903e-01 +300 -1.4876024382598509e+00 -6.2467982655378396e-01 1.3535210035045362e+00 +83 9.7907832581926901e-01 1.4855763621624507e+00 4.1007013403839504e-01 +304 -1.8963384707907098e+00 5.2731582964002444e-01 1.0962854464287259e+00 +51 8.5928138726119452e-01 -1.6162178311396377e-01 3.2746315986045632e-01 +180 -3.4136546641226267e-01 1.2771151944969144e+00 -3.9578085701206223e-01 +60 -1.7295103304379962e+00 -9.9492157200179920e-01 1.2224650147307308e+00 +255 5.0740556300071860e-01 -6.6892801585963790e-01 -4.4506165343889975e-01 +336 3.8821455708543523e-01 7.4853146108739210e-01 1.0393955338521590e+00 +74 -8.5185557539809709e-01 5.0099005012093123e-01 6.1187012870886726e-01 +11 -4.8598509694234203e-02 -9.7638648911006976e-01 3.6444261502684167e-01 +106 -8.3362253193144206e-02 -9.2846276284357510e-01 -2.0625632391587234e-01 +342 -1.1850581603543699e+00 -4.6587013939418070e-01 -2.5534356631266192e-01 +366 1.8044603030107276e-01 1.2857436498573065e+00 -4.9002876775305132e-01 
+153 5.9355527141792742e-02 8.5309993046714749e-01 1.2601655126485285e+00 +384 -3.3199740732000493e-02 -1.1586262754841152e+00 2.7022536481319620e-01 +444 1.4366137360637837e-01 7.4777392856310121e-01 -1.3874143079892032e-01 +169 -2.2106828869911426e-02 4.3869453276997300e-01 -8.6050040149769946e-01 +312 -9.5175409360636343e-01 -7.9143766981298624e-01 -8.3109524929616485e-01 +131 -1.2661930440833327e-01 4.9560114120296378e-01 1.2481373975929652e-01 +290 4.7988720115251880e-03 4.3307801179133432e-01 7.2537453656012302e-02 +240 -1.4417217340859769e+00 1.4357683004122099e+00 1.4379544254488796e-01 +403 9.8034468727423985e-01 -2.0429774540476020e-01 -1.4454160823908524e-01 +438 1.1944593880989134e+00 2.3483551676455661e+00 1.9984542691081373e+00 +420 1.3486549938776343e-01 -1.0783938772052266e+00 2.4823645088293936e-01 +175 1.6188020186225376e+00 -1.2786027961604105e+00 1.5779664705358573e-01 +305 7.6304434006850930e-01 -1.0609104653684724e+00 -1.3992599279449225e+00 +341 -5.0986549858189378e-01 9.7345649143358179e-02 -5.4583627317992767e-01 +291 1.4937985447982780e+00 1.1345276617898410e-01 1.4735185581275405e+00 +372 2.7438262099508215e-01 2.3642499432513167e-01 2.0103126185436837e-01 +293 -2.0812456222423235e+00 1.2585017395884830e+00 -1.9872753884412903e-01 +461 -1.7366155414350635e+00 6.7563297134023181e-01 -3.0241597324899727e-01 +263 -9.9912758612453856e-01 -1.4606803538340738e+00 -4.8347668759554979e-01 +473 1.5452042518079840e+00 1.3194414166855677e+00 -1.4200655529990689e+00 +166 -1.4143951087621034e-01 -1.1527398628151918e-01 -2.0324403247706951e-01 +132 1.6628716981291343e+00 -2.6751309910688231e-02 -1.1291183234894089e+00 +32 -2.3270952903472897e-01 2.1018074582476372e+00 1.1203402946623044e+00 +475 -1.2604994295453433e+00 -9.5339439518226154e-01 3.3662881048301792e-02 +283 1.2749816341052314e+00 4.7341406662300863e-02 8.5345288522412766e-01 +205 9.6825352209774937e-02 -1.0389613158303228e+00 -5.4505460161674724e-01 +158 8.8743698904591217e-01 
1.0689060843560156e+00 6.1104406355038210e-01 +1 5.8273461256327774e-01 4.6658417699512433e-01 7.3964607391058224e-01 +485 4.4170048227791719e-01 -2.8581555293721217e-01 -1.0141724005249320e+00 +378 6.6654429040499424e-02 -1.8224880226392848e+00 1.2128685020610477e+00 +71 -9.1996605782626056e-02 -2.2291664206826806e-01 -1.2402752353304469e+00 +320 -9.5597854323421563e-02 -8.6852053155856812e-01 3.5066557017928385e-01 +481 7.9532304801009490e-01 -9.4180364698072971e-01 1.2495155405965916e-02 +318 -4.1450336396091869e-01 3.4120074122917260e-01 7.3720013366972947e-01 +496 -6.2393347120986375e-01 1.0494054390993381e+00 -1.0590359239552802e+00 +319 -9.5320962685676391e-01 9.2021659866276262e-01 5.5763202369068265e-01 +338 9.5180850251473562e-02 7.4581412234581979e-02 1.6562396473566801e+00 +358 -1.2039319191565336e+00 9.9635011716776112e-01 3.7776599320072252e-01 +225 -2.2878377680357529e-02 5.9393376646738183e-01 -1.7855655501046659e+00 +55 -1.2658412487783444e+00 8.3827650617277571e-01 3.7229167676011687e-01 +362 -1.1190412417889151e+00 -4.1093870066191052e-01 -5.5943417408569930e-02 +36 -1.1944064170110538e+00 5.2834231151049005e-01 -2.5661926163189262e-02 +133 -3.4046853819783990e-01 8.7037698795660379e-01 2.6480748394474063e-01 +412 -8.2433971673189854e-01 -2.4364204997081215e-01 3.2182546651866395e-01 +89 1.2836300411162869e+00 -1.0534639266932924e+00 1.7273414295686953e+00 +38 -5.4018176537409834e-01 1.1189891674084445e+00 -5.4949279328730405e-01 +337 2.0680174457636453e+00 6.8756290663232345e-01 -5.3885707707990127e-01 +233 -1.1580426595635346e-01 1.5352867360847791e+00 1.8920809100075830e-02 +442 -5.2273629379374242e-01 -4.7573534425197805e-01 3.3349171376798270e-01 +143 -6.4456717536444386e-01 -8.2221860127843815e-01 -3.1619033462829965e-01 +194 4.6000888686458724e-01 2.4868203858229221e-01 -1.0844456124091726e+00 +308 7.0412401782623568e-01 1.4705120558853640e-01 -1.6984333842169479e+00 +286 -7.4825969157579242e-01 1.7277684761952672e+00 
-4.8658995664543186e-01 +61 -1.4087152482721139e+00 -1.9786675343505181e-01 -8.3049863139435698e-01 +426 -5.9317510584759559e-01 -3.1176204329742879e-01 9.7102624547089234e-01 +450 2.9660411649227686e-01 -1.0106432265344207e+00 -8.0608626908093450e-01 +188 -4.5857896465809017e-01 8.3781782180488956e-02 1.6679261825519407e+00 +94 -2.5105865768429703e-01 -1.0277678550758340e+00 2.0110948522283595e-01 +252 -2.6250057461969262e-01 -8.3779492646801812e-01 -1.2443616941843418e+00 +321 -1.3849227082743394e+00 -7.9961814140752485e-01 -5.2758719721869440e-01 +156 9.7248559724229044e-01 1.4856613149426096e-01 -1.0762319296143268e+00 +167 4.8758255885458768e-01 3.7938735168685944e-01 -8.2720329905212353e-01 +330 4.8147102008722625e-01 -2.9945144880586022e-01 -7.3225876236181275e-01 +195 1.6483881807209144e+00 -1.9598861592898009e-01 -6.2914422221089158e-02 +78 2.3726384124165131e-01 2.7514362192855252e-01 1.7571172900747967e-01 +157 -2.1865090243933563e+00 3.7632441115528098e-01 7.7333806476534650e-01 +373 9.8884980347773899e-01 3.4121263358683873e-01 -3.3626353210018556e-01 +56 -5.1874451216553419e-01 5.0889073330236878e-01 -7.4130187216438104e-01 +315 1.5052362221906064e+00 -6.9840962964633502e-01 -1.5488907100876634e-01 +478 1.1082301092586995e+00 2.9770971389874379e-01 -5.4580262091212584e-01 +109 9.4396405787734605e-01 4.9318782477478235e-01 1.2284569230572757e+00 +357 -1.2371693160029509e+00 4.1376978558912775e-01 3.3529522630256764e-01 +454 -4.4342104600414733e-01 -3.5633346291025164e-01 -2.1131375082322554e-01 +86 -2.6274074185542590e-01 -2.7912975828127135e-01 -3.4564526880694488e-02 +262 -2.0457328043953218e-01 -1.0549970010682790e+00 -8.7569945910877856e-02 +160 1.2508565962389259e-01 -1.1049367732134203e+00 -4.1414673901133031e-01 +168 -5.4833844765552298e-01 9.1122623641564449e-01 -8.1466354261018392e-01 +145 1.9424847461468013e+00 2.4717259671440775e-01 -8.1993106446423478e-02 +400 -7.3927429735196926e-01 9.6278828542903949e-01 9.8355595468926471e-01 +395 
-1.7616642510587566e+00 -4.3839255400156163e-01 -4.3538389834393521e-01 +297 -8.1651466743971568e-01 1.9480772762641770e-02 -2.5090574319554337e-01 +207 9.9594630394692474e-01 -5.5333584932915503e-01 -8.1672262386449701e-02 +460 3.8863838706865678e-01 -1.9332061145329088e-01 7.5599739666291188e-01 +44 4.9322334356812642e-01 5.4097595609480897e-01 1.2096517945019982e+00 +377 -2.3588544095100208e-02 4.2843373407529362e-01 -8.1372583759538197e-01 +411 -7.0187880935114388e-01 -8.8403807864596529e-01 3.6139298123719504e-01 +396 -6.1324863673452068e-01 -5.8579142070009704e-01 -1.6980414017487730e+00 +424 -1.4405372913769463e+00 -1.2161267905238037e-01 -7.1275775203655271e-03 +66 -2.2455608800065441e-01 -5.5194985983545303e-02 -2.9031971555487707e-01 +371 2.8693892522161785e-01 -1.3942754264578394e-01 4.1638095135795372e-01 +186 -2.5542578648985415e-01 -2.7435394726950046e-01 1.6931522427410683e+00 +413 5.6007135922818607e-01 1.2189064363493711e-01 -7.5271223366515216e-01 diff --git a/examples/USER/uef/npt_biaxial/in.npt_biaxial b/examples/USER/uef/npt_biaxial/in.npt_biaxial new file mode 100644 index 0000000000000000000000000000000000000000..152054fce652b64b6052e30ec2b73ab7e3f92278 --- /dev/null +++ b/examples/USER/uef/npt_biaxial/in.npt_biaxial @@ -0,0 +1,31 @@ +# biaxial NPT deformation of WCA fluid + +units lj +atom_style atomic + + +pair_style lj/cut 1.122562 +read_data data.wca +pair_coeff 1 1 1.0 1.0 +pair_modify shift yes + +neighbor 0.5 bin +neigh_modify delay 0 + +change_box all triclinic + +# these commads show the different methods that may be used to impose +# a constant stress through isotropic or anisotropic coupling +fix 1 all npt/uef temp 0.722 0.722 0.5 iso 10 10 5 erate 0.5 0.5 ext z +#fix 1 all npt/uef temp 0.722 0.722 0.5 z 10 10 5 erate 0.5 0.5 ext xyz + +fix 2 all momentum 100 linear 1 1 1 + +#dump 1 all atom 25 dump.lammpstrj + +#dump 2 all cfg/uef 25 dump.*.cfg mass type xs ys zs + +thermo_style custom step c_1_press[1] c_1_press[2] c_1_press[3] + 
+thermo 50 +run 10000 diff --git a/examples/USER/uef/npt_biaxial/log.22Sep17.npt_biaxial.g++.1 b/examples/USER/uef/npt_biaxial/log.22Sep17.npt_biaxial.g++.1 new file mode 100644 index 0000000000000000000000000000000000000000..e6d3b30cdbae469ff63f809c6025a44ad10e4bf0 --- /dev/null +++ b/examples/USER/uef/npt_biaxial/log.22Sep17.npt_biaxial.g++.1 @@ -0,0 +1,284 @@ +LAMMPS (22 Sep 2017) +# biaxial NPT deformation of WCA fluid + +units lj +atom_style atomic + + +pair_style lj/cut 1.122562 +read_data data.wca + orthogonal box = (0 0 0) to (8.39798 8.39798 8.39798) + 1 by 1 by 1 MPI processor grid + reading atoms ... + 500 atoms + reading velocities ... + 500 velocities +pair_coeff 1 1 1.0 1.0 +pair_modify shift yes + +neighbor 0.5 bin +neigh_modify delay 0 + +change_box all triclinic + triclinic box = (0 0 0) to (8.39798 8.39798 8.39798) with tilt (0 0 0) + +# these commads show the different methods that may be used to impose +# a constant stress through isotropic or anisotropic coupling +fix 1 all npt/uef temp 0.722 0.722 0.5 iso 10 10 5 erate 0.5 0.5 ext z +#fix 1 all npt/uef temp 0.722 0.722 0.5 z 10 10 5 erate 0.5 0.5 ext xyz + +fix 2 all momentum 100 linear 1 1 1 + +#dump 1 all atom 25 dump.lammpstrj + +#dump 2 all cfg/uef 25 dump.*.cfg mass type xs ys zs + +thermo_style custom step c_1_press[1] c_1_press[2] c_1_press[3] + +thermo 50 +run 10000 +Neighbor list info ... 
+ update every 1 steps, delay 0 steps, check yes + max neighbors/atom: 2000, page size: 100000 + master list distance cutoff = 1.62256 + ghost atom cutoff = 1.62256 + binsize = 0.811281, bins = 11 11 11 + 1 neighbor lists, perpetual/occasional/extra = 1 0 0 + (1) pair lj/cut, perpetual + attributes: half, newton on + pair build: half/bin/newton/tri + stencil: half/bin/3d/newton/tri + bin: standard +Per MPI rank memory allocation (min/avg/max) = 2.742 | 2.742 | 2.742 Mbytes +Step c_1_press[1] c_1_press[2] c_1_press[3] + 0 6.3937851 7.0436438 6.4461087 + 50 7.9020345 7.303289 14.733929 + 100 8.3214325 8.385843 13.166247 + 150 5.7168419 5.6083988 9.7224198 + 200 3.8875348 4.0840705 7.9912973 + 250 4.2933724 4.2410519 7.7200953 + 300 4.5452314 4.2434949 8.6709832 + 350 5.446489 5.6166962 11.300536 + 400 6.0682558 5.7787878 12.106852 + 450 5.3884296 5.5066688 9.9100012 + 500 4.8046418 4.7115986 9.9769201 + 550 4.9043933 4.6240966 9.319336 + 600 4.6843238 4.9571246 9.5325487 + 650 5.1952989 5.195648 9.6187047 + 700 5.4163364 5.2938289 10.136655 + 750 5.6723178 5.0670261 11.415221 + 800 5.9379901 5.6402916 10.815209 + 850 5.0695389 5.3021432 10.742859 + 900 4.6498962 4.7111912 9.8453523 + 950 4.4811641 5.170132 9.421765 + 1000 4.7501483 4.2420772 9.3510803 + 1050 4.5973379 5.2046799 9.8632975 + 1100 4.7879562 4.9051316 9.8658626 + 1150 5.0528771 5.5048545 10.110913 + 1200 4.9926563 5.2482081 9.9610863 + 1250 4.8476232 4.8498859 9.5752989 + 1300 5.2932038 5.0594534 10.869545 + 1350 5.4720421 5.0830442 10.69386 + 1400 4.8175566 4.9863651 10.041898 + 1450 4.6307145 4.5615459 9.231299 + 1500 5.0296259 4.5433558 8.7180513 + 1550 4.8708444 5.1601014 10.256791 + 1600 5.5098593 5.316773 10.224386 + 1650 5.5055907 5.0516814 10.272037 + 1700 4.6965552 5.2436628 9.8272905 + 1750 5.0212307 4.740601 8.9738802 + 1800 4.7756351 5.199734 9.9929449 + 1850 4.7107092 5.177203 10.580427 + 1900 5.0935608 4.5286844 9.3036832 + 1950 4.8035855 4.894359 9.4423987 + 2000 4.5643937 4.7480477 
9.4384251 + 2050 4.9314701 4.990414 10.151815 + 2100 5.9828824 5.8188026 12.262691 + 2150 5.5337303 5.4598468 10.136112 + 2200 4.892172 4.8699674 9.1629905 + 2250 4.5680591 4.5740533 9.5633545 + 2300 5.0023535 4.3948659 8.9645774 + 2350 5.2533056 4.9803884 10.255653 + 2400 5.3330196 5.3888322 10.021617 + 2450 5.2095527 4.8765336 10.135381 + 2500 5.221153 5.2974568 10.2493 + 2550 5.385331 5.0801192 10.490479 + 2600 5.3274988 5.0253548 10.147587 + 2650 4.718677 5.2710337 9.7549521 + 2700 4.5811521 4.6083971 8.1923164 + 2750 4.4743752 4.3319737 8.8690805 + 2800 5.0215013 5.2262961 9.8627954 + 2850 6.1005107 5.5994942 11.170661 + 2900 5.9673524 5.362124 10.401699 + 2950 5.5400849 5.7523993 10.292536 + 3000 4.9202636 5.1210431 10.125839 + 3050 4.5286487 5.3586164 9.8634322 + 3100 4.5719961 5.1615414 9.8027972 + 3150 5.4091919 4.8581943 9.96938 + 3200 5.522125 5.3564838 9.7638407 + 3250 4.9019062 5.2514758 9.2993079 + 3300 5.2375918 5.1439012 9.4313575 + 3350 4.750235 4.8692016 10.54193 + 3400 5.5793211 5.4184157 11.021389 + 3450 4.9022614 5.3166498 9.4629659 + 3500 4.6348617 4.4650979 9.0665548 + 3550 4.7922405 4.8961269 10.255446 + 3600 4.8914457 5.1158894 9.4736084 + 3650 5.062771 4.6725475 10.263484 + 3700 5.4842823 5.7793971 10.342915 + 3750 5.3136012 5.063065 10.398307 + 3800 4.9372149 4.9270414 9.5304748 + 3850 5.2144752 5.1716455 9.7575725 + 3900 5.0892665 5.1697057 9.918052 + 3950 5.1124507 5.354702 9.791366 + 4000 5.1255084 5.1143653 10.913101 + 4050 5.1891698 4.9913681 9.6871728 + 4100 4.7663368 4.2435014 8.3815668 + 4150 4.8060033 4.3415868 9.6553386 + 4200 4.8548303 4.8006768 9.5995801 + 4250 5.0976366 5.2683175 10.386444 + 4300 5.8921937 5.5134696 10.788143 + 4350 5.8323871 5.5255869 11.199128 + 4400 5.2464655 5.0005905 10.311055 + 4450 4.9264849 5.2499854 10.26702 + 4500 4.4431895 4.536981 8.7489096 + 4550 4.5180914 4.2080277 8.6525529 + 4600 5.1782188 5.1224059 10.683341 + 4650 5.4156233 4.8714464 10.473939 + 4700 5.3107669 5.224614 10.569391 + 4750 
4.9538022 5.2509475 10.288918 + 4800 4.6976945 4.8107142 9.8299772 + 4850 5.1227936 5.0737571 10.440452 + 4900 4.7580514 4.6375995 9.1971008 + 4950 5.0647601 4.6470735 9.583131 + 5000 5.196231 5.7531491 10.409807 + 5050 5.6691323 5.7163652 12.335701 + 5100 5.3603738 5.4887106 10.961712 + 5150 4.455028 4.6494465 9.8096968 + 5200 4.7596912 4.4804896 9.3762885 + 5250 5.3144927 5.0113772 9.553101 + 5300 5.3445266 4.8262035 9.1220802 + 5350 5.1540657 5.5982676 10.765178 + 5400 5.1773418 5.2684381 10.452351 + 5450 4.8946859 5.3283747 9.8015564 + 5500 5.2009608 4.7183522 9.4558009 + 5550 5.4158589 5.5005458 10.539505 + 5600 4.7196831 5.4181991 9.6439249 + 5650 4.8333571 4.8601728 8.9350189 + 5700 5.4395698 4.9730096 10.669681 + 5750 5.2947443 5.6973259 10.020539 + 5800 5.4391993 5.5255143 10.264969 + 5850 4.9921388 5.2643827 10.217028 + 5900 5.0048643 4.7952641 8.9718929 + 5950 5.1843818 4.5987295 9.6858944 + 6000 5.0343993 4.946933 9.7436708 + 6050 4.6202714 5.3502658 10.752915 + 6100 5.6914422 5.3621964 10.281827 + 6150 5.1928763 5.9652686 10.923881 + 6200 5.0030409 5.2013891 10.056308 + 6250 4.9699876 5.2363753 9.9964211 + 6300 4.9129606 4.4558458 9.0419952 + 6350 4.6623958 4.4078264 8.528649 + 6400 4.9811441 5.1183207 10.261751 + 6450 5.3644017 5.5153937 10.401295 + 6500 5.6674981 5.7427566 11.928777 + 6550 5.1622364 5.3212928 10.067198 + 6600 4.5954278 5.1645397 10.16724 + 6650 4.9192712 5.0413326 9.95656 + 6700 4.6179845 4.5656214 9.3798952 + 6750 4.7287495 4.5071148 8.7890116 + 6800 4.8600442 4.8083512 10.245595 + 6850 5.0515531 5.1609272 10.553855 + 6900 5.1159742 5.1359869 10.594699 + 6950 4.8908884 5.0592418 9.5698704 + 7000 4.7654136 4.7530776 8.9439321 + 7050 4.779293 4.7534957 9.7244349 + 7100 5.2265831 5.6869073 10.32717 + 7150 5.4019177 5.15174 10.457567 + 7200 4.9817102 5.0596098 10.337574 + 7250 5.1836654 5.6065238 10.723108 + 7300 4.2916569 4.457143 8.5419099 + 7350 4.3906104 4.5439294 9.0805415 + 7400 4.998572 5.3386063 10.491418 + 7450 5.1109022 
5.0506801 10.636116 + 7500 5.0248381 5.019932 10.217023 + 7550 5.0109265 5.1438717 9.9032426 + 7600 4.6628614 4.6204146 8.9459669 + 7650 4.8930717 5.0650009 10.049331 + 7700 4.9373454 5.6265835 10.210644 + 7750 5.5001067 5.3133253 10.667995 + 7800 5.0816102 5.0125753 10.591986 + 7850 4.9638046 5.1969015 9.9728333 + 7900 4.8438207 4.9217213 8.9978809 + 7950 4.7318805 4.6248537 8.6806596 + 8000 5.2808543 5.2892613 10.932535 + 8050 5.9609722 5.87087 10.47602 + 8100 5.2190231 5.6693244 11.244536 + 8150 5.3481127 5.2849903 10.222845 + 8200 4.7833053 4.7404657 9.2034474 + 8250 4.5461994 4.510467 10.294452 + 8300 4.6025175 4.8332817 8.7967546 + 8350 5.0389897 5.6642908 10.243402 + 8400 4.8414338 4.8925143 9.3653631 + 8450 5.5087429 4.7830361 10.831666 + 8500 5.2678146 5.1697789 9.9105782 + 8550 5.1211843 4.9097801 9.4165956 + 8600 5.8239149 5.0821022 10.803261 + 8650 5.3620154 5.5831747 11.16202 + 8700 5.1625813 4.8791404 10.537681 + 8750 4.5622461 5.0157549 10.013227 + 8800 4.4051517 5.0224553 9.6364273 + 8850 4.1711629 4.635617 8.5470244 + 8900 4.7049907 5.2458435 10.100728 + 8950 4.8568883 5.2360772 9.2306469 + 9000 5.0091899 5.2203574 10.718541 + 9050 5.1037824 4.9022451 10.24271 + 9100 5.0789015 4.9331454 9.173614 + 9150 5.3865455 5.3427553 11.40199 + 9200 5.5089482 5.9423232 10.976063 + 9250 5.1353552 5.0650262 10.040607 + 9300 4.6761948 4.9155175 9.6413722 + 9350 4.4780834 4.3934708 8.7049819 + 9400 4.2561799 4.7906324 9.046134 + 9450 5.6162819 5.2881846 9.9040868 + 9500 5.7554547 5.6111262 10.23849 + 9550 5.4230462 5.5656045 10.908006 + 9600 5.5045685 4.9818892 9.8929535 + 9650 5.0541481 5.0183351 9.5226021 + 9700 4.9712829 5.2395398 9.9996693 + 9750 5.0960017 5.4419775 10.914719 + 9800 5.0790688 5.6378474 10.00789 + 9850 4.9661747 5.114502 9.4585052 + 9900 5.0133498 4.7456254 9.4572653 + 9950 5.3318846 4.6643122 10.096292 + 10000 5.2227687 4.8924305 9.5894615 +Loop time of 4.78247 on 1 procs for 10000 steps with 500 atoms + +Performance: 903298.340 tau/day, 
2090.968 timesteps/s +99.2% CPU use with 1 MPI tasks x no OpenMP threads + +MPI task timing breakdown: +Section | min time | avg time | max time |%varavg| %total +--------------------------------------------------------------- +Pair | 0.73894 | 0.73894 | 0.73894 | 0.0 | 15.45 +Neigh | 2.9092 | 2.9092 | 2.9092 | 0.0 | 60.83 +Comm | 0.32306 | 0.32306 | 0.32306 | 0.0 | 6.76 +Output | 0.003392 | 0.003392 | 0.003392 | 0.0 | 0.07 +Modify | 0.6959 | 0.6959 | 0.6959 | 0.0 | 14.55 +Other | | 0.112 | | | 2.34 + +Nlocal: 500 ave 500 max 500 min +Histogram: 1 0 0 0 0 0 0 0 0 0 +Nghost: 905 ave 905 max 905 min +Histogram: 1 0 0 0 0 0 0 0 0 0 +Neighs: 3340 ave 3340 max 3340 min +Histogram: 1 0 0 0 0 0 0 0 0 0 + +Total # of neighbors = 3340 +Ave neighs/atom = 6.68 +Neighbor list builds = 4580 +Dangerous builds = 0 + +Please see the log.cite file for references relevant to this simulation + +Total wall time: 0:00:04 diff --git a/examples/USER/uef/npt_biaxial/log.22Sep17.npt_biaxial.g++.4 b/examples/USER/uef/npt_biaxial/log.22Sep17.npt_biaxial.g++.4 new file mode 100644 index 0000000000000000000000000000000000000000..9be4413a9690e82a497ff4f9f453b1246d5f7a0f --- /dev/null +++ b/examples/USER/uef/npt_biaxial/log.22Sep17.npt_biaxial.g++.4 @@ -0,0 +1,284 @@ +LAMMPS (22 Sep 2017) +# biaxial NPT deformation of WCA fluid + +units lj +atom_style atomic + + +pair_style lj/cut 1.122562 +read_data data.wca + orthogonal box = (0 0 0) to (8.39798 8.39798 8.39798) + 1 by 2 by 2 MPI processor grid + reading atoms ... + 500 atoms + reading velocities ... 
+ 500 velocities +pair_coeff 1 1 1.0 1.0 +pair_modify shift yes + +neighbor 0.5 bin +neigh_modify delay 0 + +change_box all triclinic + triclinic box = (0 0 0) to (8.39798 8.39798 8.39798) with tilt (0 0 0) + +# these commads show the different methods that may be used to impose +# a constant stress through isotropic or anisotropic coupling +fix 1 all npt/uef temp 0.722 0.722 0.5 iso 10 10 5 erate 0.5 0.5 ext z +#fix 1 all npt/uef temp 0.722 0.722 0.5 z 10 10 5 erate 0.5 0.5 ext xyz + +fix 2 all momentum 100 linear 1 1 1 + +#dump 1 all atom 25 dump.lammpstrj + +#dump 2 all cfg/uef 25 dump.*.cfg mass type xs ys zs + +thermo_style custom step c_1_press[1] c_1_press[2] c_1_press[3] + +thermo 50 +run 10000 +Neighbor list info ... + update every 1 steps, delay 0 steps, check yes + max neighbors/atom: 2000, page size: 100000 + master list distance cutoff = 1.62256 + ghost atom cutoff = 1.62256 + binsize = 0.811281, bins = 11 11 11 + 1 neighbor lists, perpetual/occasional/extra = 1 0 0 + (1) pair lj/cut, perpetual + attributes: half, newton on + pair build: half/bin/newton/tri + stencil: half/bin/3d/newton/tri + bin: standard +Per MPI rank memory allocation (min/avg/max) = 2.71 | 2.71 | 2.71 Mbytes +Step c_1_press[1] c_1_press[2] c_1_press[3] + 0 6.3937851 7.0436438 6.4461087 + 50 7.9020345 7.303289 14.733929 + 100 8.3214325 8.385843 13.166247 + 150 5.7168419 5.6083988 9.7224198 + 200 3.8875348 4.0840705 7.9912973 + 250 4.2933724 4.2410519 7.7200953 + 300 4.5452314 4.2434949 8.6709832 + 350 5.446489 5.6166962 11.300536 + 400 6.0682558 5.7787878 12.106852 + 450 5.3884296 5.5066688 9.9100012 + 500 4.8046418 4.7115986 9.9769201 + 550 4.9043933 4.6240966 9.319336 + 600 4.6843238 4.9571246 9.5325487 + 650 5.1952989 5.195648 9.6187047 + 700 5.4163364 5.2938289 10.136655 + 750 5.6723178 5.0670261 11.415221 + 800 5.9379901 5.6402916 10.815209 + 850 5.0695389 5.3021432 10.742859 + 900 4.6498961 4.7111912 9.8453524 + 950 4.4811637 5.1701321 9.4217645 + 1000 4.750149 4.2420768 
9.3510788 + 1050 4.5973376 5.2046787 9.8633025 + 1100 4.7879517 4.9051339 9.8658578 + 1150 5.0528775 5.5048671 10.110905 + 1200 4.9926841 5.2482049 9.9610519 + 1250 4.8475836 4.8499116 9.5753651 + 1300 5.2930219 5.0593566 10.869335 + 1350 5.4722342 5.0830411 10.693439 + 1400 4.8165803 4.9851498 10.04213 + 1450 4.6276458 4.5642988 9.2306141 + 1500 5.0196773 4.5470773 8.7204145 + 1550 4.878246 5.1583406 10.263895 + 1600 5.4921049 5.3126759 10.274755 + 1650 5.4363266 5.1708866 9.9880665 + 1700 4.9186005 5.2841155 9.5911223 + 1750 4.9105668 4.7112031 8.9221295 + 1800 4.9833291 4.886821 9.6573393 + 1850 5.0729703 4.8331712 10.094971 + 1900 5.7220173 5.9330299 10.580261 + 1950 5.3176022 5.7140521 11.11604 + 2000 5.3247727 5.5172893 10.622834 + 2050 5.2055235 4.8768078 9.9819356 + 2100 4.4604981 4.1427844 7.8106592 + 2150 4.4406592 4.8264893 9.6662695 + 2200 5.2350836 5.1039145 10.36006 + 2250 5.3777857 5.3274609 11.357157 + 2300 5.4888334 5.9555482 10.76346 + 2350 4.6122564 4.7356468 9.0833059 + 2400 4.6670237 4.4895588 9.0619648 + 2450 4.2201177 4.2558397 9.4898835 + 2500 5.452448 5.4336384 10.50224 + 2550 5.012581 5.316158 10.324517 + 2600 5.0880279 5.1264772 10.085103 + 2650 4.8885834 5.2368982 9.6002032 + 2700 5.1549266 5.3419678 11.335447 + 2750 5.497105 5.3643445 9.9990393 + 2800 4.8826744 4.9875712 10.125435 + 2850 4.8617121 5.1282348 9.5629401 + 2900 4.7883446 4.6187804 9.0562496 + 2950 4.7656266 5.1293592 10.693811 + 3000 5.2676745 5.110172 9.3512146 + 3050 4.5749222 4.8413907 10.311305 + 3100 5.0794819 5.265009 9.1598496 + 3150 5.2078869 5.1879882 10.412548 + 3200 5.0187616 4.6226213 9.7266174 + 3250 5.1541897 4.5157063 9.8355764 + 3300 5.0721396 5.3545282 10.174356 + 3350 5.3984495 5.3222207 10.008886 + 3400 5.1263226 5.1189192 10.361534 + 3450 5.1251845 4.8312752 9.6546597 + 3500 5.1133696 5.2646289 10.320765 + 3550 4.9884235 5.3861707 9.1944042 + 3600 5.196909 5.203186 10.085965 + 3650 5.4717592 5.2205442 10.251283 + 3700 5.4429771 5.3027898 11.385714 + 3750 
5.5688484 5.5980199 10.558193 + 3800 4.5239453 4.7021545 8.952588 + 3850 4.6438079 4.6409958 9.3890154 + 3900 5.1108473 4.8787691 10.665694 + 3950 5.6398426 5.4386578 10.668189 + 4000 5.063697 4.9663173 10.513266 + 4050 4.8770847 4.4603573 9.8101845 + 4100 4.3950768 4.3579384 8.3402845 + 4150 4.3355402 5.0429352 10.323111 + 4200 4.7688478 5.051487 9.0632339 + 4250 4.9879366 5.3367146 10.409554 + 4300 5.4578199 5.4889206 10.418789 + 4350 5.6598068 5.4538572 10.842349 + 4400 5.3705312 5.3796871 10.430547 + 4450 4.663804 5.058851 9.2705923 + 4500 4.3439039 4.3523422 8.1747925 + 4550 4.5414802 4.3750772 9.2702452 + 4600 4.9216199 5.2897069 10.747727 + 4650 5.5154852 5.9628437 10.5168 + 4700 5.45199 5.382787 10.654544 + 4750 4.7525419 5.4701385 9.3189378 + 4800 5.3696365 4.6134207 9.4455676 + 4850 5.2444123 5.035993 9.4148435 + 4900 5.6006507 4.8536828 10.283579 + 4950 5.155711 4.978634 10.58973 + 5000 5.0854607 4.9853307 9.2414296 + 5050 5.1098462 4.7349164 9.8739001 + 5100 5.1989395 5.0217416 9.8780949 + 5150 5.612116 5.2165007 10.338464 + 5200 5.0571356 5.3109846 10.685262 + 5250 5.4832657 5.0371665 8.9420853 + 5300 4.5312549 4.9629392 8.2478064 + 5350 5.1617038 5.0533699 10.452218 + 5400 5.7873394 5.6776926 11.926526 + 5450 5.7002516 5.243239 10.940265 + 5500 4.7896799 4.898544 10.163856 + 5550 4.9155627 4.9567495 9.4445476 + 5600 4.2447343 4.5045912 8.7732992 + 5650 5.070197 4.7343938 9.9908239 + 5700 4.9609446 5.0901934 10.812786 + 5750 5.4001631 5.5552888 10.085896 + 5800 5.4209837 4.7153245 9.6865245 + 5850 4.9801041 5.180338 9.8930439 + 5900 5.3423595 5.2341361 10.294159 + 5950 5.683047 5.6830131 10.24313 + 6000 5.0618789 5.4533644 10.713412 + 6050 5.4034888 4.6341621 10.031976 + 6100 5.1934299 4.7525347 9.1287151 + 6150 5.0092398 4.806931 10.024305 + 6200 5.3046516 5.3083532 9.6396223 + 6250 5.2824046 4.7957557 10.305279 + 6300 5.3007029 5.0071874 11.175322 + 6350 5.1128883 4.990408 9.3439118 + 6400 5.0543602 4.9971378 9.8259954 + 6450 4.8843692 4.9116343 
10.08132 + 6500 4.5966453 4.8042861 9.160272 + 6550 4.8510961 4.7096646 9.8009968 + 6600 5.658307 5.2330511 10.739519 + 6650 5.2374409 5.3241249 10.291779 + 6700 4.9006975 5.0036186 9.9872029 + 6750 5.2209104 5.1826025 9.5671875 + 6800 4.5238727 4.3070529 8.6072303 + 6850 4.2617247 4.7551571 9.7302077 + 6900 5.6499354 4.8714257 10.723511 + 6950 5.6881769 5.1800721 10.18134 + 7000 5.588834 5.0104896 10.304105 + 7050 4.9404045 4.8589121 9.7096741 + 7100 5.2208179 4.9339808 9.7737491 + 7150 5.4507842 5.046485 10.734783 + 7200 4.9737171 5.5760486 9.1627431 + 7250 4.5967409 4.750471 9.315832 + 7300 5.5147308 5.3202861 10.542679 + 7350 5.7730418 5.5363574 10.384376 + 7400 4.9879586 5.2837443 9.4485798 + 7450 5.1862431 4.8357842 10.017598 + 7500 5.4528245 5.1864957 10.941774 + 7550 5.4202434 5.1089468 10.128264 + 7600 4.8063537 4.8723653 9.0364984 + 7650 4.3144701 4.6148377 9.4939315 + 7700 4.9033831 5.5327473 9.9054613 + 7750 5.0693093 5.0768222 10.473081 + 7800 5.0591805 5.6009473 10.006225 + 7850 5.15269 5.468248 10.404619 + 7900 5.1971759 5.0615117 9.9614488 + 7950 4.9771238 4.886213 9.4730722 + 8000 4.7731123 5.1111433 9.9550597 + 8050 5.1655183 5.4432364 9.6649669 + 8100 5.4597006 5.4026039 11.631184 + 8150 5.3229643 5.4394219 9.9830611 + 8200 4.5420712 4.9359646 9.0121988 + 8250 4.5455108 4.3883996 10.304568 + 8300 5.219721 5.1571958 10.305462 + 8350 4.7291561 4.7391636 9.0768372 + 8400 5.3262934 5.8221591 11.065466 + 8450 4.7583026 5.2282086 10.291955 + 8500 4.9174536 4.5701979 10.140444 + 8550 4.459922 4.8293188 9.4438719 + 8600 4.7962584 4.5811071 9.5158666 + 8650 4.6097275 4.431952 9.4350505 + 8700 5.166554 5.2000584 11.162202 + 8750 5.2353596 5.1668944 10.829751 + 8800 5.3150111 4.9983333 9.8402224 + 8850 4.9571197 4.9196589 9.4880549 + 8900 4.902223 4.7539187 10.002425 + 8950 4.9531983 5.0517321 9.7838444 + 9000 5.4003802 4.9900303 10.550808 + 9050 4.9254643 5.0678701 10.24321 + 9100 4.9902263 4.9056928 9.0326566 + 9150 5.1003677 5.1555374 10.049056 + 9200 
5.2358131 5.5834504 10.354698 + 9250 5.5781649 5.1188429 10.361369 + 9300 5.2100192 5.0737267 10.074694 + 9350 5.1462976 4.8010759 9.0279769 + 9400 5.0177693 5.4890092 10.03612 + 9450 5.2507957 5.541141 10.639854 + 9500 5.836784 4.9498236 10.288015 + 9550 5.4698183 5.8761209 10.979924 + 9600 5.0534023 5.0043428 10.436057 + 9650 4.3619773 4.5413125 9.047513 + 9700 4.3777508 4.7902251 8.9501908 + 9750 4.6851974 4.3152085 8.825764 + 9800 5.7312665 4.850913 10.247637 + 9850 6.2290614 5.5480801 10.934907 + 9900 6.0316892 6.1098926 11.562223 + 9950 5.6370814 5.3933342 11.148805 + 10000 4.6429923 5.0853156 9.4267693 +Loop time of 3.06414 on 4 procs for 10000 steps with 500 atoms + +Performance: 1409858.531 tau/day, 3263.561 timesteps/s +99.4% CPU use with 4 MPI tasks x no OpenMP threads + +MPI task timing breakdown: +Section | min time | avg time | max time |%varavg| %total +--------------------------------------------------------------- +Pair | 0.27954 | 0.30916 | 0.3866 | 8.1 | 10.09 +Neigh | 1.0657 | 1.129 | 1.2853 | 8.5 | 36.85 +Comm | 0.56334 | 0.80526 | 0.89816 | 15.6 | 26.28 +Output | 0.0032749 | 0.0041527 | 0.006541 | 2.1 | 0.14 +Modify | 0.6522 | 0.67016 | 0.69843 | 2.1 | 21.87 +Other | | 0.1464 | | | 4.78 + +Nlocal: 125 ave 130 max 122 min +Histogram: 2 0 0 0 0 1 0 0 0 1 +Nghost: 459.75 ave 470 max 448 min +Histogram: 1 0 0 0 0 1 1 0 0 1 +Neighs: 837 ave 867 max 811 min +Histogram: 2 0 0 0 0 0 0 0 1 1 + +Total # of neighbors = 3348 +Ave neighs/atom = 6.696 +Neighbor list builds = 4589 +Dangerous builds = 0 + +Please see the log.cite file for references relevant to this simulation + +Total wall time: 0:00:03 diff --git a/examples/USER/uef/nvt_uniaxial/data.wca b/examples/USER/uef/nvt_uniaxial/data.wca new file mode 100644 index 0000000000000000000000000000000000000000..889ba4d2f2a8d2cb3892bda144544f23140ccc03 --- /dev/null +++ b/examples/USER/uef/nvt_uniaxial/data.wca @@ -0,0 +1,1022 @@ +LAMMPS data file via write_data, version 14 May 2016, timestep = 400000 + 
+500 atoms +1 atom types + +0.0000000000000000e+00 8.3979809569125372e+00 xlo xhi +0.0000000000000000e+00 8.3979809569125372e+00 ylo yhi +0.0000000000000000e+00 8.3979809569125372e+00 zlo zhi + +Masses + +1 1 + +Pair Coeffs # lj/cut + +1 1 1 + +Atoms # atomic + +7 1 2.4137737201272674e-01 2.0017433398687118e-01 3.6157393311703534e-02 2 1 2 +398 1 1.6739594898193126e+00 3.0313452824803544e-01 5.7986723161362197e-01 -3 0 1 +173 1 6.1370793327804556e+00 4.4978954867119525e-01 3.6568951377817088e-01 -4 1 0 +462 1 7.8754716784931862e+00 5.2908038473333074e-01 7.9185633743762940e-01 1 1 -2 +383 1 6.5373096080170212e-01 1.5337525983981986e+00 6.3208419126059423e-01 2 0 1 +288 1 1.9354124990672374e+00 1.3318105136786291e+00 3.6211635210084403e-01 -1 1 0 +303 1 2.9661308460819318e+00 1.1444058564802859e+00 6.5604777151148241e-01 -2 2 1 +491 1 7.1514675802966758e+00 1.3588685826107376e+00 1.2875068928885325e-01 -1 0 0 +187 1 3.0670386025081497e-01 2.4850915964494620e+00 7.0944157374329464e-02 0 -1 0 +163 1 1.2805212773629451e+00 2.4034621328433090e+00 2.0184086197146742e-01 0 1 -1 +345 1 2.5952244948945173e+00 2.3405536448220743e+00 3.0019149048630317e-01 -3 2 1 +447 1 3.9059163101716741e+00 1.6849179478858272e+00 6.0306488750926446e-01 -2 3 -4 +108 1 4.9215629854759335e+00 2.5174843149522088e+00 6.2779912940916158e-02 1 0 -1 +427 1 6.3978861821382305e+00 1.9201774263431104e+00 4.3981049730412797e-01 -1 -2 -3 +96 1 7.4061541304605534e+00 2.4828308207907996e+00 2.4705543772216432e-01 0 -3 2 +494 1 8.0759482343563711e+00 1.6914241739458744e+00 6.8482420189075921e-01 -3 1 2 +360 1 3.1886448226656765e+00 3.1126224743956139e+00 5.4052217066940123e-01 1 1 -2 +179 1 4.2399500260508116e+00 2.8640916432630559e+00 7.5278462735962870e-01 -1 1 1 +136 1 6.2603021059339365e+00 3.2638426060411403e+00 9.9163685662983045e-02 -2 0 3 +311 1 8.0699058291845773e+00 3.1228722160017734e+00 7.5458759573094458e-01 -3 -3 3 +242 1 8.1363443818268044e-02 3.8061968817045986e+00 5.4644298839992535e-02 1 
1 1 +451 1 1.9291602479953753e+00 3.5896469823727863e+00 2.3008537852985023e-01 -1 0 0 +118 1 5.3566912079946780e+00 3.5234509548370374e+00 6.7801317822012042e-01 1 -3 -1 +200 1 6.7588369512940338e-01 6.1719413314115790e-01 9.9275606042642373e-01 -1 2 3 +23 1 2.1873327028525060e+00 4.2754803645009956e-02 1.4143560341384132e+00 -3 -1 0 +30 1 3.9592898204338112e+00 6.8093268022015185e-01 1.2987004742604558e+00 -1 -2 -2 +26 1 5.1175076813830467e+00 5.1869033157649591e-01 9.5797839381311178e-01 0 1 2 +452 1 6.5582763761917411e+00 3.4514602671261341e-01 1.2368598815410630e+00 -2 0 -2 +439 1 8.3865135868155445e+00 1.4825845267600596e-01 1.6737363681796917e+00 -2 3 1 +492 1 1.6645816560197648e+00 9.7557398318995125e-01 1.3973497820249918e+00 -2 3 1 +287 1 2.8845951500519784e+00 9.2974386224238703e-01 1.6104516406361071e+00 2 -1 2 +87 1 5.9282888423658395e+00 1.2716232081523247e+00 1.1744299163086194e+00 -1 2 -1 +191 1 7.0465887128985001e+00 1.3447129135151838e+00 1.1378718594147066e+00 -1 2 0 +316 1 6.6785856436835267e-01 2.4243200282222617e+00 1.1253836626025919e+00 0 1 0 +176 1 1.8165460632802826e+00 2.1437252128100086e+00 1.0868234266569741e+00 1 1 1 +244 1 3.5041229236927576e+00 2.3924032537771125e+00 1.3551249499971583e+00 -3 1 4 +463 1 4.9238071200835165e+00 1.7488964460038467e+00 8.7393010597649756e-01 -2 2 1 +101 1 4.7502017837518187e+00 2.5070270251466997e+00 1.6042073679153410e+00 -3 0 -2 +456 1 6.7262574842655534e+00 2.1937608587635533e+00 1.5819520420856528e+00 0 2 3 +90 1 7.8106680805872131e+00 2.3465270292028992e+00 1.4066520475037838e+00 0 -4 1 +457 1 2.2543678933961773e+00 3.1652213402916889e+00 1.1451988834668292e+00 -1 0 0 +344 1 5.9751963550424136e+00 2.8740523385222234e+00 1.2530570351392452e+00 0 1 -1 +399 1 8.2525481582297566e-01 3.4796551803028120e+00 1.0452539946382762e+00 1 -1 -1 +472 1 2.5500022667255768e+00 4.1144011701979659e+00 1.1937493716055980e+00 0 1 2 +364 1 3.2074279852156709e+00 3.3909867375667604e+00 1.5492265188845586e+00 -1 0 0 +333 
1 4.2063776055408209e+00 3.7436012461483292e+00 1.2983951160694676e+00 -1 1 1 +223 1 6.0720407506223024e+00 3.9437056464141231e+00 1.5634726007362729e+00 0 0 1 +277 1 6.8850289844945918e+00 3.5744109988378070e+00 1.1843729982426427e+00 2 0 0 +487 1 8.0236797280148657e+00 4.0759691449476652e+00 1.1816920447826709e+00 -1 0 5 +202 1 1.1425010515906946e+00 1.1629395296284512e-02 2.5086432758529211e+00 -1 -2 0 +139 1 2.3147555111337756e+00 1.8665688332261610e-01 2.4220734387693037e+00 -5 0 1 +218 1 3.5367098631556342e+00 6.2760250051756761e-01 2.4478818232869410e+00 1 0 -2 +10 1 4.3729774450168737e+00 1.6632191047636544e-01 2.1079853667170236e+00 -2 -1 -2 +64 1 5.7303792911825742e+00 2.9006680684615282e-01 1.9605629224377070e+00 -2 1 2 +40 1 6.5824557178778531e-01 8.5028950299011019e-01 2.1496168105059708e+00 2 1 1 +328 1 1.5589817713112594e-01 1.6225523918451032e+00 1.7310231111691350e+00 3 0 0 +292 1 2.1314993621742819e+00 1.2260233186264009e+00 2.3652294967955245e+00 0 0 -1 +245 1 4.6666032401598603e+00 1.5042368206958292e+00 1.9142905720694421e+00 -3 1 -1 +148 1 6.7720428171922631e+00 8.7008110148972428e-01 2.0934267451930806e+00 0 0 1 +435 1 7.7341388550057273e+00 9.1983544373491155e-01 1.8223856537522831e+00 -2 -1 1 +301 1 2.9295611364078922e-01 2.4942786265027763e+00 2.2355337509297120e+00 -2 -3 -2 +476 1 1.3648021789963285e+00 1.8605609454926342e+00 2.0851101905225424e+00 1 0 -1 +310 1 2.6428836469132526e+00 2.1636738846129742e+00 1.7827293416008847e+00 3 2 0 +137 1 3.6368685754799719e+00 1.7198335009724290e+00 2.1014306554853008e+00 1 -1 1 +434 1 5.6400362255303138e+00 2.0264029757942144e+00 1.7648993119896872e+00 0 -1 0 +184 1 7.6228520959849160e+00 2.1704001606029473e+00 2.4690779972373509e+00 0 1 -1 +5 1 1.5547914352159820e+00 3.0246432147284117e+00 1.9341058666944422e+00 5 1 -2 +41 1 3.5394530649909939e+00 2.8672493763709368e+00 2.3721754433370239e+00 0 0 0 +410 1 5.2622202291807252e+00 3.3532222678989076e+00 2.0249715425869179e+00 2 1 1 +418 1 
6.2986243250817040e+00 2.9042203242504363e+00 2.3450580832445986e+00 -1 2 0 +34 1 6.5845359986575269e-01 3.5330771386953552e+00 2.1708302646106170e+00 -2 3 1 +67 1 2.2321109286615104e+00 3.8391986294826608e+00 2.3598318070524229e+00 1 -1 0 +390 1 4.4001972293445766e+00 3.7784400898230768e+00 2.3530190506208175e+00 1 2 -1 +125 1 6.8584035481270371e+00 3.7859398002695177e+00 2.2262509649550242e+00 -3 1 3 +105 1 8.0142836210079444e+00 3.4253500628703644e+00 1.9822510084209746e+00 1 0 -2 +274 1 4.5713678557713822e-01 5.4244335571632307e-01 3.0849174937154351e+00 1 -2 1 +314 1 1.8447059547300777e+00 1.1692544876557610e-01 3.3199499683474132e+00 2 0 -1 +408 1 3.1814603728989130e+00 7.4387930556111925e-02 3.3119410401147560e+00 -1 1 1 +142 1 5.0629800765951494e+00 4.5529434814892644e-01 2.8994044435235393e+00 2 -1 -3 +25 1 6.1307890615815195e+00 4.8707110399724851e-01 2.9156227033318936e+00 0 2 -4 +458 1 7.5149810474983081e+00 4.9072663758191898e-01 2.7754564040841219e+00 2 -4 0 +165 1 1.3117326379233891e+00 1.1343392736952256e+00 3.1076574691841947e+00 -2 2 2 +421 1 2.7633861789985827e+00 9.5478806243591052e-01 3.1957876340691875e+00 -1 -3 1 +151 1 4.7220243778881930e+00 1.4282004346636548e+00 3.2642378377455432e+00 0 0 -1 +162 1 5.6468847674841811e+00 1.3364063405496600e+00 2.5903751818453160e+00 4 -4 -2 +120 1 8.2129099591176686e+00 1.3575499019485984e+00 2.6634845511723606e+00 0 0 0 +448 1 8.8805142887530297e-01 2.2694079305496020e+00 3.1897834611367313e+00 -1 1 3 +50 1 2.9687835700409062e+00 1.9135157390691884e+00 2.8548074734572992e+00 -3 0 1 +443 1 4.0462687470728396e+00 2.3330394967344139e+00 3.1667340148022216e+00 0 1 -2 +482 1 4.9427462157614270e+00 2.1611442624383619e+00 2.6370759110090005e+00 0 3 3 +407 1 5.7753726348779013e+00 2.4435329312939671e+00 3.1756094964019836e+00 0 1 -1 +100 1 6.6610073695947598e+00 1.9476592422501362e+00 2.6046914024931409e+00 -2 -2 1 +99 1 3.5098260053604374e-01 3.2489377905726768e+00 3.1253385231740438e+00 -1 1 -1 +107 1 
1.3942178307183000e+00 3.1914627339242005e+00 2.9813408440396350e+00 2 0 1 +480 1 2.3066257454835091e+00 2.6794898513136354e+00 2.7495660819550110e+00 -1 -2 -1 +115 1 3.1214377398204638e+00 2.9223083977660864e+00 3.3233156913824242e+00 1 2 1 +278 1 5.0520729931158854e+00 3.1424127338010743e+00 3.1175324674801042e+00 0 0 -1 +206 1 7.7081484276756580e+00 3.0580419350207482e+00 3.0362913717240096e+00 0 -1 -4 +4 1 1.3000434256419220e+00 4.1733384323360649e+00 2.6659576195319934e+00 1 0 2 +214 1 3.2669228431804429e+00 3.8002115979716740e+00 2.6408106015701289e+00 0 0 0 +65 1 4.0280349657536760e+00 3.3861374196314542e+00 3.2331032594455227e+00 2 1 2 +490 1 5.8571557116011554e+00 3.8735942685941813e+00 2.8613897619661586e+00 -1 -2 0 +230 1 2.4828380364158602e+00 5.5335473677805791e-01 4.1950137944148906e+00 2 -1 3 +234 1 4.2752320357614213e+00 1.4212800762204394e-01 3.7635975156407624e+00 -2 1 -4 +92 1 5.6957646578006917e+00 2.5374658514218495e-02 3.8304068113906884e+00 0 -1 0 +103 1 7.0230616578295741e+00 4.6755655210711161e-01 3.8662999072020967e+00 1 -2 1 +129 1 8.1671299320274162e+00 5.7067862198193264e-01 4.0702212708530077e+00 -1 -2 0 +483 1 3.2862260723228903e-01 1.4557716141563948e+00 3.6577409481728278e+00 -3 4 -1 +110 1 1.5047240722693447e+00 1.0396158231062973e+00 4.0843832165787424e+00 -2 -1 0 +126 1 3.7388270640303456e+00 1.1962750537841655e+00 3.4335639532998909e+00 3 0 0 +276 1 5.6087889039130960e+00 1.1358153696317190e+00 3.7428282467743927e+00 0 2 1 +294 1 6.5127919732673050e+00 1.4345197299837358e+00 3.4448769173070444e+00 -2 0 2 +417 1 7.5734653423068021e+00 1.3756634588591667e+00 3.5078056411981438e+00 -1 1 0 +102 1 1.1107713360470239e+00 2.0167473232388695e+00 4.1576658165713738e+00 -2 0 0 +228 1 2.0072330566273600e+00 1.8105875233272237e+00 3.4619908126078429e+00 2 0 -1 +2 1 3.2101113937010530e+00 2.0183489554560774e+00 3.8822820800419566e+00 0 1 -1 +222 1 4.2730523271288350e+00 1.9092752363315002e+00 4.1553216684001049e+00 1 -1 0 +332 1 
6.5042309397343736e+00 2.2214607151080448e+00 4.1057034120156182e+00 0 1 3 +275 1 7.5099659540449100e+00 2.2790896507455778e+00 4.1566328430638730e+00 -1 0 -1 +243 1 8.2736142057938107e+00 2.3393256795577462e+00 3.4990413844829384e+00 0 0 -2 +394 1 5.8512440497757878e-01 2.8807282965119669e+00 3.9612614891397042e+00 1 3 1 +325 1 2.1875714466797338e+00 2.8726101452768167e+00 3.8147636089102748e+00 0 2 -1 +468 1 5.6059790271973755e+00 2.9975764588671621e+00 3.9816014902843078e+00 0 1 0 +113 1 6.5849924400139104e+00 3.1808065990344776e+00 3.4367361163256929e+00 0 0 1 +47 1 1.0081753047752569e+00 3.9556172416934308e+00 3.8502376136806813e+00 1 -1 0 +474 1 1.9694475511677239e+00 3.9381337754040748e+00 3.4201230948176762e+00 4 1 -1 +343 1 3.0370114219017328e+00 3.9465351732349148e+00 3.7436387890882474e+00 0 -2 3 +257 1 6.0639634839200358e+00 3.9545122043987475e+00 3.7922755780058259e+00 1 3 -3 +422 1 7.3939236737263112e+00 3.9358351587357783e+00 3.4932390476416106e+00 -2 -1 -3 +317 1 3.5159353631335000e+00 5.3479576975233323e-01 4.4869163379906354e+00 -1 -2 -4 +322 1 5.5733359556605757e+00 4.8698103039818835e-01 4.7704783768556309e+00 1 6 -1 +453 1 7.3567123647601029e+00 8.3432787155128474e-02 4.8780449077720851e+00 -2 3 2 +82 1 5.8224871123849942e-01 1.1896491837492689e+00 4.7267276175254507e+00 3 0 -1 +402 1 2.4848673528490579e+00 1.6438582837634208e+00 4.4663750233867230e+00 0 2 3 +201 1 3.5370299676438193e+00 1.4874347892452509e+00 4.7752714735806165e+00 0 -2 -1 +159 1 4.7745775925936407e+00 9.0511891230699659e-01 4.2787903105333260e+00 0 -2 2 +239 1 6.3670232447984816e+00 1.2120460051587267e+00 4.4601763672019308e+00 0 1 -1 +340 1 7.6240687426290750e+00 1.3432987598476136e+00 4.6741720105351368e+00 -2 1 1 +246 1 6.4819140516389595e-02 2.1918155429312383e+00 4.6033031341679633e+00 0 2 1 +268 1 5.2454607296226170e+00 2.0016193334579642e+00 4.2366338410742728e+00 -2 2 0 +238 1 1.7670560071066166e+00 3.0774892421151585e+00 4.7759677543328642e+00 0 0 0 +183 1 
2.7500175098986750e+00 2.8066272725663279e+00 4.6976729622871014e+00 -2 0 0 +393 1 3.6570205199594930e+00 3.1994600133365143e+00 4.2220986255821451e+00 1 2 -1 +177 1 4.6077861627416805e+00 2.8922144055644265e+00 4.5492372075807843e+00 0 -1 0 +97 1 2.5306051464072796e+00 3.9363146816844674e+00 4.9489842130852137e+00 2 0 1 +16 1 5.1924745493168265e+00 3.7357318418579575e+00 4.8717896643693246e+00 0 -2 -1 +15 1 6.1086100243278070e+00 4.1952083415168335e+00 4.9407045230806066e+00 -1 0 -4 +467 1 6.6872865628091098e+00 3.3657369472951393e+00 4.6943068780100674e+00 1 2 2 +404 1 8.0052336596808171e+00 3.5358005023997046e+00 4.5274309747477952e+00 2 2 -1 +190 1 9.4380374464947103e-01 4.4583992222855645e-01 5.4653306453144710e+00 1 0 1 +429 1 1.8525920421435695e+00 3.5762708549602490e-01 5.0946475568663319e+00 3 0 2 +356 1 3.9097878727975877e+00 6.1716052733039095e-01 5.4734744838905147e+00 -3 1 -1 +459 1 5.0297951477539122e+00 6.4682655744305417e-01 5.6718176557063691e+00 0 1 1 +140 1 6.3845220800541655e+00 6.6200714540119532e-01 5.4418985717149972e+00 2 -1 -2 +121 1 6.3608122327251349e-01 1.6422337064097718e+00 5.7379184381145940e+00 -2 -1 -1 +76 1 2.6863089369401427e+00 9.2826845731889596e-01 5.1620994403726908e+00 -1 -1 -1 +24 1 4.4731219011860990e+00 1.5605471103773427e+00 5.2523372789875760e+00 0 -1 -1 +215 1 5.5638567276602870e+00 1.5540980050800413e+00 5.2788517723209116e+00 2 1 0 +406 1 7.1965017439495353e+00 1.2927814031197866e+00 5.6667263284723859e+00 0 2 -2 +495 1 8.1400977173034548e+00 9.1998286287268760e-01 5.5427200454151757e+00 6 1 0 +256 1 1.5858394354874155e+00 1.8927406862702940e+00 5.2157829401895270e+00 4 0 1 +348 1 2.8144079358075680e+00 2.0457885486401644e+00 5.4765038200952452e+00 1 -1 2 +18 1 3.7959072614405218e+00 2.4151932066273840e+00 5.1090004037855792e+00 0 -2 -3 +259 1 4.8058181463022738e+00 2.5068639859502841e+00 5.5173228168937900e+00 0 0 1 +261 1 5.9067727879570278e+00 2.4818772803689844e+00 5.0470051254431221e+00 0 0 -3 +433 1 
6.6238775496086628e+00 2.4655070126810084e+00 5.8390299036823690e+00 -4 1 0 +119 1 7.4961811313748150e+00 2.2560858298720374e+00 5.2761246161627531e+00 0 -2 0 +181 1 8.7803165796510541e-01 2.5368621724963174e+00 5.1009676967579534e+00 3 -4 -2 +392 1 1.3563447389359846e+00 3.2004762283973394e+00 5.8237816659365569e+00 0 1 -1 +436 1 2.3946011665804514e+00 3.0278168308484168e+00 5.6997814720410966e+00 1 1 2 +210 1 5.9632298308600022e+00 3.3253508577291475e+00 5.7908436280268685e+00 0 -1 -2 +409 1 8.2954742717592467e+00 2.8784369153928258e+00 5.3508660402584347e+00 0 0 0 +437 1 7.8285811502936531e-01 3.5972247646914322e+00 5.1173576054548029e+00 -1 2 1 +114 1 3.5350435499222215e+00 3.3621891050940294e+00 5.4926280066777995e+00 0 -1 1 +431 1 7.2810459589983303e+00 4.1806346853520422e+00 5.1699487852752508e+00 0 -1 2 +296 1 2.7493301442805773e-01 9.8739001637521445e-02 6.5256735912844297e+00 2 0 -1 +477 1 2.7650505314326872e+00 5.9323041848923519e-01 6.1880566096904213e+00 -1 0 -2 +199 1 3.8449072993955937e+00 6.8021783868606100e-01 6.5252139926754094e+00 1 0 -1 +31 1 6.8196091485194961e+00 6.5962784856610523e-01 6.6694400071986637e+00 0 -2 -1 +149 1 7.6325509159351759e+00 1.3582393886225264e-01 5.8995743775761014e+00 0 1 -2 +216 1 9.7752025904362638e-01 1.4078822836813889e+00 6.6728311339234487e+00 2 0 1 +425 1 1.7005586383875275e+00 1.1280428057946255e+00 5.8993957526091192e+00 -2 5 2 +224 1 2.8675797989920913e+00 1.6348520503423174e+00 6.4743030869420952e+00 1 0 0 +352 1 3.7461129077349202e+00 1.6467836683144792e+00 5.9711819974315681e+00 0 2 1 +289 1 5.0071286046398447e+00 1.3773263732616476e+00 6.4710057868740503e+00 2 -1 2 +455 1 6.2266605172975309e+00 1.4566180649712972e+00 6.2025029141131824e+00 0 1 2 +298 1 7.2166245583854813e+00 1.5995071342823830e+00 6.6916596114412981e+00 -2 2 -2 +42 1 8.3616236989495309e+00 1.1193553859054726e+00 6.6834990220304640e+00 1 -1 -1 +80 1 1.7994758316879240e+00 2.0602535563487749e+00 6.2756102858385763e+00 1 -1 -2 +127 1 
4.3098326176630284e+00 2.3959981453352577e+00 6.4129162374884245e+00 -3 2 -4 +232 1 5.4258898148163945e+00 2.3715461809208245e+00 6.2879862147301759e+00 1 1 -4 +368 1 8.2199446060636472e+00 2.1052670796140989e+00 6.3974575157449376e+00 -1 -1 -2 +486 1 6.9787179588597281e-01 2.5321347977480562e+00 6.5634477651369929e+00 1 -2 1 +209 1 1.7697882729109665e-01 3.3153947644324067e+00 6.2408024968147000e+00 -1 0 -2 +152 1 3.1591516689848946e+00 2.6297173021869003e+00 6.2970081587154914e+00 -1 3 2 +353 1 3.7775468094107074e+00 3.3545010039524996e+00 6.5652719772105916e+00 0 -2 1 +361 1 7.1568303860427589e+00 3.3402954049017803e+00 6.0487812936573579e+00 1 1 0 +391 1 1.8477885811255761e+00 3.9829271272845177e+00 6.5960694610186286e+00 2 0 -1 +464 1 2.8125899145281190e+00 3.9218837233739201e+00 6.1411786897515759e+00 -2 2 1 +497 1 4.5524588331963729e+00 3.4967999912980696e+00 5.9321819433594642e+00 1 -2 2 +498 1 2.0830532473234906e+00 1.1220036849985102e-01 7.1674325217309276e+00 3 3 -1 +264 1 3.1008573853993013e+00 7.6076625369238260e-01 7.2316131354301971e+00 1 1 2 +265 1 4.6342157774659736e+00 7.4471207241976944e-01 7.1556952519725225e+00 0 -1 1 +350 1 5.7838647346068255e+00 5.9645078928256690e-01 6.9763870948006925e+00 2 -2 0 +144 1 1.9122165150526358e+00 1.1329756208528616e+00 6.9185463020768152e+00 0 1 1 +17 1 3.7256587791934250e+00 1.6333740129131904e+00 7.1341698423474043e+00 0 0 0 +441 1 3.6115685474483350e-01 1.9101453221098965e+00 7.5303824824165133e+00 0 1 2 +266 1 1.4762191689510862e+00 2.4662450761248356e+00 7.5398542897976331e+00 -3 -2 -2 +375 1 2.3309589830172550e+00 2.0436669163885055e+00 7.1892097709618481e+00 0 1 0 +351 1 4.7143585284219309e+00 1.8434829864646978e+00 7.2780594229535307e+00 1 0 1 +254 1 5.9170265466448875e+00 1.9140788620193201e+00 7.1554204752492074e+00 0 1 0 +29 1 1.5191420610560982e-01 2.9876711650320327e+00 7.5404622831951533e+00 1 -2 0 +198 1 1.9302038195524280e+00 2.9894398797282253e+00 6.7310853131736357e+00 2 -1 -2 +197 1 
3.3083005500744571e+00 2.6144599213348498e+00 7.4059907643060248e+00 1 0 2 +192 1 4.3810729888763209e+00 2.8557380429470860e+00 7.4007735378272006e+00 1 0 1 +220 1 5.1223945377780948e+00 3.3376038243952149e+00 6.7920010375935584e+00 0 0 1 +432 1 6.2458841824934863e+00 2.9482647398415351e+00 6.8353911959966540e+00 0 2 -1 +20 1 7.5153792083913968e+00 2.5459516140888447e+00 6.9548679851000408e+00 3 -1 -2 +63 1 1.0659006163620912e+00 3.4796280956085210e+00 7.1592343773906313e+00 0 1 -2 +446 1 7.1139268453604680e+00 4.0629892995134789e+00 7.0800362551664415e+00 1 -3 -2 +172 1 8.3244437231535144e+00 4.0795393733923691e+00 7.1857426258997332e+00 2 0 -1 +405 1 2.0158180111850474e-01 7.3000938410316307e-01 7.6191994505829879e+00 0 -3 0 +323 1 1.4017230037752237e+00 4.6050949606146502e-01 7.9006469000953414e+00 2 1 -1 +428 1 2.4287753885012338e+00 4.6438480937363963e-01 8.3560508824421493e+00 3 0 2 +280 1 3.6502596381287908e+00 7.8205464112991063e-01 8.2256571665042131e+00 3 1 -1 +54 1 5.2270258856938900e+00 5.3851997974995880e-01 8.1047016704593808e+00 -1 0 -5 +397 1 7.4044521194737944e+00 1.4880482327788007e-02 8.2278445496777337e+00 1 0 2 +279 1 1.2356833818326960e+00 1.5625251472088779e+00 7.9109615213928013e+00 0 4 -2 +134 1 2.5628613498785486e+00 1.5781156241932268e+00 7.9994516338459301e+00 0 1 -1 +236 1 4.5573192582583912e+00 1.4401963680745278e+00 8.1580516205465621e+00 0 3 -3 +493 1 6.1321864263285528e+00 9.9745261592364498e-01 7.9044637123188437e+00 0 3 -3 +346 1 7.4631508711766701e+00 8.7980227698742530e-01 7.6161632460447608e+00 2 -1 -1 +295 1 3.6674081936683844e+00 2.3602153939048316e+00 8.3119852957626765e+00 0 1 -2 +164 1 5.5460188609614756e+00 1.8259166833804881e+00 8.1712445391317772e+00 -2 -2 -1 +354 1 6.6872220974400829e+00 2.0300640265600358e+00 7.8561422683464270e+00 1 2 0 +449 1 7.9034553200360103e+00 1.8534144914629234e+00 8.0299472682703943e+00 -1 -3 0 +185 1 9.5448193278219684e-01 3.3578879558028460e+00 8.1917731250955708e+00 1 1 -1 +229 1 
2.5099351180061946e+00 3.0097254013180406e+00 7.8296711975898541e+00 -2 2 -1 +135 1 5.5895093348111047e+00 2.7786209311728598e+00 7.5930433616327404e+00 -1 0 -2 +211 1 7.0529928021063730e+00 3.1919477500830000e+00 7.6370125321552074e+00 -2 2 1 +81 1 1.8872269150105381e+00 3.9351057043865243e+00 7.6163648141042426e+00 0 -1 1 +37 1 3.6614705509244851e+00 3.4915765263742262e+00 7.7267865376074960e+00 2 0 1 +355 1 4.5831332879360058e+00 3.8667909487066860e+00 8.3349240075214972e+00 1 -2 0 +141 1 5.4975686789962985e+00 3.7534665361901731e+00 7.8536976598839008e+00 1 3 1 +122 1 7.2999342247943373e+00 3.9738648524411286e+00 8.3865006440763370e+00 0 -1 -2 +327 1 3.2503689184711210e-01 4.6592524224991747e+00 8.2659471361477532e-01 0 -2 2 +386 1 1.1858359917362140e+00 4.2742921253409367e+00 2.8183822072239956e-01 -1 -2 5 +59 1 1.8702404230468521e+00 4.9717730076471947e+00 7.6874981917086671e-01 0 2 -3 +57 1 2.8167286953460633e+00 4.2569068178507328e+00 1.1953882254793591e-01 1 -1 1 +489 1 3.6302378764516323e+00 4.4521177967574310e+00 6.6729235313037527e-01 1 1 5 +326 1 4.9856357064009993e+00 4.5330987087989216e+00 5.2454407858907726e-01 -1 2 2 +359 1 5.9913033298564331e+00 4.3362327743167306e+00 3.7835915829420080e-01 2 -1 -1 +349 1 7.2426723526689933e+00 4.7181533524380885e+00 7.2707643667494415e-01 0 1 -2 +247 1 8.6433343008963215e-01 5.6823007167579762e+00 6.5224250416194052e-01 1 -3 2 +365 1 3.8233209759419231e+00 5.6939269324333672e+00 5.9607489711922135e-01 -1 -1 0 +382 1 5.1137068164970731e+00 5.5805604939667059e+00 1.8276059551769425e-01 -1 -2 1 +130 1 8.2482820181805554e+00 5.5968590423898927e+00 4.4653409437071251e-01 -1 -1 2 +471 1 1.7119665678072469e+00 5.8953695416344436e+00 4.5930053740843135e-02 -2 0 1 +21 1 2.5641267140454049e+00 6.3080369711057624e+00 5.3936073769936865e-01 1 -2 0 +499 1 4.7164689695839286e+00 6.6465500795405017e+00 1.6611623106065540e-01 -1 -1 1 +253 1 5.9043290983731245e+00 6.1331552003521068e+00 7.5404520106110218e-01 3 -1 0 +302 1 
6.9564884663410282e+00 6.1640675729464434e+00 5.7033134187391044e-01 0 -1 -1 +324 1 8.1992983060509594e+00 6.5972265856779417e+00 7.7494747996129210e-01 1 -3 2 +154 1 1.8996589089571863e+00 7.1193783454022102e+00 5.1404963792269565e-01 -3 -1 -1 +284 1 3.0007790559640597e+00 7.2198187648711993e+00 2.3642486802169203e-02 0 0 0 +161 1 3.8133207311484072e+00 7.3201299304675667e+00 5.0202507839899535e-01 -2 -4 -2 +112 1 5.6724794675417822e+00 7.1979165692397187e+00 7.3233205047939343e-01 -1 -3 -2 +39 1 6.4178033104766934e+00 6.8891409921259603e+00 1.5315709015991064e-01 0 0 2 +269 1 7.4906317471214443e+00 6.7375050108085421e+00 4.8287801879068511e-02 3 1 0 +470 1 2.2822847309207428e-01 7.8917014242385868e+00 7.7675485207454809e-01 0 0 1 +84 1 1.3672512040847244e+00 7.9379711016796204e+00 1.6230344562139010e-02 1 0 -3 +381 1 2.6740213254054948e+00 8.0161072175846630e+00 5.8246256296805565e-01 -1 -1 1 +416 1 3.6213693754100857e+00 8.3576521057616233e+00 6.2331560400527297e-01 2 2 -1 +182 1 5.1660049409085866e+00 8.0423641799361949e+00 3.0675573970032322e-01 -2 0 -1 +347 1 6.5429973279348026e+00 7.8912499362162034e+00 4.9818784876300565e-01 -1 1 -2 +380 1 7.4990421428201417e+00 7.6756533739549013e+00 7.5184655726615468e-01 -3 0 0 +19 1 1.4643514526638863e+00 4.2228380130315317e+00 1.3843336624933156e+00 0 3 -1 +430 1 4.3057981012626918e+00 5.0291601875436083e+00 1.2152759600723888e+00 1 1 -2 +212 1 5.0477378389079224e+00 4.2727344176247888e+00 1.5457805058503180e+00 -2 -1 2 +479 1 7.9063585867776194e+00 4.9679041912501303e+00 1.6188677256489217e+00 1 3 -1 +217 1 2.5218672599222897e-01 5.8414577361250837e+00 1.5087288813722908e+00 2 1 2 +147 1 1.1545026869668784e+00 5.3345315151941692e+00 1.5114087897016817e+00 1 2 -1 +13 1 2.8751716646412713e+00 5.2058560720828826e+00 1.0454367061984737e+00 -2 1 -1 +170 1 5.1473087472466297e+00 5.7979187654554183e+00 1.4646868436401976e+00 1 2 0 +204 1 6.2817592200581442e+00 5.1972845711964553e+00 8.9956483663207842e-01 -1 -1 -1 +306 1 
8.3865328816009488e-01 6.6369969836196416e+00 9.5403861420405511e-01 1 -1 3 +414 1 1.6815920771923107e+00 6.1626367729687450e+00 1.1744973520180511e+00 0 2 0 +79 1 3.3223472300560521e+00 6.2059256452802716e+00 1.3962300133956274e+00 0 0 0 +116 1 7.6213827835593815e+00 5.9109006810695996e+00 1.2032264335528675e+00 -2 -1 3 +231 1 2.7502476364406934e+00 7.3933926752228123e+00 1.4247539952768773e+00 3 -1 1 +171 1 4.7071880050572981e+00 6.8064285791523735e+00 1.2807974821888473e+00 0 -2 1 +273 1 7.0329000376929018e+00 6.8584131720717227e+00 1.6693583798609657e+00 -2 1 -1 +260 1 8.0496857982994889e+00 7.4547112671714686e+00 1.6374112984134654e+00 3 -1 -2 +43 1 1.0723712760276345e+00 8.0330657119523838e+00 1.4245919245059160e+00 -2 1 0 +208 1 4.3910624755767662e+00 7.9590156044378260e+00 1.1638330407357564e+00 -2 -2 -1 +226 1 5.5077902922207063e+00 8.0548948337249193e+00 1.2764179622713128e+00 0 0 2 +370 1 6.4142184578798505e+00 7.6748965104033502e+00 1.5118878610721513e+00 1 1 1 +85 1 4.9493682223295465e-01 4.4925581352788218e+00 1.7962077237252332e+00 2 0 0 +213 1 2.1575495906041722e+00 4.9142597626853277e+00 1.8811900528366003e+00 0 3 0 +299 1 3.3659976969419567e+00 4.3662606017243588e+00 1.7255850309353928e+00 3 -1 -2 +249 1 4.7082373488231886e+00 4.9470082900514569e+00 2.3877787475724284e+00 0 0 1 +258 1 5.7775488222451887e+00 4.8696821219175135e+00 2.2927307255193869e+00 -1 0 -1 +388 1 6.8398302583746604e+00 4.7642336931962808e+00 1.8670074890749995e+00 1 -4 -3 +91 1 7.7108281252011066e+00 4.3462922152329373e+00 2.4115614404022634e+00 2 -2 1 +415 1 1.3354264256961873e-01 5.2683808667768846e+00 2.4940378524658144e+00 0 3 0 +27 1 2.0564207334277329e+00 5.6999752852325889e+00 2.4901436865643838e+00 0 -2 -1 +465 1 3.6077777180133985e+00 5.3106225786683270e+00 2.2984375934854913e+00 1 4 2 +72 1 1.2115862831734683e+00 6.3271145377400213e+00 2.3088380865765914e+00 -1 -2 -1 +58 1 2.4730215910430200e+00 6.4108021967014599e+00 1.8872921058165837e+00 2 -2 2 +88 1 
4.2468496124355051e+00 6.0000736368680991e+00 1.9689248186008255e+00 1 0 -1 +6 1 5.2824888171486872e+00 6.1177755165021708e+00 2.4648490036232666e+00 0 -3 1 +75 1 6.5540148621238368e+00 5.8837981825070473e+00 1.8049347572360399e+00 -3 0 4 +22 1 7.8390642986715990e+00 6.6390504342942007e+00 2.2941937725662940e+00 0 1 0 +128 1 5.0900588055916374e-01 6.9175984002222233e+00 1.8531623061329514e+00 -1 0 1 +193 1 1.7616411201473283e+00 7.3777929885216569e+00 1.8819221158557660e+00 -2 0 0 +389 1 3.8054852300865960e+00 7.1908689074118346e+00 1.6897648139570383e+00 1 -1 -1 +241 1 3.3304867419800805e+00 6.7201719117843943e+00 2.3881392654044515e+00 0 1 0 +14 1 5.6091745306383807e+00 6.8869789388403264e+00 1.7385824045568810e+00 0 -1 0 +307 1 2.6028552221612093e+00 7.5983757451277896e+00 2.5134014261731092e+00 -1 1 0 +401 1 3.3924470446908370e+00 8.2185878004411617e+00 1.7224400376656741e+00 -3 -2 2 +248 1 4.8372436612670020e+00 7.6440956091999794e+00 2.0947200376368853e+00 2 -3 0 +335 1 7.0823280457514208e+00 7.5905134700352388e+00 2.4694687185743178e+00 -1 0 0 +281 1 7.4000691269637358e+00 8.3890728034516613e+00 1.8965880690997261e+00 -1 -1 0 +33 1 2.6215904333511340e+00 4.6944190932346643e+00 2.8031190571610427e+00 1 1 1 +221 1 3.7745777664099962e+00 4.5666560881588492e+00 3.0592511133135365e+00 0 -1 1 +251 1 6.7715334943605834e+00 4.6401458096684580e+00 2.8510683358464712e+00 1 -2 -2 +138 1 1.1922779528936784e+00 5.1973855619084013e+00 2.5689556972218521e+00 0 1 0 +73 1 4.3331751132449305e+00 5.7904710852531096e+00 2.9618985356431899e+00 0 -2 2 +237 1 6.2300560736724488e+00 5.7475979505901522e+00 2.7248929404434143e+00 3 0 0 +77 1 7.3913602469000059e+00 5.6780386160747609e+00 2.6234944353061729e+00 -1 -1 3 +339 1 2.9546703425028309e-01 6.2132195219954411e+00 2.7867557714215443e+00 0 0 2 +69 1 1.3562182919504069e+00 6.0134582221288868e+00 3.2785943115156346e+00 0 -2 1 +150 1 2.9668876558472732e+00 5.9472670316673391e+00 2.8936146954206632e+00 3 -2 -1 +219 1 
1.1608118939764323e+00 7.3228681938207032e+00 2.7126116817865631e+00 0 -3 -1 +9 1 2.0627854045350777e+00 6.7454186793237145e+00 2.9360002216641958e+00 0 -2 -2 +3 1 2.9445094497077151e+00 7.1384526732484925e+00 3.3587771406211751e+00 2 -1 -2 +93 1 3.8294061315771235e+00 7.5378392781893275e+00 2.8380550678413003e+00 -1 2 0 +68 1 4.5306997862577427e+00 6.7390306077267583e+00 2.7235217150491495e+00 1 1 3 +35 1 5.4701910601523354e+00 6.9880160912814793e+00 3.3190052755169512e+00 0 0 -2 +174 1 6.2873908725084267e+00 6.7805871501360224e+00 2.7219816088319484e+00 -1 0 2 +369 1 1.7965435829527715e-01 8.0388194202620209e+00 2.5940286664394834e+00 -3 -2 1 +469 1 4.9516921171039039e+00 7.8826764349301754e+00 3.0530014528334970e+00 0 -2 -1 +270 1 5.8608285132223106e+00 7.6946678024150144e+00 2.6778659829007898e+00 0 -1 -2 +196 1 7.6768577919393455e-01 4.8792664640921366e+00 3.5903070116215088e+00 3 -1 1 +235 1 4.2285310787746502e+00 4.2638690385284841e+00 4.1560966227762339e+00 1 1 1 +104 1 4.9767847616154004e+00 4.2048445393860883e+00 3.3962447019771562e+00 -1 -2 4 +363 1 5.8883238331527643e+00 4.9482373301020921e+00 3.4294316309792547e+00 0 0 3 +440 1 8.3695482926311691e+00 4.2129368246316421e+00 3.4018022112738615e+00 -1 2 1 +271 1 1.8521235523587734e+00 5.0753345126075313e+00 3.3807703470063983e+00 0 2 1 +385 1 3.2904276665596646e+00 5.1796485809465160e+00 3.7980367644038786e+00 -3 -1 0 +367 1 4.4116421156217953e+00 5.2602223360914149e+00 3.9453911592241386e+00 1 1 0 +8 1 5.3112651984610038e+00 5.7540585999932050e+00 3.4831459566385630e+00 3 1 0 +445 1 6.8973470747727381e+00 5.7304789961834581e+00 3.7043310817118127e+00 -2 -1 2 +95 1 8.3390772303693836e+00 5.5440620917552232e+00 3.7707087770664116e+00 -2 1 -3 +227 1 2.6907357337748437e+00 5.9773212885609901e+00 3.9377639031978284e+00 -1 -2 0 +45 1 3.9484717591019058e+00 6.4426795460159951e+00 3.6915638730283460e+00 0 1 -1 +62 1 6.2921228111843934e+00 6.5338531152740655e+00 3.8634903896998787e+00 1 0 -3 +309 1 
7.4708902093515182e+00 6.5908652825436720e+00 3.6007172393059794e+00 0 -2 0 +70 1 1.1631280783050528e-01 7.0230287218135894e+00 3.4578008409773004e+00 -1 -2 2 +285 1 1.4425208756199777e+00 7.4192404188139784e+00 3.7903676842933867e+00 0 1 -1 +387 1 2.1212416378368082e+00 6.7926691438538640e+00 4.1965603113934069e+00 1 -2 -1 +484 1 4.6356188966878253e+00 7.3783035166039177e+00 4.0565969174155132e+00 0 -1 -1 +98 1 6.8865210518165370e+00 7.3140090488700187e+00 4.1918733358514384e+00 0 -2 -2 +282 1 6.6401301602074470e-01 7.9757076330778913e+00 3.4568068256572881e+00 0 0 2 +203 1 2.5721741898113022e+00 7.9369081377447577e+00 4.0156309667060688e+00 0 3 0 +189 1 3.6912207664687133e+00 7.6806375475121094e+00 3.9059717431750585e+00 0 -1 0 +267 1 6.5713653654298607e+00 7.8128923355065503e+00 3.4078724648171090e+00 -3 -3 2 +155 1 7.9603563078893247e+00 8.1399268391964696e+00 3.4070072411074963e+00 -2 -3 -1 +313 1 1.7289301235151016e+00 4.3022360027322257e+00 4.5514630316827649e+00 0 1 -2 +250 1 3.3811553057719541e+00 4.4820133651244189e+00 4.8062617116575304e+00 1 1 1 +49 1 5.2294045670482578e+00 4.6507731504297771e+00 4.3588420415784839e+00 -1 2 -1 +111 1 6.9650608689197728e+00 4.6666276346705935e+00 4.3879356756979719e+00 1 1 -1 +124 1 8.0933085752880096e+00 4.6382186065764790e+00 4.3125441053619298e+00 1 1 -1 +488 1 5.0933114935797996e-01 5.1359886989747769e+00 4.5948583401263194e+00 -3 1 -2 +48 1 1.5851307073885004e+00 5.5767207395220701e+00 4.2311661806996650e+00 2 -2 1 +123 1 2.5153837170534041e+00 5.0458650845269890e+00 4.5131929409242613e+00 -1 -1 -1 +423 1 5.8208012531410862e+00 5.4948336589690365e+00 4.5916227902397422e+00 2 1 2 +329 1 7.4993364070942770e+00 5.4866100873090584e+00 4.5194723854672301e+00 -2 -2 -2 +334 1 7.1205767430238509e-01 6.1834005542640256e+00 4.2284519502384050e+00 0 1 -1 +146 1 1.3925739327604780e+00 6.6428737862846399e+00 4.8806888882719424e+00 0 1 -2 +500 1 2.2236367185474757e+00 6.0145074663436873e+00 4.9428276989368829e+00 0 1 1 +46 1 
3.3290620592760245e+00 5.9295614805296450e+00 4.9789176350871935e+00 1 -1 -3 +374 1 4.7036984817853460e+00 6.2326093994181564e+00 4.3534332596232463e+00 -2 3 2 +52 1 6.8604388860085521e+00 6.3207993870641257e+00 4.9959828655023273e+00 0 -1 -1 +466 1 8.0663702260053096e+00 6.4797694103942982e+00 4.3885769098322607e+00 0 -1 -1 +272 1 6.3857634591010626e-01 7.4306626388140797e+00 4.4685908728843007e+00 -1 0 -1 +331 1 3.1923815943971330e+00 6.8406621847783482e+00 4.3673687027144910e+00 -1 -2 0 +12 1 5.5841339297795631e+00 7.2882403606371549e+00 4.4466343965097535e+00 4 3 0 +178 1 7.5912726284672463e+00 7.0457277620611389e+00 5.0302483408547527e+00 1 -2 0 +376 1 3.5187241629329796e-01 8.3977706803073371e+00 4.6278412560289910e+00 1 -5 2 +117 1 1.3379826357324891e+00 8.3239351364630743e+00 4.2945291322530599e+00 3 0 1 +419 1 1.8896223988418746e+00 7.6393579746671740e+00 4.8168734901107717e+00 0 1 2 +379 1 3.9801436864751545e+00 7.7556819889833708e+00 4.9922836416209275e+00 -1 -2 0 +53 1 4.7488025467298804e+00 8.2113497019933241e+00 4.7088252949914393e+00 -3 0 -1 +28 1 6.4645893658779787e+00 8.2802587137451660e+00 4.5908064174413035e+00 -1 0 -2 +300 1 8.0497419370896441e+00 7.6196002867575432e+00 4.2909429383935933e+00 0 3 0 +83 1 1.5533997706565803e+00 4.2394383012204173e+00 5.5726999917011435e+00 0 0 -1 +304 1 2.5788446900543724e+00 5.0166498820006939e+00 5.6119573167673691e+00 0 -1 0 +51 1 4.2443804124641717e+00 4.2352637746580450e+00 5.1927633388220640e+00 -2 -3 1 +180 1 8.3861058182094546e+00 4.2682396578102644e+00 5.2739914655295381e+00 2 -2 0 +60 1 1.6015559581031933e+00 5.1463292330916399e+00 5.1677022459932900e+00 3 -3 0 +255 1 3.9377408204864559e+00 5.5446289337866919e+00 5.6880007643189954e+00 -1 1 -1 +336 1 4.8593163697287496e+00 5.2245778091909907e+00 5.0488368666183758e+00 0 -1 0 +74 1 5.8562368106306470e+00 5.1343487602136690e+00 5.4844996621256241e+00 -1 -1 -1 +11 1 6.8757956599703460e+00 5.1962144116573894e+00 5.2833696751528123e+00 1 -1 1 +106 1 
7.8813740148695439e+00 5.1326569274050362e+00 5.5073989898629776e+00 -2 0 1 +342 1 6.9861903942379322e-01 6.2397324279420259e+00 5.5554818983126264e+00 1 2 1 +366 1 1.5648860489287066e+00 5.8898927909510999e+00 5.8373116466465662e+00 -2 1 1 +153 1 2.7314006683241487e+00 6.0747252624615822e+00 5.8658792109688322e+00 2 0 0 +384 1 3.7649825100367740e+00 6.6140228235604797e+00 5.7068894568556274e+00 3 0 -1 +444 1 5.5919895424131791e+00 6.4007885907393334e+00 5.0609268438363237e+00 0 0 1 +169 1 8.1101285040334883e+00 6.0263607659074259e+00 5.3262646023669920e+00 2 -3 2 +312 1 1.5049751572049341e-01 7.0899457593186419e+00 5.3993285616214672e+00 -4 -1 2 +131 1 2.1405833530478784e+00 6.8932472731161107e+00 5.7016479425245992e+00 -1 -2 -5 +290 1 2.9965099079972810e+00 7.1703954961127128e+00 5.3705836189910157e+00 3 2 0 +240 1 4.7341951239220323e+00 7.1100411829110195e+00 5.1242936547927025e+00 -1 -4 5 +403 1 1.1346681749692125e+00 7.6627110490212882e+00 5.5800616996766177e+00 -3 2 3 +438 1 2.6845809734218404e+00 8.2016169024289791e+00 5.0687325841194584e+00 0 1 1 +420 1 4.5890023008209200e+00 8.1137042645946345e+00 5.6817522441295134e+00 -1 -3 -3 +175 1 5.5503185230040479e+00 7.9962207684468503e+00 5.3390442156703726e+00 -1 -2 1 +305 1 6.7862386693055941e+00 7.6108734242812508e+00 5.4748415197806564e+00 -1 0 0 +341 1 7.4912815420537060e-01 4.3291768420078673e+00 6.1635194432999105e+00 -3 0 0 +291 1 3.3895079643780628e+00 4.7034241504608403e+00 6.1615817441829774e+00 -1 0 1 +372 1 4.6184126650372983e+00 4.6292373343220516e+00 6.3125880555070495e+00 1 -1 -2 +293 1 5.5228203863122962e+00 4.2760181362826257e+00 5.8887719440294752e+00 2 2 2 +461 1 6.6292898049431850e+00 4.3535966489510214e+00 5.9130834363303126e+00 0 -1 -1 +263 1 7.7259268091094802e+00 4.4631499768470926e+00 6.3337285974251110e+00 -3 0 0 +473 1 5.8703225638324330e-01 5.3945075957215893e+00 6.1889975824903258e+00 1 -2 0 +166 1 1.6857123760309740e+00 5.0889285259544463e+00 6.6009674185523135e+00 2 2 2 +132 1 
6.2122393744706681e+00 5.4126385838431412e+00 6.6911128610756689e+00 -1 2 1 +32 1 7.1447545314606753e+00 5.2817185569927805e+00 6.2574893275776935e+00 -1 1 0 +475 1 3.1727782520787395e-01 6.3991068893590928e+00 6.5410008153956287e+00 -1 2 0 +283 1 3.2886765713157047e+00 6.4577609617474216e+00 6.6450211825759427e+00 -2 5 -3 +205 1 4.9096514414971573e+00 6.2573093659948649e+00 5.9509143564095108e+00 1 1 0 +158 1 6.0709604726562620e+00 6.0618280221451721e+00 5.9241072976562341e+00 2 -3 1 +1 1 6.8533858129590133e+00 6.5562074315723597e+00 6.5648993855676077e+00 -2 -1 -2 +485 1 3.8125590702509465e-01 7.4616693598979804e+00 6.3609885403345947e+00 0 0 1 +378 1 1.3081995382782592e+00 6.9194359283210023e+00 6.2966360730695206e+00 -1 1 2 +71 1 4.2456384497919455e+00 6.8188407430282236e+00 6.6601493363974615e+00 -1 -4 -4 +320 1 5.9836892193800049e+00 7.1597807912498039e+00 5.9293654675383980e+00 -1 -1 -4 +481 1 7.6691355446374123e+00 7.2329848679784536e+00 6.1548343866270958e+00 2 0 -4 +318 1 1.3774303149915195e+00 8.3818831122591373e+00 6.3319633954681702e+00 -1 3 3 +496 1 2.5904412000483283e+00 7.6460489519926949e+00 6.2257027413180230e+00 0 1 0 +319 1 3.7413059213345305e+00 7.5724262762451815e+00 6.0123092822785358e+00 -2 0 0 +338 1 5.3628717868866635e+00 8.0629328638364353e+00 6.4489666989888104e+00 0 0 2 +358 1 6.2532288322758314e+00 8.3160028104100370e+00 6.1106737733333700e+00 1 1 3 +225 1 7.1188694406828850e+00 8.0966599202958953e+00 6.7037213079900431e+00 0 0 1 +55 1 1.0372861368831470e+00 4.4696141893250934e+00 7.4049774120079697e+00 -1 2 -1 +362 1 2.7501402171762614e+00 4.4083355662417896e+00 7.3996418874276850e+00 1 -1 0 +36 1 3.7396318187704796e+00 4.4801126663955504e+00 7.1074085083223073e+00 2 0 -2 +133 1 4.7860445539287522e+00 4.2902205437459102e+00 7.3711360387569274e+00 -1 -3 -2 +412 1 5.8687821857033606e+00 4.2035753083199188e+00 6.8227572622514323e+00 -4 3 0 +89 1 9.1212528791654535e-02 5.3554487915969240e+00 7.1088625783964092e+00 0 -2 0 +38 1 
1.2571311964388701e+00 5.7525434192639207e+00 7.3210178579367868e+00 -1 -5 -2 +337 1 2.7511266822038212e+00 5.4161467707559394e+00 6.9980055112843935e+00 1 -2 -1 +233 1 3.8756258886529467e+00 5.7508116329978627e+00 7.0533607828281815e+00 0 0 -1 +442 1 4.9084237493694953e+00 5.5326539633780083e+00 6.7666717819135362e+00 -4 -4 1 +143 1 5.5776680237268401e+00 5.1621486864820874e+00 7.4635021710959082e+00 1 2 -2 +194 1 7.1453664245542576e+00 5.0984438205910134e+00 7.3333812213368574e+00 0 2 0 +308 1 7.5489760640996861e+00 5.8433222820659871e+00 6.9295207313931844e+00 0 0 0 +286 1 2.1326326019163346e+00 6.2556170719743678e+00 6.7379074391662934e+00 2 0 -2 +61 1 5.6654689088882648e+00 6.2935208603774795e+00 6.9534505775907345e+00 -2 1 -1 +426 1 7.2244687144599562e-01 7.0806885112202815e+00 7.2261712082790313e+00 -1 1 -2 +450 1 1.7213783340257280e+00 7.5184636193370302e+00 6.9767252413839369e+00 4 1 1 +188 1 2.8213470931093725e+00 7.2342621084044723e+00 7.1806231926796817e+00 0 -2 -1 +94 1 3.8026924767351562e+00 7.3932541518378949e+00 7.4596688811177305e+00 -1 2 -2 +252 1 5.1481924139065534e+00 7.2239227196376481e+00 7.0808756518162852e+00 -2 1 -1 +321 1 6.2155610611385042e+00 7.5199920009866501e+00 7.0964075868161212e+00 1 -1 -3 +156 1 7.9944514874782193e+00 7.0252703915440122e+00 7.2541499279788821e+00 -2 1 0 +167 1 5.6917482122629082e-01 8.1417190335213725e+00 7.4962901784513347e+00 2 -2 -1 +330 1 3.3138446283251373e+00 8.2146300363312132e+00 6.8886408760800295e+00 1 -2 0 +195 1 4.3712446295554876e+00 7.8995454090647614e+00 6.7366600234979277e+00 -3 -1 -1 +78 1 5.1856313345157918e+00 8.2127013524100576e+00 7.4620544007464273e+00 0 -1 1 +157 1 7.9011697675935482e+00 8.1764884669087454e+00 7.4266272279794077e+00 -1 0 -1 +373 1 6.7132530681439118e-01 5.0292807223694753e+00 8.1874843860841438e+00 1 2 -2 +56 1 1.7078519119356448e+00 4.9344764637762015e+00 8.1280520422148399e+00 0 2 0 +315 1 3.6234275175752244e+00 4.9451657793378052e+00 8.0476764469560074e+00 -1 -2 0 +478 1 
6.4485922344872577e+00 4.3096951707056732e+00 7.7984645181472576e+00 -3 -1 1 +109 1 8.0579925616842267e+00 4.7034410927383776e+00 8.0870676538969590e+00 -1 -2 0 +357 1 2.6884169416542560e+00 5.4427652520296803e+00 8.2033924998655703e+00 0 3 1 +454 1 4.5709826050185614e+00 5.3780110248528246e+00 7.7764192595368709e+00 1 -1 2 +86 1 6.2280039286639228e+00 5.4780735688869227e+00 8.2809905303020894e+00 -1 1 0 +262 1 7.2349855745224341e+00 5.4460551342647063e+00 8.3066978094955939e+00 0 -2 2 +160 1 6.0572932237795518e-01 6.1031638741150802e+00 8.0606222325454144e+00 2 -1 1 +168 1 2.6926304594044645e+00 6.3106981827881823e+00 7.6217004925278191e+00 -1 -3 0 +145 1 3.6817206885980216e+00 6.4794531849637744e+00 8.1206460236595284e+00 -1 -1 -2 +400 1 4.6165015313151514e+00 6.4768264075412638e+00 7.5742703650137084e+00 1 -2 -2 +395 1 5.5951136272170219e+00 6.3298964082146654e+00 7.9312370644766386e+00 0 0 -1 +297 1 6.6597873155694378e+00 6.3692836044578955e+00 7.6364845321455510e+00 1 -4 0 +207 1 8.0115395128671594e+00 5.9375044868831406e+00 7.8466324008781454e+00 -1 1 3 +460 1 6.6754980612335257e-01 7.0521823363262239e+00 8.3451452216672077e+00 -2 1 -3 +44 1 1.5985553198249884e+00 6.8250382119954756e+00 7.7469635466673052e+00 0 1 0 +377 1 4.7397084186898368e+00 7.4631435688157008e+00 8.0338331164055763e+00 0 -1 0 +411 1 5.8132963527118724e+00 7.4646663474419555e+00 8.0261071780587709e+00 2 -1 1 +396 1 7.0242838781762282e+00 7.3479893628886632e+00 7.6603703429086574e+00 2 3 0 +424 1 8.1516255787718581e+00 7.5530513248894016e+00 8.2349155971796755e+00 -2 0 0 +66 1 2.2362583327280716e+00 7.7449451931440727e+00 7.8847519662235594e+00 -1 -2 0 +371 1 3.1656541571472099e+00 8.2451918974189500e+00 8.0415029551628514e+00 1 -1 1 +186 1 4.1675976318493086e+00 8.3247658596031009e+00 7.8802494669419030e+00 1 -1 -1 +413 1 6.4945159878598346e+00 8.3677431053238660e+00 7.6981134933049891e+00 -2 -1 -2 + +Velocities + +7 -1.5197672199477208e+00 -7.1031250708487148e-01 -4.0950627961412567e-01 
+398 9.4644999179644840e-01 -8.0422358764146151e-01 -1.1023964746841350e+00 +173 4.8084218242595870e-03 -1.9383808513915850e+00 9.3439099328992314e-01 +462 -1.6390116924948674e+00 -1.2080683544699562e+00 7.6685759600965364e-01 +383 1.2945118204202577e+00 -1.8370931432093225e+00 1.1758972656982776e-01 +288 1.2113220352829337e+00 4.8786819623543370e-01 7.1439798712590263e-01 +303 3.6232856211831610e-01 -3.7927260062054236e-01 -3.0467891118546553e-01 +491 1.6817821327211264e+00 6.3153250223994289e-02 -5.4827183875981267e-01 +187 5.6164532616475686e-02 1.3250338697636479e+00 1.6291294556417517e+00 +163 1.3950518510134631e+00 -1.7090097072259662e+00 -6.0029200570353536e-01 +345 -8.4635427449528189e-01 1.3013642733988193e-01 -3.7582537476409439e-01 +447 -5.5306523194731427e-01 -1.4583063079290524e+00 4.8086237208854654e-01 +108 -3.1879632824306825e-01 -4.9387838912491844e-01 -3.1046215530949173e-01 +427 2.4456665882970424e-01 1.3737057789182878e+00 -1.7474218101951866e-01 +96 -2.8217287015277487e-01 3.9138367367476778e-01 -4.6114700958040086e-02 +494 3.0303709765292064e-01 -9.2861341821366045e-02 -3.6692009603190118e-01 +360 -4.2709052063514102e-01 -1.5167804301477015e+00 -2.6902074926466563e-01 +179 -1.2403873183879999e+00 1.1368400311662936e+00 5.6653449328883521e-01 +136 -1.5859168963178476e+00 1.0728794555657983e+00 5.0876417522892092e-01 +311 -1.4232560247874224e-01 -8.2802934475265932e-01 1.0289469027582252e+00 +242 -1.3753994053365912e+00 3.0823812109914095e-01 1.2519475129774249e-01 +451 1.0684545083916284e-01 -1.3647250068831975e+00 1.2292458701523420e-01 +118 6.5344809858683794e-01 -6.0529931820910909e-01 -6.6821572295625731e-01 +200 -9.1497459004230530e-01 -7.2783581485393822e-01 -8.5265666689286446e-01 +23 1.0121027757280427e+00 -5.8690115135784995e-02 -1.4283745590726701e-01 +30 -1.2406061152582815e+00 3.7490020292637088e-01 -1.4457935973251330e+00 +26 -9.9581994312158117e-01 -1.2441026541932486e-01 -3.6541440712112250e-01 +452 -9.8588039362152369e-01 
1.0706468502992672e+00 1.6282162085617651e+00 +439 -1.2386761163593085e+00 4.3106305328993799e-01 5.0410115305724934e-01 +492 2.7094066326642935e-01 6.6318883302565457e-01 1.5999014287193769e-01 +287 -1.7254693201510321e-01 1.1189096817152451e-01 4.1489954379924110e-01 +87 -1.1416496451717975e+00 -7.8732701356097490e-01 -5.1553483379997289e-01 +191 1.2771897324335735e+00 -1.7496443253093807e+00 5.0507874089360993e-01 +316 9.1440987552186193e-01 5.4700363415289210e-01 9.8622452705158947e-01 +176 1.8575029150961730e+00 4.1139607157872565e-01 -7.4590595351664335e-01 +244 -4.6943526746297531e-01 -4.0598167955478592e-01 1.2148844546685273e+00 +463 -6.0334643189530614e-02 9.0710610807033173e-02 -1.1159129056326442e+00 +101 -4.6185710842519001e-01 -1.0745733134156132e+00 -8.4095960640252343e-01 +456 7.3559492687866046e-01 8.4585098605759870e-02 9.9419285892741016e-02 +90 -6.9201778873218364e-02 4.2798282427303436e-01 4.4566581747153294e-01 +457 -2.4132420839469965e-01 8.4836564798364844e-01 1.0955942184355154e+00 +344 6.5538410556645377e-01 1.0527852545027427e+00 6.8154568558028386e-02 +399 -8.3144103937947644e-01 1.9062875610231711e-01 -5.6419101062513155e-01 +472 7.2667243545881360e-02 4.9535613585667015e-01 8.3691870718219108e-01 +364 -2.9526170592863293e-01 -8.7862728527604539e-01 8.2689159018968139e-01 +333 1.7241045239522232e-01 1.0435495829586168e-01 -4.6109306717747689e-01 +223 3.7919285550573256e-01 -1.0672385659469772e+00 3.6984147519214111e-01 +277 -8.8028901289995187e-01 5.8075379122029680e-01 -5.0555689035944396e-01 +487 1.5229361655153211e+00 -4.3334638823410909e-01 -5.2025315373818937e-01 +202 1.1890248672755632e+00 -1.3103428543183313e+00 1.3877359082264291e+00 +139 -3.8593619396281981e-01 -4.1419081865118412e-01 4.8341157555249414e-01 +218 -1.2190042149414249e+00 8.6603581070857749e-01 5.3180226853912571e-01 +10 4.0346230429792890e-01 8.8022045895227152e-01 3.0611723210999553e-01 +64 1.0687583963535543e+00 -2.5590555538042992e-01 8.2852453062456832e-01 
+40 -1.0543471069223134e+00 -7.9810717548135568e-01 1.4389073785681245e-01 +328 3.2979243925573831e-02 3.1076559402174242e-01 -1.6410904905031269e+00 +292 -6.3224783054646483e-01 -4.3704038413109603e-01 -2.2759444134328080e+00 +245 -4.7869651996764956e-01 1.4231085014444791e-01 -1.0109564296885869e+00 +148 -7.1011989912373741e-01 -6.8437374624235792e-01 -4.7084060341785217e-01 +435 -2.3710180935788738e-01 7.3150957873543554e-01 1.9569288343148944e-01 +301 -7.3626400316656140e-02 -1.2518892197053937e+00 6.4938262930099341e-02 +476 4.4522311443074564e-01 8.4413304640745324e-01 6.9180275295562699e-01 +310 -4.4579106235383259e-01 -1.4870237388818175e-01 -1.7519046471808755e+00 +137 1.0191269790481958e+00 -1.1871217736355175e+00 6.0687187776921336e-01 +434 -8.0977924479936902e-01 8.2445677967490594e-01 -7.2982619204505017e-01 +184 9.3329968718567657e-01 -7.8960961760367476e-01 -3.2387020805541877e-01 +5 -7.6591643318776403e-01 4.4567507028100856e-01 -5.1504861374485367e-01 +41 -6.0827814801341495e-01 1.0335818250945215e+00 -1.4262577781586459e+00 +410 5.1617643329833041e-01 -1.2505433325731774e-01 6.5548767709779598e-01 +418 7.4452116863445517e-01 5.5176595694257291e-01 4.8433486134467341e-01 +34 -1.0960797951083094e+00 -9.5559899011366922e-01 4.1467430632337771e-01 +67 -1.4762168791195980e+00 7.3902009632630203e-02 -9.8563314451155759e-01 +390 1.1040922756330935e+00 -1.0608530270906485e-01 -8.5480860400114822e-01 +125 1.1001683944330445e+00 2.5213959563273414e-01 -7.0907876564322847e-01 +105 9.3642884637919491e-02 3.2241122070134182e-01 3.4267503174724429e-01 +274 1.4805711826308454e+00 1.9591264275732104e-01 4.6208005550476272e-01 +314 3.3724005635036658e-01 -4.9228354871213487e-01 -5.9105855919176131e-01 +408 4.1789097671328662e-01 2.2054753884758851e-01 9.9409268338014201e-01 +142 1.3529751978153088e+00 8.8819247946851643e-01 -2.9699787225611667e-02 +25 -3.6747378202427655e-01 -1.6653943785151797e+00 2.1594786196965424e-01 +458 -7.3004682150577682e-01 
-2.5761460079892318e-01 -7.5492049253862104e-01 +165 -2.6776300065898789e-01 -3.1428309412647404e-02 7.6464836175889062e-01 +421 -2.2112566002008222e-01 -8.0033416193571816e-01 7.4057822704117926e-01 +151 1.6510010513742961e+00 6.2880202489514836e-01 1.3109463507573997e+00 +162 8.6466591751463506e-01 1.2003096922919751e+00 1.6720745976940152e+00 +120 7.1544273833415639e-01 7.8605685992147967e-01 1.5756692842267452e+00 +448 9.4818399052933544e-01 2.0373333697562079e-01 1.1821449360296239e-01 +50 -7.4255028295716818e-01 3.2131856627302258e-01 7.0344624031236247e-01 +443 5.1965861871733798e-01 -1.0469002555669342e+00 4.3373559100868195e-02 +482 1.9661775056161916e+00 1.8911734358361203e+00 7.2949567046678088e-01 +407 1.8582263860937767e-01 -1.3051257268383373e-01 5.0987931207045945e-01 +100 -3.7775697318477114e-01 6.0919017017007682e-01 7.1809182324625254e-01 +99 -1.2684617496938173e+00 7.3671717137305515e-01 -8.1216571664258830e-01 +107 -2.8776644836089277e-01 5.0874100972210130e-02 -4.8277793530428115e-01 +480 -1.8051331679926971e-01 8.6563042329278228e-01 -5.8352716569019136e-01 +115 -1.0589943552297505e+00 -5.2550981762149152e-01 -1.7244688387853080e-01 +278 1.5768829958501078e+00 1.4428098110470169e+00 8.8216722790329116e-01 +206 -1.3653407453215169e+00 5.7341428096126357e-01 2.2629859809279146e-01 +4 4.2054817038258185e-01 -2.9390858515089308e-01 -1.7875237254421117e+00 +214 -1.0426854223671351e+00 -9.9898728143962490e-01 -1.9850479848636837e-01 +65 4.1792586017239108e-01 8.0186431881714215e-01 -2.0107060384389590e+00 +490 6.7355251805377303e-01 -2.0698786089395388e-01 -3.1564646838967253e-01 +230 -1.3855973935201399e-01 1.6134941051796734e+00 2.3723214232474421e-01 +234 1.9442134326447673e+00 -1.3805634063343805e-01 -9.8210270774821284e-01 +92 1.8063255781217027e-01 9.3716955232457000e-01 -1.9187798219061411e-01 +103 3.9559605419305116e-01 -8.7523159162324915e-01 -8.6993815784441930e-01 +129 1.2253789920819727e+00 2.8171983713246046e-01 -1.2180024131333584e+00 
+483 9.6349992375661320e-01 -1.6348662648160852e+00 8.3712630375667085e-01 +110 8.0468512955471783e-01 -1.2623668571621947e+00 1.4617362498240638e+00 +126 -2.5741602043303030e-01 -2.5870568275485217e-01 -1.0282926023825125e+00 +276 7.0448925295125497e-01 -1.4486777483518087e-01 4.0589927727544634e-01 +294 4.9277474123671205e-01 1.0788251906341033e+00 1.0720476037391911e+00 +417 1.2134126298835348e+00 6.9341902173441838e-01 -6.5915650495314448e-01 +102 -9.4530772564542864e-01 3.5633646308084904e-01 2.1645216652503570e-02 +228 -3.1477177829390662e-01 -7.3015136189998553e-02 -1.3550661516185267e+00 +2 -1.5167590438599203e+00 3.2942906513575698e-01 -3.9430403043809092e-01 +222 -3.4853423639550507e-01 4.3402827204271471e-01 -8.9055344775375656e-01 +332 7.6671266929186610e-01 -4.2789189576138797e-01 2.2589706911206284e-01 +275 1.0091776783462640e+00 -2.3875723103898955e-01 9.0625273137588058e-01 +243 -2.4008164703199580e-01 2.4602229249214927e-01 -9.2542387373274426e-01 +394 -1.5496683613302659e-01 -1.4731622236607285e+00 4.3712483588631723e-02 +325 -9.9716991278750156e-01 9.0079916710328489e-01 6.1656781470423368e-02 +468 1.1367478364774437e+00 3.7268550263787792e-01 -1.9334998928228118e+00 +113 -6.1637943298580211e-01 -1.4259739417714792e-01 8.5880390205968704e-01 +47 1.4522907038923785e+00 -3.8138661441744320e-01 8.8902241399148096e-01 +474 -6.5326939744671464e-01 -2.9584216080544323e-01 -1.6703076347144621e-01 +343 -2.0865957540293453e-01 4.2320136734663144e-01 -5.2417372636564219e-01 +257 -1.6598412047016537e+00 3.9614126419385248e-01 6.2190463287430520e-01 +422 9.8621254947118736e-02 -4.5653326655140536e-01 -1.6959752095102018e-01 +317 8.1554926779727022e-01 -9.3952932101149422e-01 4.1213701483558629e-01 +322 9.7692194337623842e-02 7.2221211185210599e-03 1.4983319825497168e+00 +453 1.0112117753398511e+00 -1.6043121320019077e-01 -1.5109389287188097e+00 +82 -1.8245270960375951e-01 -1.7227940931260219e-01 9.4914460423273084e-01 +402 -1.3974163926710530e+00 
1.0238276956317611e+00 -2.7116721725643461e-01 +201 3.8154565319070316e-01 6.1162789639306392e-01 5.3616806651343329e-01 +159 -5.5623509138788352e-01 -7.3506001159846490e-01 2.9669217335717424e-01 +239 -1.1703971851987414e+00 -2.5624748584715568e-01 1.9987978519039629e-01 +340 -1.1651635328367398e+00 1.4923285973929103e+00 -5.0638764373602618e-01 +246 4.6916366638116123e-02 3.7471219463751682e-01 -8.5595777185358435e-01 +268 3.9776335718715750e-01 -4.1326407010466332e-01 6.4132644428253849e-01 +238 -1.0102989663900468e+00 2.5145560602393102e-01 1.6537809300909093e-01 +183 9.6649446422314420e-02 -3.1320299584600791e-01 8.9787784698607254e-01 +393 -8.1709715097020719e-01 6.5173585509788723e-01 1.3340272254613436e-01 +177 9.2862946550951486e-01 -7.4734865012317631e-01 2.6370187436425798e-02 +97 4.0324698775550633e-01 -6.5060630846020162e-01 4.3381661619662121e-01 +16 9.2225588537666714e-01 5.0761227093429218e-01 2.8224038915514177e-01 +15 7.4631799122735776e-01 -5.1735298399747642e-01 1.0267699639319139e+00 +467 2.9233724948217432e-01 -1.7768548355681282e-01 9.7602334864839035e-01 +404 -1.5591392530118275e+00 6.9331308976828065e-01 -1.9156738913640683e-02 +190 -2.3293475783192552e-01 1.9804103372721136e+00 1.4400561196029799e-01 +429 -3.1240905856412571e-01 5.3399975662794008e-01 -8.3672459106253427e-01 +356 1.1813499076138427e-03 4.5712322524667531e-01 7.2569550209608535e-02 +459 -8.2536401218028255e-01 -5.1587534016495484e-01 -3.2429633054579987e-01 +140 1.4169132709847332e+00 -1.6948863471126016e-01 -5.4590188862129563e-01 +121 -8.0623019357608017e-02 4.3956025735145637e-01 -4.1230464603429395e-01 +76 1.2149513180720075e+00 3.0664283974167705e-01 -6.2206166881984115e-01 +24 -9.0953792569391767e-01 2.7208504534230199e-01 -5.7968255809907288e-01 +215 -3.6635366137979941e-01 4.9083544993599626e-01 5.4540399677247620e-01 +406 -2.8711421371139068e-01 9.8028410382686626e-01 -1.5688211456737056e+00 +495 -2.6064405757207798e-01 1.0014422650657693e+00 1.2195244915445839e-01 
+256 5.1502433217852350e-01 2.1618101498582329e+00 -5.5671498574760148e-01 +348 1.5303758850601952e+00 1.3538700733088898e+00 -1.0404602147830582e+00 +18 8.7092721144608365e-01 8.8296303804909981e-01 4.7076145062566271e-01 +259 1.2579194534410862e+00 1.1275351076124240e+00 1.3255628670657002e-01 +261 -1.9457509303164215e+00 -5.7013962206489510e-01 6.5787967104563000e-02 +433 9.5143226323949670e-01 -1.9787864106930605e-01 2.1852616176467912e+00 +119 5.2387728877999162e-01 -7.5008900728698324e-01 1.0156553043596699e+00 +181 3.6547537534957092e-01 4.4856602347738611e-01 1.9502787843157898e-02 +392 1.8883706922750969e+00 1.0836617564317459e+00 6.8587165903540392e-01 +436 -7.5639783907079583e-01 -6.1811736982444421e-01 -2.2826839230281426e-01 +210 -4.5965909404278121e-01 -3.3285948977797364e-01 -2.8815683118847368e-01 +409 -8.9630394160430427e-01 1.2498156320615575e-01 -5.9486610361558201e-01 +437 -1.3458244785393264e-01 1.1611540390359226e-01 3.6707571520967769e-01 +114 -1.1780626363780731e+00 1.1424256338126497e+00 8.1898562672334474e-01 +431 3.9295632541408759e-01 -5.0466339464883803e-01 7.4701651109545830e-01 +296 -2.9562979314792992e-01 9.3794387166245374e-01 4.3821394647736267e-01 +477 -1.0332993738194893e+00 5.9732959339716152e-01 -1.6990304505644747e+00 +199 -1.0627414724922077e+00 3.6527741326574337e-01 -5.0076911469807539e-01 +31 1.1836258822972918e+00 -1.1805527709262840e+00 -1.0148440310730218e-01 +149 -9.7152762187442065e-01 -8.2689424175864179e-01 -9.8308725322137946e-01 +216 -2.6635793686817422e-01 -1.1249304826477751e+00 5.2675272099922754e-01 +425 -2.8287490221194594e-01 4.1086346332975782e-01 -5.6774387340250076e-01 +224 -6.3636066584070272e-01 3.8927738472590379e-01 -1.2043299828812322e+00 +352 -6.1634031766701480e-01 1.9344774717790295e-01 7.8374630494614250e-01 +289 -9.8440193523267117e-01 1.4471012647779624e+00 4.3922631035860088e-01 +455 -1.1942412759322272e+00 1.1300600257912083e+00 -4.7958784865825908e-02 +298 -8.4955866984285322e-01 
1.8131854984076790e-01 -1.9010434504555735e+00 +42 1.1523609145851870e+00 1.3198909771653919e+00 -1.1943568765046571e+00 +80 2.0928302112510155e+00 -1.9281727715628849e-01 3.5879525261694012e-01 +127 1.0275071129063864e+00 -7.8560029105042173e-01 8.4680312545585235e-01 +232 3.2836195573764693e-01 1.5921754951459595e+00 -1.4889040346123832e+00 +368 -1.0143580013676627e+00 -6.3286772474382025e-01 5.9003271203631313e-01 +486 3.0052735920053437e-02 -7.5917073216074904e-01 -4.0707609210832352e-01 +209 8.3084043466989521e-01 -1.2819860426930157e-01 9.8450910112172252e-01 +152 1.4018503697498572e+00 -1.3364969077798206e+00 5.1589841760235022e-01 +353 5.3822763779176630e-01 -4.8322987719532096e-01 3.5634065895494110e-01 +361 6.8264929410893807e-02 -1.3682995245096907e+00 -4.9273501999153174e-01 +391 6.7065636846264776e-01 -2.7904582963633018e-01 -6.5303896626046076e-01 +464 7.4567031841775322e-01 1.8046977002555891e+00 -3.9159771733015164e-01 +497 -1.3856699033357778e+00 -9.3599081481125368e-01 1.8204759707532270e-01 +498 1.0568629143757213e+00 -6.8137997517450560e-02 -3.7037143049187105e-01 +264 -7.7894167957263005e-01 2.7383401106980504e-01 7.5098074183721675e-01 +265 1.9614235016832940e-01 -4.2322665725562197e-01 -1.6236881765469688e-01 +350 -6.3256317352651559e-01 -5.9660876051471468e-02 5.2317095757618959e-01 +144 -2.0800773378554721e-01 -1.5156540919629388e-01 -1.8814379829548375e-01 +17 8.2103760580927843e-01 -1.1053143150308307e+00 1.1843881143235824e-01 +441 9.9191589104231870e-01 -1.9479579195707514e-01 1.8130903870970541e+00 +266 -9.4957121369000017e-02 8.6997989048119007e-02 -1.5903074475142429e-01 +375 8.1973242957273296e-01 1.0075441532082390e+00 6.6277118207442265e-01 +351 -1.0694047025049973e+00 1.2185457298237026e-01 7.2731979237547140e-01 +254 -1.4178061472233576e+00 1.0044925984667965e+00 5.9999609355847139e-01 +29 4.7548776272319854e-01 1.5001558567759110e-01 7.4645778473548241e-01 +198 5.7218364285113010e-01 2.9196622753924861e-01 
-1.2231490460337893e+00 +197 -1.4204377171882598e-01 2.1322055429678972e+00 3.5360172437812271e-01 +192 -4.3230000337254881e-01 8.1753260459826260e-01 -1.3961239427992276e+00 +220 9.2338173032465304e-02 7.2444278115958327e-01 4.3240465996750244e-02 +432 3.3837153168712325e-01 1.9757543957217949e-01 -3.2138599665961831e-01 +20 7.2972771576502227e-01 -7.1923547937920451e-01 3.8462300862783037e-02 +63 7.3459759072459618e-01 -2.1393390343408181e-01 1.4606085672716668e-01 +446 9.5489174587921177e-01 -2.2284752482645742e-01 -7.3147917056258960e-01 +172 -2.3233356067616504e+00 5.9696639792861073e-01 1.0311292046090428e+00 +405 -1.1779705391325133e+00 2.7846640825986735e-01 9.6440591359561623e-01 +323 1.2758508066094825e+00 1.1576865878083721e-02 1.3415986450967126e+00 +428 4.8021126796596170e-01 4.8003440312842205e-01 -4.1979524804611819e-02 +280 1.1723141734791545e+00 8.4290272518839332e-02 1.1240738308596074e+00 +54 8.2234236592018339e-02 6.9377897211693754e-02 4.6415665343815465e-01 +397 -2.4516753222359439e-01 -3.8647076314136586e-01 -3.1938187998240442e-01 +279 4.7475740365710301e-01 4.2033019401821775e-02 1.0791025172609856e+00 +134 4.6986570792330234e-01 7.5759398359663410e-01 1.3748348906115773e+00 +236 -1.4338534270949166e+00 6.5242359063738475e-01 -4.6879001422000049e-01 +493 -9.5795155221589912e-01 -9.3131490174346532e-01 5.2848598027407112e-01 +346 9.4017276609564226e-01 -1.4423054432512394e+00 -3.4883627652283825e-02 +295 -1.0211326704350645e+00 -1.5688283678435537e+00 2.2737881805456114e-01 +164 4.2933435120637192e-01 1.2908919636558716e+00 -1.8559649323965055e-01 +354 3.1232180266110654e-01 -3.3884174823706786e-01 2.0012570054806617e-01 +449 -4.1956545717722271e-01 8.5033185910749320e-01 -1.6923286514167504e+00 +185 -1.9847768794898153e-02 1.6093024328157031e+00 -3.4278049008935008e-01 +229 3.3354615877618454e-01 -2.1967770508754222e-01 -6.8288738508757751e-01 +135 -3.8598562184690355e-01 2.3459455181853697e-01 1.4591795655915993e-01 +211 
-1.1858788850370010e+00 1.3960363241794649e+00 1.1390210005059993e+00 +81 1.3604301094625607e-02 1.2711357052370638e+00 1.3840699383718491e+00 +37 3.5025606454523878e-01 -1.0316562604143946e+00 2.4944393037279400e+00 +355 -4.2713554752038259e-02 1.6441819966987897e-01 -8.2550044716409909e-01 +141 8.1569462514471880e-01 -6.8558748209733145e-01 7.0575287639668483e-01 +122 3.6274405429300222e-01 -4.1985688325391463e-01 2.5223429679090231e-01 +327 5.2689568302324030e-01 -7.1530570005039407e-01 8.1591540939993761e-03 +386 5.2540176252260506e-01 -8.0891002493580211e-01 -2.1080798818740659e-01 +59 3.8008676468324715e-01 1.2302042558959829e+00 -1.2893140123247515e+00 +57 -1.2756375464672689e+00 7.7885421584605408e-01 9.3218921154320622e-01 +489 -4.8772350194378145e-02 1.1774319824237183e+00 -1.3413963962772402e+00 +326 -8.2073133946066312e-01 -1.2685275916429064e+00 -1.0253535003668284e+00 +359 -1.8991447869255085e+00 -1.2154594584519982e+00 -1.7832247863578221e-01 +349 6.4100123949028309e-02 -3.2723074072385872e-01 -1.5565659217165477e+00 +247 4.5734576435732799e-01 4.9511064545143885e-01 -1.5945768728559153e+00 +365 -5.7917383718454207e-01 -2.3494837692508375e+00 -1.5004840205862926e+00 +382 -1.0813649655944142e+00 4.1831946020927230e-01 2.9306887264755938e-01 +130 -5.7488571729114812e-01 -9.6638702807104448e-01 -1.0278487271582966e+00 +471 1.1085411185196428e+00 -5.2404573748396321e-01 9.9734211160337760e-01 +21 -2.3901903994255250e-01 9.3578484419611557e-02 -7.3719660959071831e-01 +499 -4.9906801820206043e-02 4.0499214176262377e-01 -2.9371307883695258e-01 +253 -7.8928864460108283e-01 3.3838305995790624e-01 7.1275694444227405e-02 +302 1.5254343639867396e-01 -2.7881674166920268e-02 9.3627888140226589e-01 +324 -1.1159492478288651e+00 7.0632546920242867e-01 2.5394407536700253e-01 +154 4.8736880051344117e-01 -1.3177717741071062e+00 -7.9660620460177323e-01 +284 1.7716036485389025e-01 -3.6539348914929748e-01 -4.6941135291661629e-01 +161 -2.7479275242316997e-01 
7.6982156884799513e-01 -5.7088165723862649e-01 +112 2.7023059482372042e-01 2.0487127767495705e-01 -4.9350658197380731e-01 +39 -1.6840134362063963e-01 -2.0596641915022665e-01 1.2622422574022685e+00 +269 -1.4448376822412348e-01 4.2047192520573229e-01 -1.5751711248887881e+00 +470 -9.0198936075875380e-01 -1.0035762705605118e-01 1.1469701649179411e+00 +84 -3.7165102348228596e-01 1.5718083932531206e+00 1.8715295090631173e-01 +381 6.6740577340943863e-01 -1.9432536761932276e-01 -1.2757754863002870e+00 +416 7.2118119300841677e-01 8.6813375765446710e-01 -8.2350198387929519e-01 +182 3.3694648494832763e-01 -6.5830649973844163e-02 7.0076361524788766e-01 +347 -2.4949061953800500e-01 -1.1432286442922379e+00 -4.5757988288631407e-01 +380 -4.7143472726668367e-01 -2.9180051952103941e-01 6.0581680882348110e-01 +19 7.3007216948068054e-02 9.4947888518620716e-01 3.4627576178512598e-01 +430 -2.2320112834958328e-01 -3.8559102989803251e-01 1.0427665572858709e+00 +212 2.0170777345036919e-01 -9.9332505765821755e-01 1.0550787824425254e+00 +479 -7.7910620356048310e-01 -3.4759422268929079e-02 -5.8756871125828891e-01 +217 -7.0999315500035393e-04 -1.4874035841198494e-01 -1.1389898361525996e+00 +147 3.3953282916732841e-01 1.0503362153417393e+00 8.8554338624429496e-01 +13 1.4952012032076418e+00 -1.3492024227588162e-01 8.6834761707295621e-02 +170 -7.5230462228731629e-02 1.9875829136564169e-01 3.8512887113920530e-01 +204 -3.1459152727076800e-01 -7.2066080044499392e-01 -7.5277710665622377e-01 +306 8.0529112138128323e-01 5.8633344712571456e-01 -1.8256367575393391e+00 +414 -1.7678147014996259e+00 -7.1706970804552028e-01 6.0614094017430153e-01 +79 1.6303375369704216e+00 -9.6446702691566244e-01 1.7014099837302679e+00 +116 -1.7231587507489068e+00 -2.2374070614774770e+00 1.0442058418221465e+00 +231 -9.2546402309298947e-01 3.2157716287919585e-01 -6.2797312032694141e-01 +171 1.4243635122212095e+00 -3.4034411127116748e-01 2.1060050313379719e+00 +273 1.0777993506600285e+00 -1.7160435918146213e+00 
1.0194335249558832e+00 +260 1.4544392536719453e+00 9.3765863648494707e-01 1.9642995061847701e+00 +43 -1.1326443879098980e+00 1.4846909715454837e-01 1.6874301063514463e+00 +208 -1.1234489793683757e+00 -1.0408587064067447e-01 -4.8386417041968832e-01 +226 6.0458834484750512e-01 -4.5998597100022531e-01 -4.1420588246526679e-01 +370 -2.5264443636161833e-02 2.2888129565845829e-02 -7.5123334633027034e-02 +85 8.9124431006375637e-01 1.2150234901900336e+00 -1.3794080039677503e+00 +213 6.8750004752856542e-01 1.7643443473184617e-01 -7.6039731517372366e-01 +299 1.1166507011383160e+00 -6.9342398354435130e-03 4.0031150060326554e-01 +249 1.6676259701674601e+00 -1.2916011404415280e+00 8.3081388251616040e-02 +258 -3.3338543070706111e-01 -5.9677208925291603e-01 1.5474505568531705e-01 +388 3.3338877926510010e-02 3.6534261356710357e-01 -9.3163626937108668e-01 +91 -1.0436539497197659e+00 -3.6206368573074071e-01 -3.9406149981573330e-01 +415 1.8022253921012549e-01 -2.5361248238828202e+00 7.8761564433010600e-01 +27 1.6681493439613866e+00 -6.3832374162264238e-01 5.5825898158707521e-01 +465 -3.3883292906124979e-01 -1.9199928938047977e+00 -1.8916637827626086e-02 +72 -7.0256791239588934e-01 -1.3631374858756479e-01 1.1539466869530803e+00 +58 -9.7431680335045678e-02 -1.0635351953490649e-01 -4.5168665276313896e-01 +88 5.2937320884954953e-01 -1.1749454649084838e+00 1.2744645330784524e-01 +6 -1.1593774469007843e+00 5.1413980407296589e-01 1.2944389470552302e+00 +75 5.7374690805841033e-01 7.8433587872715163e-02 -3.9368611836917572e-01 +22 -9.6996881290452630e-01 1.2965746977075312e+00 -1.5394575986278033e-01 +128 -6.4070276272082638e-01 2.2708380115440285e-01 -1.6142126045669323e+00 +193 5.7443781554989515e-01 -1.2458447329459237e+00 -9.3796775534017041e-01 +389 6.7276417854677451e-01 -2.9571653479618748e-01 -2.4423872850138906e-01 +241 -1.4102001294916395e-02 -9.9059498112109945e-01 -8.4827835518272399e-01 +14 1.0515231184651945e+00 4.5617222115757200e-01 -5.2911246318135385e-01 +307 
5.0028759749137040e-03 -5.0699437336696396e-01 -2.5790615969461578e-01 +401 3.9207473498457289e-02 -9.2058641117610335e-02 -1.3689269955592848e-01 +248 1.5980225324156891e-01 -4.1600725109692227e-01 4.1253240562945254e-01 +335 5.6526042010051392e-01 -1.0563866873458263e+00 2.2963431232103770e-01 +281 4.7335967523683825e-01 -9.8764893385752195e-01 1.3275577965292436e+00 +33 9.5713349527393091e-02 -6.7578402785937589e-02 1.2308478723868863e+00 +221 -1.2080117435798665e+00 -2.2142477880517361e-01 -7.4060132361069042e-01 +251 5.1352069062594478e-02 -4.7173153686079722e-01 1.3645828375870801e+00 +138 -9.8931419992230929e-01 -5.6403083699472267e-01 4.8437720371021081e-01 +73 -8.7238230525929461e-01 2.2619668651197614e-01 -3.2904255863286136e-01 +237 1.5966476021876643e+00 -1.0734521639966819e+00 -5.7008225474518526e-01 +77 -6.5731386120913948e-01 -1.3672574981662913e-01 3.4397972232069346e-01 +339 6.8164227902965080e-01 -8.7654828372118265e-01 -9.2289632523091991e-01 +69 -1.0710495228647499e+00 -2.2798359247984892e-01 -1.4017813863798740e+00 +150 1.4728554201464288e+00 1.6873571351133543e-01 -4.9418642333439719e-02 +219 3.5347912349947574e-01 1.9808724875609746e-01 -5.0625815831733678e-01 +9 6.3426493841071574e-01 2.5658018877632760e-01 4.7953937763858034e-01 +3 -1.2144659411870629e+00 8.8297859772359033e-01 -2.6732917639533704e-01 +93 3.3822789154655764e-01 8.1143107440094409e-01 -2.7263548606070248e-01 +68 1.6351351046383883e-01 5.8445604848527588e-01 1.2062643207337951e+00 +35 1.6550671586466750e-02 1.6790869037398232e-01 2.5947900830558479e-02 +174 1.4403688502275100e+00 1.4476094634256031e+00 -1.0038411182890872e+00 +369 -7.9303772394565375e-01 -1.1303432398125628e+00 -1.5457440975201582e+00 +469 -5.5239153419630294e-01 -4.7075634498045837e-01 5.0539291079888571e-01 +270 5.0351525849727374e-01 1.3716919044917866e+00 2.8157430421583601e-01 +196 -2.2101193763774998e-01 9.4257812877032954e-01 -1.2881024224381614e+00 +235 -3.7410168233289759e-01 1.9712488864630789e-01 
4.3505985658493368e-01 +104 1.0770774810232915e+00 -7.7420116923795979e-01 6.4885799952777634e-01 +363 6.7513909705285891e-01 2.0186757605257277e-01 6.2260156794788402e-01 +440 -2.7563401138205812e-01 4.0196589304635166e-01 6.8696509786942431e-02 +271 3.5486339796858996e-01 -1.0482634846478769e+00 1.0693870635001128e+00 +385 2.0653637543135839e-01 1.6251902525248125e+00 -1.0240800582063148e+00 +367 1.2090890730479817e+00 7.7304993739281713e-01 -9.1196492155951403e-01 +8 1.5650406952874099e-01 1.4761853226272020e-01 7.1667065222651671e-01 +445 -8.1712119890107360e-01 5.0169175756271378e-01 6.3236875667481124e-02 +95 7.1388432342661989e-01 -2.6058252368101411e-02 -1.0159428151424379e+00 +227 -2.1897046900910261e-01 2.4492515107534649e-01 2.2679621024062299e-01 +45 5.0120245022687604e-01 -1.1380921868404339e+00 7.8232214243213327e-01 +62 5.7973338245234629e-01 -7.9117819757367913e-01 -5.4327640325878901e-01 +309 5.3715909018265540e-01 -1.7844975395461696e-01 -1.5521878040430610e+00 +70 -1.3649193486754410e-03 -1.5242800010205164e+00 1.6225001176581824e+00 +285 -1.5639729724408871e+00 1.4626366106935738e+00 -1.2286649953661322e+00 +387 -1.3226131489884976e+00 -1.0695489941103238e+00 -1.4911056977700106e+00 +484 -1.5087954881419843e+00 -2.3407968100714568e-01 8.9471716161806181e-01 +98 3.0624993702155973e-01 -1.5934008419723134e-01 -1.0700290072035785e+00 +282 -1.1678279208732874e+00 8.8816716100523774e-01 -3.7594379985472404e-01 +203 -1.2339287833422024e+00 -1.0851718286318821e+00 -3.5581216293468138e-02 +189 -2.5647410293058537e-01 -3.6970804452537526e-01 -2.7009745024499721e-01 +267 -2.3687566231483211e-02 5.4572876490146272e-01 1.2664988400691950e+00 +155 1.7966821085178111e-01 -1.4254017034472058e-01 -1.0614594824352175e+00 +313 -4.0833555690731410e-02 -4.8193546027639250e-01 -4.9602679807507655e-02 +250 4.3395781720710452e-01 9.1543705256636054e-01 -6.1780594432179192e-01 +49 -5.8496932778818223e-01 2.0845066341835794e-01 -4.5660091846264866e-01 +111 
-4.5577164923909808e-01 6.7687347516731056e-01 1.6901306317755600e+00 +124 2.3017035971304120e-01 1.2116381200880159e-02 -1.0715278739263430e+00 +488 1.1047027896127255e+00 -1.2726762145955062e+00 7.7913777050672761e-01 +48 -4.0669146183799126e-01 -1.0181831306551499e+00 6.3005463620771884e-01 +123 -1.2387342693749222e-01 -4.5460712390743935e-01 -5.6241811130968811e-01 +423 1.5051226831159246e+00 -1.0959205280256621e+00 3.5512567969767117e-01 +329 1.5695087401093555e-01 -4.0810545929224507e-01 1.5430948869435961e+00 +334 -1.0908322951892016e+00 -5.4577269465148259e-01 -6.7521769754195848e-01 +146 4.4361993166914260e-01 4.1962079818012665e-01 -3.9049661615798353e-01 +500 -3.0319085884539942e-01 7.6039240701443311e-01 2.0836797720615632e+00 +46 -3.9431430053902639e-01 3.7259615958614579e-02 -1.6080049457594121e+00 +374 9.2250275913940816e-01 -5.4107808677407720e-01 6.5542912197165987e-02 +52 7.3216124676172201e-01 9.4572980131871298e-01 -1.2696251833719743e+00 +466 1.5044833940669236e+00 -1.8483350803538756e+00 9.5057050040256286e-01 +272 -4.0011978947412463e-01 2.0429884556820119e-01 5.4991808303431688e-01 +331 5.7814259377466726e-01 1.7044411336854475e+00 -7.2986308719737536e-01 +12 6.8240059668147224e-01 8.7756006768431916e-01 -1.7546792723746987e+00 +178 -1.1529369201979585e+00 -3.9576843637993819e-01 4.1654001054549866e-01 +376 -1.3678465511836193e+00 -1.5404423718061540e+00 -8.9187926122069050e-01 +117 -1.3804354656475164e+00 -7.9075263667417661e-01 -3.6776627245391275e-01 +419 1.9706999335859632e-01 -1.1815264434314889e+00 1.6458873301525292e-01 +379 2.6784097653047051e-01 3.8806722276897926e-01 4.0603233211791162e-01 +53 1.0521966172519035e+00 5.1164934479578683e-01 9.8577725800730365e-01 +28 1.0129347774638686e+00 1.1058165761432928e+00 -3.9034581257989903e-01 +300 -1.4876024382598509e+00 -6.2467982655378396e-01 1.3535210035045362e+00 +83 9.7907832581926901e-01 1.4855763621624507e+00 4.1007013403839504e-01 +304 -1.8963384707907098e+00 5.2731582964002444e-01 
1.0962854464287259e+00 +51 8.5928138726119452e-01 -1.6162178311396377e-01 3.2746315986045632e-01 +180 -3.4136546641226267e-01 1.2771151944969144e+00 -3.9578085701206223e-01 +60 -1.7295103304379962e+00 -9.9492157200179920e-01 1.2224650147307308e+00 +255 5.0740556300071860e-01 -6.6892801585963790e-01 -4.4506165343889975e-01 +336 3.8821455708543523e-01 7.4853146108739210e-01 1.0393955338521590e+00 +74 -8.5185557539809709e-01 5.0099005012093123e-01 6.1187012870886726e-01 +11 -4.8598509694234203e-02 -9.7638648911006976e-01 3.6444261502684167e-01 +106 -8.3362253193144206e-02 -9.2846276284357510e-01 -2.0625632391587234e-01 +342 -1.1850581603543699e+00 -4.6587013939418070e-01 -2.5534356631266192e-01 +366 1.8044603030107276e-01 1.2857436498573065e+00 -4.9002876775305132e-01 +153 5.9355527141792742e-02 8.5309993046714749e-01 1.2601655126485285e+00 +384 -3.3199740732000493e-02 -1.1586262754841152e+00 2.7022536481319620e-01 +444 1.4366137360637837e-01 7.4777392856310121e-01 -1.3874143079892032e-01 +169 -2.2106828869911426e-02 4.3869453276997300e-01 -8.6050040149769946e-01 +312 -9.5175409360636343e-01 -7.9143766981298624e-01 -8.3109524929616485e-01 +131 -1.2661930440833327e-01 4.9560114120296378e-01 1.2481373975929652e-01 +290 4.7988720115251880e-03 4.3307801179133432e-01 7.2537453656012302e-02 +240 -1.4417217340859769e+00 1.4357683004122099e+00 1.4379544254488796e-01 +403 9.8034468727423985e-01 -2.0429774540476020e-01 -1.4454160823908524e-01 +438 1.1944593880989134e+00 2.3483551676455661e+00 1.9984542691081373e+00 +420 1.3486549938776343e-01 -1.0783938772052266e+00 2.4823645088293936e-01 +175 1.6188020186225376e+00 -1.2786027961604105e+00 1.5779664705358573e-01 +305 7.6304434006850930e-01 -1.0609104653684724e+00 -1.3992599279449225e+00 +341 -5.0986549858189378e-01 9.7345649143358179e-02 -5.4583627317992767e-01 +291 1.4937985447982780e+00 1.1345276617898410e-01 1.4735185581275405e+00 +372 2.7438262099508215e-01 2.3642499432513167e-01 2.0103126185436837e-01 +293 
-2.0812456222423235e+00 1.2585017395884830e+00 -1.9872753884412903e-01 +461 -1.7366155414350635e+00 6.7563297134023181e-01 -3.0241597324899727e-01 +263 -9.9912758612453856e-01 -1.4606803538340738e+00 -4.8347668759554979e-01 +473 1.5452042518079840e+00 1.3194414166855677e+00 -1.4200655529990689e+00 +166 -1.4143951087621034e-01 -1.1527398628151918e-01 -2.0324403247706951e-01 +132 1.6628716981291343e+00 -2.6751309910688231e-02 -1.1291183234894089e+00 +32 -2.3270952903472897e-01 2.1018074582476372e+00 1.1203402946623044e+00 +475 -1.2604994295453433e+00 -9.5339439518226154e-01 3.3662881048301792e-02 +283 1.2749816341052314e+00 4.7341406662300863e-02 8.5345288522412766e-01 +205 9.6825352209774937e-02 -1.0389613158303228e+00 -5.4505460161674724e-01 +158 8.8743698904591217e-01 1.0689060843560156e+00 6.1104406355038210e-01 +1 5.8273461256327774e-01 4.6658417699512433e-01 7.3964607391058224e-01 +485 4.4170048227791719e-01 -2.8581555293721217e-01 -1.0141724005249320e+00 +378 6.6654429040499424e-02 -1.8224880226392848e+00 1.2128685020610477e+00 +71 -9.1996605782626056e-02 -2.2291664206826806e-01 -1.2402752353304469e+00 +320 -9.5597854323421563e-02 -8.6852053155856812e-01 3.5066557017928385e-01 +481 7.9532304801009490e-01 -9.4180364698072971e-01 1.2495155405965916e-02 +318 -4.1450336396091869e-01 3.4120074122917260e-01 7.3720013366972947e-01 +496 -6.2393347120986375e-01 1.0494054390993381e+00 -1.0590359239552802e+00 +319 -9.5320962685676391e-01 9.2021659866276262e-01 5.5763202369068265e-01 +338 9.5180850251473562e-02 7.4581412234581979e-02 1.6562396473566801e+00 +358 -1.2039319191565336e+00 9.9635011716776112e-01 3.7776599320072252e-01 +225 -2.2878377680357529e-02 5.9393376646738183e-01 -1.7855655501046659e+00 +55 -1.2658412487783444e+00 8.3827650617277571e-01 3.7229167676011687e-01 +362 -1.1190412417889151e+00 -4.1093870066191052e-01 -5.5943417408569930e-02 +36 -1.1944064170110538e+00 5.2834231151049005e-01 -2.5661926163189262e-02 +133 -3.4046853819783990e-01 
8.7037698795660379e-01 2.6480748394474063e-01 +412 -8.2433971673189854e-01 -2.4364204997081215e-01 3.2182546651866395e-01 +89 1.2836300411162869e+00 -1.0534639266932924e+00 1.7273414295686953e+00 +38 -5.4018176537409834e-01 1.1189891674084445e+00 -5.4949279328730405e-01 +337 2.0680174457636453e+00 6.8756290663232345e-01 -5.3885707707990127e-01 +233 -1.1580426595635346e-01 1.5352867360847791e+00 1.8920809100075830e-02 +442 -5.2273629379374242e-01 -4.7573534425197805e-01 3.3349171376798270e-01 +143 -6.4456717536444386e-01 -8.2221860127843815e-01 -3.1619033462829965e-01 +194 4.6000888686458724e-01 2.4868203858229221e-01 -1.0844456124091726e+00 +308 7.0412401782623568e-01 1.4705120558853640e-01 -1.6984333842169479e+00 +286 -7.4825969157579242e-01 1.7277684761952672e+00 -4.8658995664543186e-01 +61 -1.4087152482721139e+00 -1.9786675343505181e-01 -8.3049863139435698e-01 +426 -5.9317510584759559e-01 -3.1176204329742879e-01 9.7102624547089234e-01 +450 2.9660411649227686e-01 -1.0106432265344207e+00 -8.0608626908093450e-01 +188 -4.5857896465809017e-01 8.3781782180488956e-02 1.6679261825519407e+00 +94 -2.5105865768429703e-01 -1.0277678550758340e+00 2.0110948522283595e-01 +252 -2.6250057461969262e-01 -8.3779492646801812e-01 -1.2443616941843418e+00 +321 -1.3849227082743394e+00 -7.9961814140752485e-01 -5.2758719721869440e-01 +156 9.7248559724229044e-01 1.4856613149426096e-01 -1.0762319296143268e+00 +167 4.8758255885458768e-01 3.7938735168685944e-01 -8.2720329905212353e-01 +330 4.8147102008722625e-01 -2.9945144880586022e-01 -7.3225876236181275e-01 +195 1.6483881807209144e+00 -1.9598861592898009e-01 -6.2914422221089158e-02 +78 2.3726384124165131e-01 2.7514362192855252e-01 1.7571172900747967e-01 +157 -2.1865090243933563e+00 3.7632441115528098e-01 7.7333806476534650e-01 +373 9.8884980347773899e-01 3.4121263358683873e-01 -3.3626353210018556e-01 +56 -5.1874451216553419e-01 5.0889073330236878e-01 -7.4130187216438104e-01 +315 1.5052362221906064e+00 -6.9840962964633502e-01 
-1.5488907100876634e-01 +478 1.1082301092586995e+00 2.9770971389874379e-01 -5.4580262091212584e-01 +109 9.4396405787734605e-01 4.9318782477478235e-01 1.2284569230572757e+00 +357 -1.2371693160029509e+00 4.1376978558912775e-01 3.3529522630256764e-01 +454 -4.4342104600414733e-01 -3.5633346291025164e-01 -2.1131375082322554e-01 +86 -2.6274074185542590e-01 -2.7912975828127135e-01 -3.4564526880694488e-02 +262 -2.0457328043953218e-01 -1.0549970010682790e+00 -8.7569945910877856e-02 +160 1.2508565962389259e-01 -1.1049367732134203e+00 -4.1414673901133031e-01 +168 -5.4833844765552298e-01 9.1122623641564449e-01 -8.1466354261018392e-01 +145 1.9424847461468013e+00 2.4717259671440775e-01 -8.1993106446423478e-02 +400 -7.3927429735196926e-01 9.6278828542903949e-01 9.8355595468926471e-01 +395 -1.7616642510587566e+00 -4.3839255400156163e-01 -4.3538389834393521e-01 +297 -8.1651466743971568e-01 1.9480772762641770e-02 -2.5090574319554337e-01 +207 9.9594630394692474e-01 -5.5333584932915503e-01 -8.1672262386449701e-02 +460 3.8863838706865678e-01 -1.9332061145329088e-01 7.5599739666291188e-01 +44 4.9322334356812642e-01 5.4097595609480897e-01 1.2096517945019982e+00 +377 -2.3588544095100208e-02 4.2843373407529362e-01 -8.1372583759538197e-01 +411 -7.0187880935114388e-01 -8.8403807864596529e-01 3.6139298123719504e-01 +396 -6.1324863673452068e-01 -5.8579142070009704e-01 -1.6980414017487730e+00 +424 -1.4405372913769463e+00 -1.2161267905238037e-01 -7.1275775203655271e-03 +66 -2.2455608800065441e-01 -5.5194985983545303e-02 -2.9031971555487707e-01 +371 2.8693892522161785e-01 -1.3942754264578394e-01 4.1638095135795372e-01 +186 -2.5542578648985415e-01 -2.7435394726950046e-01 1.6931522427410683e+00 +413 5.6007135922818607e-01 1.2189064363493711e-01 -7.5271223366515216e-01 diff --git a/examples/USER/uef/nvt_uniaxial/in.nvt_uniaxial b/examples/USER/uef/nvt_uniaxial/in.nvt_uniaxial new file mode 100644 index 0000000000000000000000000000000000000000..613f46935cba579aa52fba7f6cfaed40a932ef3a --- /dev/null 
+++ b/examples/USER/uef/nvt_uniaxial/in.nvt_uniaxial @@ -0,0 +1,28 @@ +# uniaxial NVT deformation of WCA fluid + +units lj +atom_style atomic + + +pair_style lj/cut 1.122562 +read_data data.wca +pair_coeff 1 1 1.0 1.0 +pair_modify shift yes + +neighbor 0.5 bin +neigh_modify delay 0 + +change_box all triclinic + +fix 1 all nvt/uef temp 0.722 0.722 0.5 erate -0.5 -0.5 + +fix 2 all momentum 100 linear 1 1 1 + +#dump 1 all atom 25 dump.lammpstrj + +#dump 2 all cfg/uef 25 dump.*.cfg mass type xs ys zs + +thermo_style custom step c_1_press[1] c_1_press[2] c_1_press[3] + +thermo 50 +run 10000 diff --git a/examples/USER/uef/nvt_uniaxial/log.22Sep17.npt_uniaxial.g++.1 b/examples/USER/uef/nvt_uniaxial/log.22Sep17.npt_uniaxial.g++.1 new file mode 100644 index 0000000000000000000000000000000000000000..2c13448192214878b0d36c4568820ada2ed610dc --- /dev/null +++ b/examples/USER/uef/nvt_uniaxial/log.22Sep17.npt_uniaxial.g++.1 @@ -0,0 +1,281 @@ +LAMMPS (22 Sep 2017) +# uniaxial NVT deformation of WCA fluid + +units lj +atom_style atomic + + +pair_style lj/cut 1.122562 +read_data data.wca + orthogonal box = (0 0 0) to (8.39798 8.39798 8.39798) + 1 by 1 by 1 MPI processor grid + reading atoms ... + 500 atoms + reading velocities ... + 500 velocities +pair_coeff 1 1 1.0 1.0 +pair_modify shift yes + +neighbor 0.5 bin +neigh_modify delay 0 + +change_box all triclinic + triclinic box = (0 0 0) to (8.39798 8.39798 8.39798) with tilt (0 0 0) + +fix 1 all nvt/uef temp 0.722 0.722 0.5 erate -0.5 -0.5 + +fix 2 all momentum 100 linear 1 1 1 + +#dump 1 all atom 25 dump.lammpstrj + +#dump 2 all cfg/uef 25 dump.*.cfg mass type xs ys zs + +thermo_style custom step c_1_press[1] c_1_press[2] c_1_press[3] + +thermo 50 +run 10000 +Neighbor list info ... 
+ update every 1 steps, delay 0 steps, check yes + max neighbors/atom: 2000, page size: 100000 + master list distance cutoff = 1.62256 + ghost atom cutoff = 1.62256 + binsize = 0.811281, bins = 11 11 11 + 1 neighbor lists, perpetual/occasional/extra = 1 0 0 + (1) pair lj/cut, perpetual + attributes: half, newton on + pair build: half/bin/newton/tri + stencil: half/bin/3d/newton/tri + bin: standard +Per MPI rank memory allocation (min/avg/max) = 2.742 | 2.742 | 2.742 Mbytes +Step c_1_press[1] c_1_press[2] c_1_press[3] + 0 6.3937851 7.0436438 6.4461087 + 50 10.369902 10.999889 6.5437384 + 100 12.411384 13.145871 7.8892802 + 150 12.88182 12.114068 7.5155182 + 200 10.375571 10.979773 6.5624056 + 250 10.158901 10.251273 5.4525068 + 300 10.011314 9.8371382 5.4031393 + 350 9.4890008 9.7992108 5.054963 + 400 9.1715116 9.2647886 4.9600208 + 450 9.8495961 9.3353483 4.986786 + 500 9.5903809 8.576173 4.8252116 + 550 8.8057509 9.7152078 4.9089022 + 600 8.9743682 10.031277 4.8924072 + 650 10.233852 9.3059527 5.1977058 + 700 9.2361906 9.6439971 5.0923763 + 750 10.449751 9.5911465 5.2548714 + 800 9.8366857 9.9873885 4.9456956 + 850 10.266351 9.9620134 5.0602713 + 900 9.6325813 9.3910946 5.1466624 + 950 9.1911919 9.1519006 4.8275345 + 1000 9.2871712 9.8508501 5.2674099 + 1050 9.7052035 10.291793 5.160359 + 1100 9.9425552 10.361827 5.5520484 + 1150 10.377563 10.26588 5.5997436 + 1200 9.6368269 10.354413 5.4137873 + 1250 9.2071055 9.1706334 4.9800411 + 1300 9.7140516 8.7257777 4.8536943 + 1350 10.145983 9.8247797 4.8921813 + 1400 9.1998975 9.6935675 5.2269871 + 1450 9.4838136 8.9035726 4.6867121 + 1500 9.066389 9.811658 4.8875316 + 1550 10.007083 9.8946118 5.204609 + 1600 9.722137 9.9291588 5.326712 + 1650 9.7424614 9.6995966 5.3577819 + 1700 9.7185135 9.5922931 5.385856 + 1750 9.6368481 8.1414594 5.0046079 + 1800 9.8065782 9.6798036 4.92473 + 1850 9.0463737 10.024205 5.6495695 + 1900 9.0774484 10.527582 5.0363426 + 1950 9.1691461 10.049703 4.8517514 + 2000 9.7619602 10.049532 
5.2088038 + 2050 9.9203348 9.8056395 5.7699852 + 2100 9.1455665 9.5864018 5.1410542 + 2150 8.8501135 9.7917307 5.141824 + 2200 9.1433685 9.8168548 5.1742441 + 2250 9.3248968 10.454532 5.5456356 + 2300 10.210505 9.361499 5.0935226 + 2350 9.5041186 9.598986 5.2451005 + 2400 8.968896 9.2811921 4.7799868 + 2450 9.8201159 10.133892 4.9981817 + 2500 9.4421084 9.7875766 5.4124809 + 2550 9.6410643 9.9148309 5.0603288 + 2600 9.3810676 9.5346849 5.3233836 + 2650 9.2340337 8.7233538 5.0361302 + 2700 9.405597 9.7848488 4.7537042 + 2750 9.9447412 9.2366502 4.9736459 + 2800 10.189443 9.452684 5.624362 + 2850 9.6680124 9.0941543 5.0101469 + 2900 8.4350889 9.1595018 4.7706866 + 2950 9.0056117 9.3498593 4.7655963 + 3000 9.4795416 9.7400747 5.4705868 + 3050 9.6605264 9.4558374 5.1240166 + 3100 9.681451 9.7966554 5.3174458 + 3150 9.7887528 9.7371194 4.9349103 + 3200 9.9720054 9.5592538 5.2306417 + 3250 10.185023 9.9436037 5.4127312 + 3300 9.9670676 9.6069307 5.026261 + 3350 9.0090411 9.5975249 4.8881019 + 3400 9.6535707 9.748683 4.9933873 + 3450 9.7774977 9.7570511 4.8512619 + 3500 9.7250053 10.36386 5.4057249 + 3550 9.567788 9.773413 5.3936671 + 3600 9.2094148 9.0211247 5.2328675 + 3650 9.3512098 9.73407 4.576624 + 3700 9.159074 9.2611591 5.2996448 + 3750 9.4772798 8.9792211 5.1007079 + 3800 9.3898357 9.1150609 5.2126508 + 3850 9.2466312 9.1460651 4.867472 + 3900 9.9316993 9.3061137 5.1219265 + 3950 9.2550913 9.780254 5.371885 + 4000 10.005356 9.7328972 5.2535791 + 4050 9.5228242 9.5134113 5.4181393 + 4100 9.576808 10.082425 5.1272491 + 4150 10.20265 9.6667058 5.529118 + 4200 9.9443671 10.09427 5.6163734 + 4250 9.4956875 9.9462818 5.0820085 + 4300 10.350144 10.003702 5.1621505 + 4350 10.119486 9.8691507 5.4913457 + 4400 9.4991604 10.516185 5.2512264 + 4450 9.0812854 9.2835603 5.1695714 + 4500 9.7538478 8.5298834 4.5954607 + 4550 9.8920394 9.3581299 5.202587 + 4600 9.2694921 9.3338291 5.201958 + 4650 10.925818 9.5062049 4.8694531 + 4700 11.020014 10.130224 5.582409 + 4750 9.5005142 
9.3571561 5.456739 + 4800 9.6670147 9.6628702 5.0451252 + 4850 9.134147 9.1308788 4.7950594 + 4900 9.7466206 8.6171443 4.8716275 + 4950 9.0397505 9.1996036 5.2010502 + 5000 9.6898652 9.8914655 5.3959279 + 5050 10.06771 9.7523891 5.3690408 + 5100 10.000963 9.6279379 5.4077384 + 5150 9.8686159 10.179702 5.0686824 + 5200 9.8866247 9.725152 5.4350049 + 5250 9.2068346 10.214424 5.3187713 + 5300 9.713992 9.8069045 5.496359 + 5350 9.423852 9.364085 5.2144606 + 5400 9.4350241 9.5584633 5.0339001 + 5450 10.555124 10.784922 5.1938072 + 5500 9.4147344 10.33187 5.4360602 + 5550 9.8514653 9.6575827 5.4959779 + 5600 9.3138107 9.6592624 4.941387 + 5650 9.1224809 8.7112257 5.0435936 + 5700 8.8289158 10.749686 4.8916132 + 5750 9.7200279 10.030606 5.2033161 + 5800 9.8439873 9.6289015 5.5131934 + 5850 9.6257294 9.4128988 4.9196038 + 5900 9.7490214 9.5776313 5.0301815 + 5950 9.1430855 10.108944 5.1406243 + 6000 9.3358644 9.5633737 4.9787073 + 6050 9.4432774 8.9464304 5.1466052 + 6100 8.8878373 9.5048946 4.9190238 + 6150 9.6451898 9.2419823 5.0159841 + 6200 9.5042173 8.9414307 5.2634247 + 6250 9.0896505 9.7230651 5.3340322 + 6300 8.8100599 8.8781352 5.4117914 + 6350 9.3104601 9.0233294 5.3136432 + 6400 9.368101 9.6387362 4.7833216 + 6450 10.334343 9.8384149 5.3606204 + 6500 9.8824036 10.022627 6.0857086 + 6550 9.7034443 10.026765 5.17604 + 6600 9.3757845 9.899268 5.2301359 + 6650 10.540821 10.4343 5.5287065 + 6700 9.6317649 9.8923579 5.6045768 + 6750 9.5982772 10.07897 5.1221451 + 6800 10.239883 10.189967 5.3167447 + 6850 10.017271 9.7680902 5.229621 + 6900 9.6200416 10.129301 5.1998759 + 6950 9.0361417 8.923798 5.1652612 + 7000 9.3153521 9.063054 4.6860773 + 7050 8.6434091 9.0363436 4.7811975 + 7100 9.4955395 9.3830541 5.022538 + 7150 9.3392402 9.1847119 5.1544622 + 7200 9.4676321 9.8370036 4.8854442 + 7250 9.5115882 10.350324 4.9780525 + 7300 9.6025583 9.6247917 5.473794 + 7350 9.8919524 10.049446 4.9816931 + 7400 9.6814319 9.9410894 5.265078 + 7450 9.4130955 10.191436 5.2531256 + 
7500 9.8114668 8.8461635 5.0562894 + 7550 10.321567 9.4730124 5.2043655 + 7600 9.5059024 9.8330367 5.0749721 + 7650 10.067084 10.606423 5.5598818 + 7700 10.896159 10.084281 5.5159718 + 7750 9.754306 10.162301 5.2475876 + 7800 9.7278145 9.801009 5.0685504 + 7850 9.8639905 10.323104 5.2458864 + 7900 9.7246799 9.1377357 5.1841319 + 7950 10.381792 9.6977533 5.0977386 + 8000 9.7265224 9.2999829 4.7925571 + 8050 9.5203288 9.3144956 4.7539211 + 8100 9.6900973 9.6614063 5.304712 + 8150 9.713677 9.9154149 4.8178575 + 8200 9.4733597 9.8948632 5.036112 + 8250 9.7783036 9.9554334 5.3355682 + 8300 9.4034783 9.9156801 5.5539279 + 8350 9.2984025 9.2013949 5.0753991 + 8400 9.9299078 9.6166801 5.115776 + 8450 9.526737 9.3312125 4.7800587 + 8500 9.581956 10.065906 4.9756092 + 8550 9.2767953 9.326006 5.3024978 + 8600 9.799968 8.4660845 5.0199109 + 8650 9.8985354 10.127852 4.9098064 + 8700 8.7952691 10.521133 5.6840528 + 8750 9.8299997 9.5588553 5.3085734 + 8800 9.0811776 9.5704532 5.1684993 + 8850 9.8303571 9.7618932 5.1251259 + 8900 9.9238794 9.9654863 5.2999683 + 8950 10.851304 9.9682289 5.4133763 + 9000 9.5523794 9.1890766 5.1469144 + 9050 9.7461948 8.9611236 4.9490826 + 9100 10.138917 9.6757567 5.0473544 + 9150 9.4869835 9.4786575 5.0142464 + 9200 10.263518 10.079135 5.1493398 + 9250 9.8691684 9.0908275 5.3221203 + 9300 9.8586707 9.4177643 5.1525265 + 9350 9.3375816 9.9167208 5.4846207 + 9400 9.5603903 9.4813199 4.6237495 + 9450 10.30892 9.5217736 5.6163214 + 9500 9.327949 9.9831649 4.8923915 + 9550 9.8421656 9.3202702 5.3352046 + 9600 8.8543704 9.4556702 4.6430041 + 9650 10.103399 9.2161072 4.8658062 + 9700 9.507811 9.9647378 5.227369 + 9750 9.4988096 8.9942893 5.2491418 + 9800 9.8007958 9.234452 5.1740203 + 9850 9.6029685 10.076042 5.023107 + 9900 9.4035691 10.13782 5.2775777 + 9950 9.6517135 10.355994 5.3035779 + 10000 9.9157616 9.7741952 5.5269431 +Loop time of 3.47119 on 1 procs for 10000 steps with 500 atoms + +Performance: 1244529.812 tau/day, 2880.856 timesteps/s +99.5% 
CPU use with 1 MPI tasks x no OpenMP threads + +MPI task timing breakdown: +Section | min time | avg time | max time |%varavg| %total +--------------------------------------------------------------- +Pair | 0.53077 | 0.53077 | 0.53077 | 0.0 | 15.29 +Neigh | 2.219 | 2.219 | 2.219 | 0.0 | 63.93 +Comm | 0.21866 | 0.21866 | 0.21866 | 0.0 | 6.30 +Output | 0.0027909 | 0.0027909 | 0.0027909 | 0.0 | 0.08 +Modify | 0.41956 | 0.41956 | 0.41956 | 0.0 | 12.09 +Other | | 0.08042 | | | 2.32 + +Nlocal: 500 ave 500 max 500 min +Histogram: 1 0 0 0 0 0 0 0 0 0 +Nghost: 908 ave 908 max 908 min +Histogram: 1 0 0 0 0 0 0 0 0 0 +Neighs: 3489 ave 3489 max 3489 min +Histogram: 1 0 0 0 0 0 0 0 0 0 + +Total # of neighbors = 3489 +Ave neighs/atom = 6.978 +Neighbor list builds = 4567 +Dangerous builds = 0 + +Please see the log.cite file for references relevant to this simulation + +Total wall time: 0:00:03 diff --git a/examples/USER/uef/nvt_uniaxial/log.22Sep17.npt_uniaxial.g++.4 b/examples/USER/uef/nvt_uniaxial/log.22Sep17.npt_uniaxial.g++.4 new file mode 100644 index 0000000000000000000000000000000000000000..276eb2f443017d2b9fb1c4e52f8d981896fcb6c4 --- /dev/null +++ b/examples/USER/uef/nvt_uniaxial/log.22Sep17.npt_uniaxial.g++.4 @@ -0,0 +1,281 @@ +LAMMPS (22 Sep 2017) +# uniaxial NVT deformation of WCA fluid + +units lj +atom_style atomic + + +pair_style lj/cut 1.122562 +read_data data.wca + orthogonal box = (0 0 0) to (8.39798 8.39798 8.39798) + 1 by 2 by 2 MPI processor grid + reading atoms ... + 500 atoms + reading velocities ... 
+ 500 velocities +pair_coeff 1 1 1.0 1.0 +pair_modify shift yes + +neighbor 0.5 bin +neigh_modify delay 0 + +change_box all triclinic + triclinic box = (0 0 0) to (8.39798 8.39798 8.39798) with tilt (0 0 0) + +fix 1 all nvt/uef temp 0.722 0.722 0.5 erate -0.5 -0.5 + +fix 2 all momentum 100 linear 1 1 1 + +#dump 1 all atom 25 dump.lammpstrj + +#dump 2 all cfg/uef 25 dump.*.cfg mass type xs ys zs + +thermo_style custom step c_1_press[1] c_1_press[2] c_1_press[3] + +thermo 50 +run 10000 +Neighbor list info ... + update every 1 steps, delay 0 steps, check yes + max neighbors/atom: 2000, page size: 100000 + master list distance cutoff = 1.62256 + ghost atom cutoff = 1.62256 + binsize = 0.811281, bins = 11 11 11 + 1 neighbor lists, perpetual/occasional/extra = 1 0 0 + (1) pair lj/cut, perpetual + attributes: half, newton on + pair build: half/bin/newton/tri + stencil: half/bin/3d/newton/tri + bin: standard +Per MPI rank memory allocation (min/avg/max) = 2.71 | 2.71 | 2.71 Mbytes +Step c_1_press[1] c_1_press[2] c_1_press[3] + 0 6.3937851 7.0436438 6.4461087 + 50 10.369902 10.999889 6.5437384 + 100 12.411384 13.145871 7.8892802 + 150 12.88182 12.114068 7.5155182 + 200 10.375571 10.979773 6.5624056 + 250 10.158901 10.251273 5.4525068 + 300 10.011314 9.8371382 5.4031393 + 350 9.4890008 9.7992108 5.054963 + 400 9.1715116 9.2647886 4.9600208 + 450 9.8495961 9.3353483 4.986786 + 500 9.5903809 8.576173 4.8252116 + 550 8.8057509 9.7152078 4.9089022 + 600 8.9743682 10.031277 4.8924072 + 650 10.233852 9.3059527 5.1977058 + 700 9.2361906 9.6439971 5.0923762 + 750 10.449751 9.5911465 5.2548714 + 800 9.8366858 9.9873885 4.9456956 + 850 10.266351 9.9620134 5.0602713 + 900 9.6325811 9.3910947 5.1466627 + 950 9.1911906 9.1518996 4.8275336 + 1000 9.2871726 9.8508561 5.2674079 + 1050 9.7052019 10.291782 5.1603609 + 1100 9.9425827 10.361848 5.5520588 + 1150 10.37757 10.265891 5.5997246 + 1200 9.6367069 10.354451 5.4138749 + 1250 9.2075006 9.1705136 4.9806417 + 1300 9.7126123 8.7247301 
4.8528533 + 1350 10.146034 9.8319205 4.8958266 + 1400 9.19638 9.7052094 5.2332913 + 1450 9.4667073 8.9014702 4.7073387 + 1500 9.0949401 9.8381729 4.8904182 + 1550 10.068959 9.8096692 5.1613658 + 1600 10.131443 9.7086921 5.0913546 + 1650 9.9557572 9.9183989 5.2635245 + 1700 9.3820418 8.6135867 4.9395498 + 1750 8.9486365 9.3433293 5.2674469 + 1800 10.352322 10.354432 5.3335153 + 1850 8.9629784 9.6172224 5.0824828 + 1900 8.5429652 9.749815 5.1577895 + 1950 8.9828002 8.9631646 5.0948426 + 2000 9.4171989 9.7122117 5.1386251 + 2050 9.767299 9.6146113 4.8965319 + 2100 10.049399 10.02243 5.3856622 + 2150 9.2639644 10.09669 5.3020322 + 2200 9.1916162 10.841127 5.0271109 + 2250 9.9859708 8.519568 5.24568 + 2300 9.9462279 9.5850706 5.0632906 + 2350 9.8043011 9.7605126 5.3148323 + 2400 9.089818 9.6474522 5.0012486 + 2450 9.655874 9.3255636 4.8291262 + 2500 9.0259445 9.3074827 5.1593031 + 2550 9.1610478 9.7356492 5.0741161 + 2600 9.3153881 9.3936949 5.4830363 + 2650 9.6212696 9.598252 4.7172875 + 2700 9.318906 9.665656 5.1917527 + 2750 9.6613838 9.7106796 5.1843146 + 2800 10.231844 9.9407641 4.6940455 + 2850 10.008093 9.2781076 5.2624957 + 2900 10.464168 9.808666 5.1457978 + 2950 9.7080601 9.6972304 5.406456 + 3000 9.6851209 10.050737 5.1198394 + 3050 8.9093581 9.213428 5.233108 + 3100 8.8098921 9.6607476 4.9625331 + 3150 8.6608386 9.7503441 5.0737533 + 3200 9.7004403 9.6748778 5.0734462 + 3250 10.077054 10.318711 5.3233841 + 3300 10.63887 9.4901739 5.516542 + 3350 10.232909 9.3407073 5.4989967 + 3400 9.8854134 9.9542625 5.1982468 + 3450 9.9388203 9.3394716 4.8135833 + 3500 9.2514026 9.5857527 5.4605449 + 3550 8.3940282 9.0549836 4.6951548 + 3600 8.7696659 9.6262816 4.6302087 + 3650 9.0695173 8.8520895 5.0814723 + 3700 9.4700744 9.2777557 4.6825004 + 3750 9.0221844 9.3407486 5.3984156 + 3800 9.1478369 9.1420043 5.3024474 + 3850 9.2652818 9.700053 5.3355364 + 3900 10.248456 9.2480211 5.2375956 + 3950 9.8259922 9.6130415 5.5978761 + 4000 9.404877 9.7931698 5.3767927 + 4050 
10.131713 9.7047295 5.2964594 + 4100 9.8128638 9.4253237 5.5308166 + 4150 10.25183 9.1333595 5.1957555 + 4200 10.449736 9.295762 4.7863033 + 4250 9.7304858 9.4482515 5.356439 + 4300 9.2773777 9.4110855 4.9879246 + 4350 8.9325082 9.3429549 5.0410132 + 4400 9.2603855 9.7905381 4.7436126 + 4450 8.7600443 9.9160722 5.196316 + 4500 9.0824514 10.036035 4.873051 + 4550 9.3884333 9.6644343 5.1154951 + 4600 9.8181676 9.6385118 5.3639835 + 4650 9.1574799 9.5386974 4.7487094 + 4700 10.275911 9.5383553 5.1084297 + 4750 9.2476854 9.3353591 5.4773008 + 4800 9.3422407 9.1931821 4.9210291 + 4850 9.9033126 9.6443642 5.1334553 + 4900 9.8469467 9.6836455 5.5101146 + 4950 10.211229 9.760253 5.0151668 + 5000 9.3256273 10.109873 5.4129479 + 5050 9.9704879 9.8504809 5.2191163 + 5100 9.4820718 9.711504 4.9633504 + 5150 9.2952171 9.6955742 5.3214246 + 5200 9.8400791 9.4763906 4.9149518 + 5250 10.080112 9.5634741 5.1701348 + 5300 9.4221014 9.8149742 5.3605931 + 5350 8.8228402 10.121343 5.3192212 + 5400 9.7318719 10.508763 5.0044083 + 5450 8.7429847 9.6583774 5.033313 + 5500 9.9243256 10.239521 5.2956506 + 5550 9.6088558 9.8555986 5.359257 + 5600 10.904352 10.72033 5.7874034 + 5650 9.4774477 9.7246962 5.596949 + 5700 9.8527139 9.2188245 4.6518586 + 5750 9.7932375 9.2763721 4.6663307 + 5800 9.5630073 9.1704583 5.4784197 + 5850 10.159996 9.5206168 5.0012706 + 5900 9.1667978 9.6891715 5.1959301 + 5950 9.2194131 9.2170699 5.1653264 + 6000 9.5917124 9.2038051 5.1004966 + 6050 9.4141124 9.9166471 5.0535712 + 6100 10.231166 9.7746591 5.2399634 + 6150 9.6054192 9.34446 5.0711646 + 6200 9.9279645 8.9546561 5.4698039 + 6250 9.1581437 8.95439 4.9336111 + 6300 8.9257232 9.0665473 5.188718 + 6350 9.7685695 9.0822789 4.8454457 + 6400 9.5140226 9.5073414 5.3420644 + 6450 9.5379198 9.3316002 5.1940338 + 6500 10.216584 9.8982859 5.2790157 + 6550 10.221286 10.913585 5.4521496 + 6600 10.925166 10.088055 5.2079758 + 6650 10.012501 9.3870455 5.1740433 + 6700 10.176139 9.4869588 5.3783369 + 6750 9.2819415 
8.1431975 4.8256476 + 6800 9.790723 9.7932943 5.3602927 + 6850 9.1327199 9.8354267 5.1356642 + 6900 9.240909 9.3854618 5.1767268 + 6950 10.652806 9.4355671 5.1314851 + 7000 9.1676917 10.055501 5.2824651 + 7050 9.6486488 9.1063089 4.9596976 + 7100 9.3733678 9.5875095 5.0882285 + 7150 9.282243 9.354934 4.6752644 + 7200 8.7370777 10.1445 5.0217245 + 7250 9.563419 9.929442 5.1720916 + 7300 9.9128346 9.2426863 5.3595023 + 7350 9.9319949 9.9479477 5.4844629 + 7400 9.4671739 9.2380048 5.3274217 + 7450 9.7531193 9.4075641 4.9426738 + 7500 10.331422 9.4704554 5.3036636 + 7550 8.7724434 9.663179 4.9492563 + 7600 10.222748 9.1674852 4.8135992 + 7650 9.5288583 8.5866929 5.0342955 + 7700 8.8832847 8.8035037 4.6184473 + 7750 9.4847076 9.7969656 5.0035855 + 7800 9.2782486 9.1283641 5.3151299 + 7850 8.962989 8.7729872 5.0374817 + 7900 9.6336556 9.4975426 4.9832641 + 7950 10.125279 10.044247 5.2740873 + 8000 9.9705097 9.6925988 5.2344188 + 8050 9.7836318 9.5794261 4.9174534 + 8100 10.79561 10.04809 5.1604639 + 8150 9.7261726 10.372878 5.3083843 + 8200 10.338569 10.08189 5.6555172 + 8250 9.7833315 9.9834209 5.3790753 + 8300 9.5857944 9.6052323 5.0689289 + 8350 9.6706688 9.6189242 4.7725678 + 8400 10.570423 9.5786093 5.1894242 + 8450 9.6514599 10.168359 5.0733592 + 8500 10.273682 9.9179284 5.4014563 + 8550 9.3120287 10.146837 4.9895115 + 8600 9.511943 9.644112 5.462624 + 8650 10.380674 9.1117114 5.156727 + 8700 10.068596 8.7687113 5.1440814 + 8750 9.2484971 9.2477678 4.9318794 + 8800 9.7298469 8.9480303 5.1151321 + 8850 9.7299502 10.415138 4.7902908 + 8900 10.966912 10.732962 5.4793574 + 8950 10.328384 9.9501313 5.6238396 + 9000 9.7385041 9.8319224 5.1926497 + 9050 9.7971055 9.5740203 5.1111302 + 9100 9.7789727 9.9281901 5.1786549 + 9150 9.9306964 9.3360599 4.9524547 + 9200 9.8798841 10.240752 5.1691344 + 9250 10.185445 9.4934917 4.9188964 + 9300 8.9184663 8.9349408 4.8079511 + 9350 9.6552187 9.9846949 4.9619969 + 9400 10.304306 9.2298208 5.2822855 + 9450 9.8379613 10.041703 
5.4186514 + 9500 10.221443 9.5342818 4.8929802 + 9550 9.9723047 10.072856 5.4169676 + 9600 9.3923879 9.2984387 5.4452785 + 9650 8.9072589 9.7482374 4.7835208 + 9700 9.8370121 10.205922 5.0385145 + 9750 9.4274542 9.4653248 5.6340681 + 9800 9.7668106 9.3265705 5.3154126 + 9850 10.422549 10.362922 5.361592 + 9900 9.6264407 9.9790162 5.6381052 + 9950 10.35018 9.8853593 5.2639184 + 10000 9.6190853 9.7903758 5.1583115 +Loop time of 1.32471 on 4 procs for 10000 steps with 500 atoms + +Performance: 3261084.476 tau/day, 7548.807 timesteps/s +98.4% CPU use with 4 MPI tasks x no OpenMP threads + +MPI task timing breakdown: +Section | min time | avg time | max time |%varavg| %total +--------------------------------------------------------------- +Pair | 0.1301 | 0.13489 | 0.13886 | 0.9 | 10.18 +Neigh | 0.5654 | 0.575 | 0.5883 | 1.3 | 43.41 +Comm | 0.35135 | 0.37147 | 0.38856 | 2.4 | 28.04 +Output | 0.0028336 | 0.0032777 | 0.004292 | 1.0 | 0.25 +Modify | 0.16328 | 0.16711 | 0.17231 | 0.8 | 12.61 +Other | | 0.07297 | | | 5.51 + +Nlocal: 125 ave 128 max 122 min +Histogram: 1 1 0 0 0 0 0 0 1 1 +Nghost: 477 ave 489 max 469 min +Histogram: 2 0 0 0 0 1 0 0 0 1 +Neighs: 861 ave 877 max 848 min +Histogram: 1 0 1 0 0 1 0 0 0 1 + +Total # of neighbors = 3444 +Ave neighs/atom = 6.888 +Neighbor list builds = 4560 +Dangerous builds = 0 + +Please see the log.cite file for references relevant to this simulation + +Total wall time: 0:00:01 diff --git a/examples/coreshell/in.coreshell.wolf b/examples/coreshell/in.coreshell.wolf new file mode 100644 index 0000000000000000000000000000000000000000..337057d0c829b9b033503e37c7d1cd8b5cdf7aa6 --- /dev/null +++ b/examples/coreshell/in.coreshell.wolf @@ -0,0 +1,71 @@ +# Testsystem for core-shell model compared to Mitchel and Finchham +# Shamelessly copied from in.coreshell.dsf but this script uses Wolf summation. 
+ +# ------------------------ INITIALIZATION ---------------------------- + +units metal +dimension 3 +boundary p p p +atom_style full + +# ----------------------- ATOM DEFINITION ---------------------------- + +fix csinfo all property/atom i_CSID +read_data data.coreshell fix csinfo NULL CS-Info + +group cores type 1 2 +group shells type 3 4 + +neighbor 2.0 bin +comm_modify vel yes + +# ------------------------ FORCE FIELDS ------------------------------ + +pair_style born/coul/wolf/cs 0.1 20.0 20.0 # A, rho, sigma=0, C, D +pair_coeff * * 0.0 1.000 0.00 0.00 0.00 +pair_coeff 3 3 487.0 0.23768 0.00 1.05 0.50 #Na-Na +pair_coeff 3 4 145134.0 0.23768 0.00 6.99 8.70 #Na-Cl +pair_coeff 4 4 405774.0 0.23768 0.00 72.40 145.40 #Cl-Cl + +bond_style harmonic +bond_coeff 1 63.014 0.0 +bond_coeff 2 25.724 0.0 + +# ------------------------ Equilibration Run ------------------------------- + +reset_timestep 0 + +thermo 50 +thermo_style custom step etotal pe ke temp press & + epair evdwl ecoul elong ebond fnorm fmax vol + +compute CSequ all temp/cs cores shells + +# output via chunk method + +#compute prop all property/atom i_CSID +#compute cs_chunk all chunk/atom c_prop +#compute cstherm all temp/chunk cs_chunk temp internal com yes cdof 3.0 +#fix ave_chunk all ave/time 100 1 100 c_cstherm file chunk.dump mode vector + +thermo_modify temp CSequ + +# velocity bias option + +velocity all create 1427 134 dist gaussian mom yes rot no bias yes temp CSequ +velocity all scale 1427 temp CSequ + +fix thermoberendsen all temp/berendsen 1427 1427 0.4 +fix nve all nve +fix_modify thermoberendsen temp CSequ + +# 2 fmsec timestep + +timestep 0.002 +run 500 + +unfix thermoberendsen + +# ------------------------ Dynamic Run ------------------------------- + +run 1000 diff --git a/examples/coreshell/log.22Sep17.coreshell.wolf.g++.1 b/examples/coreshell/log.22Sep17.coreshell.wolf.g++.1 new file mode 100644 index 0000000000000000000000000000000000000000..35c30913fcb46b65294dfcc6f1ae4955420dcfa7 
--- /dev/null +++ b/examples/coreshell/log.22Sep17.coreshell.wolf.g++.1 @@ -0,0 +1,190 @@ +LAMMPS (22 Sep 2017) +# Testsystem for core-shell model compared to Mitchel and Finchham +# Shamelessly copied from in.coreshell.dsf but this script uses Wolf summation. + +# ------------------------ INITIALIZATION ---------------------------- + +units metal +dimension 3 +boundary p p p +atom_style full + +# ----------------------- ATOM DEFINITION ---------------------------- + +fix csinfo all property/atom i_CSID +read_data data.coreshell fix csinfo NULL CS-Info + orthogonal box = (0 0 0) to (24.096 24.096 24.096) + 1 by 1 by 1 MPI processor grid + reading atoms ... + 432 atoms + scanning bonds ... + 1 = max bonds/atom + reading bonds ... + 216 bonds + 1 = max # of 1-2 neighbors + 0 = max # of 1-3 neighbors + 0 = max # of 1-4 neighbors + 1 = max # of special neighbors + +group cores type 1 2 +216 atoms in group cores +group shells type 3 4 +216 atoms in group shells + +neighbor 2.0 bin +comm_modify vel yes + +# ------------------------ FORCE FIELDS ------------------------------ + +pair_style born/coul/wolf/cs 0.1 20.0 20.0 # A, rho, sigma=0, C, D +pair_coeff * * 0.0 1.000 0.00 0.00 0.00 +pair_coeff 3 3 487.0 0.23768 0.00 1.05 0.50 #Na-Na +pair_coeff 3 4 145134.0 0.23768 0.00 6.99 8.70 #Na-Cl +pair_coeff 4 4 405774.0 0.23768 0.00 72.40 145.40 #Cl-Cl + +bond_style harmonic +bond_coeff 1 63.014 0.0 +bond_coeff 2 25.724 0.0 + +# ------------------------ Equilibration Run ------------------------------- + +reset_timestep 0 + +thermo 50 +thermo_style custom step etotal pe ke temp press epair evdwl ecoul elong ebond fnorm fmax vol + +compute CSequ all temp/cs cores shells + +# output via chunk method + +#compute prop all property/atom i_CSID +#compute cs_chunk all chunk/atom c_prop +#compute cstherm all temp/chunk cs_chunk temp internal com yes cdof 3.0 +#fix ave_chunk all ave/time 100 1 100 c_cstherm file chunk.dump mode vector + +thermo_modify temp CSequ + +# velocity bias 
option + +velocity all create 1427 134 dist gaussian mom yes rot no bias yes temp CSequ +Neighbor list info ... + update every 1 steps, delay 10 steps, check yes + max neighbors/atom: 2000, page size: 100000 + master list distance cutoff = 22 + ghost atom cutoff = 22 + binsize = 11, bins = 3 3 3 + 1 neighbor lists, perpetual/occasional/extra = 1 0 0 + (1) pair born/coul/wolf/cs, perpetual + attributes: half, newton on + pair build: half/bin/newton + stencil: half/bin/3d/newton + bin: standard +velocity all scale 1427 temp CSequ + +fix thermoberendsen all temp/berendsen 1427 1427 0.4 +fix nve all nve +fix_modify thermoberendsen temp CSequ + +# 2 fmsec timestep + +timestep 0.002 +run 500 +Per MPI rank memory allocation (min/avg/max) = 8.59 | 8.59 | 8.59 Mbytes +Step TotEng PotEng KinEng Temp Press E_pair E_vdwl E_coul E_long E_bond Fnorm Fmax Volume + 0 -637.41039 -677.06805 39.657659 1427 -21302.622 -677.06805 1.6320365 -678.70009 0 0 1.2935454e-14 3.3306691e-15 13990.5 + 50 -635.62711 -667.67134 32.044236 1153.0465 -4533.0717 -669.45724 37.76997 -707.22721 0 1.7858939 9.6844521 2.2001656 13990.5 + 100 -632.76337 -662.83035 30.066977 1081.8989 -3492.8736 -664.98454 39.197093 -704.18164 0 2.1541967 11.063962 2.1543406 13990.5 + 150 -630.82538 -663.70056 32.875182 1182.9464 -74.330324 -666.12202 46.263665 -712.38569 0 2.4214607 11.739683 2.7558416 13990.5 + 200 -629.1541 -664.54637 35.39227 1273.5187 -1707.5508 -666.87772 41.796197 -708.67391 0 2.3313445 10.594804 3.0025376 13990.5 + 250 -627.86587 -662.60879 34.742918 1250.1531 -1258.7537 -665.21416 43.017024 -708.23118 0 2.6053655 10.576999 1.8400986 13990.5 + 300 -627.10755 -664.12897 37.021419 1332.1403 -1891.3466 -666.39618 40.769593 -707.16577 0 2.2672094 9.412943 1.2434258 13990.5 + 350 -626.27558 -665.04303 38.767448 1394.9676 -1436.8514 -667.47081 41.854746 -709.32556 0 2.4277827 10.304721 1.977594 13990.5 + 400 -625.55098 -661.86388 36.312896 1306.6455 -331.92076 -664.4632 44.426542 -708.88975 0 2.599325 
11.081635 2.1734468 13990.5 + 450 -624.88626 -661.07359 36.187328 1302.1272 -2325.834 -663.6031 39.662697 -703.26579 0 2.5295037 9.9810051 1.3068929 13990.5 + 500 -623.87093 -660.24145 36.370525 1308.7192 410.85324 -662.86944 45.869201 -708.73864 0 2.6279856 10.592785 1.8162326 13990.5 +Loop time of 11.7065 on 1 procs for 500 steps with 432 atoms + +Performance: 7.381 ns/day, 3.252 hours/ns, 42.711 timesteps/s +100.0% CPU use with 1 MPI tasks x no OpenMP threads + +MPI task timing breakdown: +Section | min time | avg time | max time |%varavg| %total +--------------------------------------------------------------- +Pair | 11.535 | 11.535 | 11.535 | 0.0 | 98.54 +Bond | 0.001443 | 0.001443 | 0.001443 | 0.0 | 0.01 +Neigh | 0.10952 | 0.10952 | 0.10952 | 0.0 | 0.94 +Comm | 0.048796 | 0.048796 | 0.048796 | 0.0 | 0.42 +Output | 0.00038695 | 0.00038695 | 0.00038695 | 0.0 | 0.00 +Modify | 0.0067511 | 0.0067511 | 0.0067511 | 0.0 | 0.06 +Other | | 0.004243 | | | 0.04 + +Nlocal: 432 ave 432 max 432 min +Histogram: 1 0 0 0 0 0 0 0 0 0 +Nghost: 9328 ave 9328 max 9328 min +Histogram: 1 0 0 0 0 0 0 0 0 0 +Neighs: 297280 ave 297280 max 297280 min +Histogram: 1 0 0 0 0 0 0 0 0 0 + +Total # of neighbors = 297280 +Ave neighs/atom = 688.148 +Ave special neighs/atom = 1 +Neighbor list builds = 21 +Dangerous builds = 0 + +unfix thermoberendsen + +# ------------------------ Dynamic Run ------------------------------- + +run 1000 +Per MPI rank memory allocation (min/avg/max) = 8.59 | 8.59 | 8.59 Mbytes +Step TotEng PotEng KinEng Temp Press E_pair E_vdwl E_coul E_long E_bond Fnorm Fmax Volume + 500 -623.87093 -660.24145 36.370525 1308.7192 410.85324 -662.86944 45.869201 -708.73864 0 2.6279856 10.592785 1.8162326 13990.5 + 550 -623.95762 -659.99899 36.041371 1296.8752 -442.62446 -662.68699 44.004419 -706.69141 0 2.6879934 10.936057 1.7358509 13990.5 + 600 -624.04214 -661.21748 37.175332 1337.6785 47.615854 -663.76133 45.009742 -708.77107 0 2.5438549 11.431366 2.1185464 13990.5 + 650 
-623.98279 -661.85255 37.86976 1362.6661 -1708.3823 -664.31138 40.933446 -705.24482 0 2.4588298 9.7960191 1.4159904 13990.5 + 700 -624.02941 -661.57484 37.54543 1350.9958 -124.09512 -663.95322 44.666402 -708.61962 0 2.3783801 10.518712 2.0854584 13990.5 + 750 -624.03935 -661.11619 37.076837 1334.1344 -1163.2721 -663.82553 42.1219 -705.94743 0 2.7093404 10.634859 1.9381654 13990.5 + 800 -623.98709 -659.43647 35.44938 1275.5737 -285.65702 -662.23782 44.650233 -706.88805 0 2.8013482 11.83459 3.4506407 13990.5 + 850 -623.96081 -661.77923 37.818422 1360.8188 -814.33212 -664.38161 42.860208 -707.24181 0 2.6023728 10.766451 2.9211132 13990.5 + 900 -623.96136 -662.98419 39.022831 1404.157 308.13105 -665.65877 45.053188 -710.71195 0 2.6745786 11.028799 1.8494322 13990.5 + 950 -623.91048 -660.63828 36.7278 1321.575 -445.31526 -663.79806 43.94833 -707.74639 0 3.1597729 11.852051 1.8238458 13990.5 + 1000 -623.90541 -661.03474 37.129332 1336.0233 522.57958 -663.50212 45.863261 -709.36538 0 2.4673781 10.949291 1.8614095 13990.5 + 1050 -624.01134 -660.99076 36.979422 1330.6291 -1710.2829 -663.67762 41.056102 -704.73372 0 2.6868521 10.86533 2.2154165 13990.5 + 1100 -624.02639 -660.86661 36.840225 1325.6204 1151.0236 -663.5066 47.207185 -710.71378 0 2.6399847 11.298044 1.8900703 13990.5 + 1150 -624.04418 -661.29819 37.254006 1340.5094 161.09847 -663.73735 45.222561 -708.95991 0 2.4391651 11.261119 2.2245673 13990.5 + 1200 -624.08628 -662.39932 38.313043 1378.6168 -1405.0927 -664.93121 41.728112 -706.65933 0 2.5318927 10.458041 2.1153159 13990.5 + 1250 -624.03036 -661.25661 37.226248 1339.5106 63.407721 -663.72719 45.086961 -708.81415 0 2.4705832 11.766021 2.2297809 13990.5 + 1300 -623.97475 -661.55998 37.585229 1352.4278 -763.66781 -663.80645 42.870244 -706.6767 0 2.2464703 10.098322 1.7614596 13990.5 + 1350 -623.93123 -660.30786 36.376631 1308.9389 -2389.8052 -663.04725 39.749029 -702.79628 0 2.7393936 10.879364 2.6622521 13990.5 + 1400 -623.86468 -658.44644 34.581765 1244.3543 
2262.7586 -660.95368 50.10608 -711.05976 0 2.5072434 11.528291 1.7861906 13990.5 + 1450 -623.85494 -661.47216 37.617217 1353.5789 -1435.0174 -664.13587 41.701767 -705.83764 0 2.6637191 11.297444 2.0038345 13990.5 + 1500 -623.79928 -659.70124 35.901959 1291.8588 -198.39562 -662.57415 44.358482 -706.93263 0 2.872907 11.075746 2.2821286 13990.5 +Loop time of 23.4119 on 1 procs for 1000 steps with 432 atoms + +Performance: 7.381 ns/day, 3.252 hours/ns, 42.713 timesteps/s +99.9% CPU use with 1 MPI tasks x no OpenMP threads + +MPI task timing breakdown: +Section | min time | avg time | max time |%varavg| %total +--------------------------------------------------------------- +Pair | 23.069 | 23.069 | 23.069 | 0.0 | 98.54 +Bond | 0.0029275 | 0.0029275 | 0.0029275 | 0.0 | 0.01 +Neigh | 0.22821 | 0.22821 | 0.22821 | 0.0 | 0.97 +Comm | 0.097941 | 0.097941 | 0.097941 | 0.0 | 0.42 +Output | 0.00074033 | 0.00074033 | 0.00074033 | 0.0 | 0.00 +Modify | 0.0042015 | 0.0042015 | 0.0042015 | 0.0 | 0.02 +Other | | 0.00865 | | | 0.04 + +Nlocal: 432 ave 432 max 432 min +Histogram: 1 0 0 0 0 0 0 0 0 0 +Nghost: 9388 ave 9388 max 9388 min +Histogram: 1 0 0 0 0 0 0 0 0 0 +Neighs: 297212 ave 297212 max 297212 min +Histogram: 1 0 0 0 0 0 0 0 0 0 + +Total # of neighbors = 297212 +Ave neighs/atom = 687.991 +Ave special neighs/atom = 1 +Neighbor list builds = 45 +Dangerous builds = 0 +Total wall time: 0:00:35 diff --git a/examples/coreshell/log.22Sep17.coreshell.wolf.g++.4 b/examples/coreshell/log.22Sep17.coreshell.wolf.g++.4 new file mode 100644 index 0000000000000000000000000000000000000000..84f854cf6797e74dc2c4ab25f75796b0baa9c9aa --- /dev/null +++ b/examples/coreshell/log.22Sep17.coreshell.wolf.g++.4 @@ -0,0 +1,190 @@ +LAMMPS (22 Sep 2017) +# Testsystem for core-shell model compared to Mitchel and Finchham +# Shamelessly copied from in.coreshell.dsf but this script uses Wolf summation. 
+ +# ------------------------ INITIALIZATION ---------------------------- + +units metal +dimension 3 +boundary p p p +atom_style full + +# ----------------------- ATOM DEFINITION ---------------------------- + +fix csinfo all property/atom i_CSID +read_data data.coreshell fix csinfo NULL CS-Info + orthogonal box = (0 0 0) to (24.096 24.096 24.096) + 1 by 2 by 2 MPI processor grid + reading atoms ... + 432 atoms + scanning bonds ... + 1 = max bonds/atom + reading bonds ... + 216 bonds + 1 = max # of 1-2 neighbors + 0 = max # of 1-3 neighbors + 0 = max # of 1-4 neighbors + 1 = max # of special neighbors + +group cores type 1 2 +216 atoms in group cores +group shells type 3 4 +216 atoms in group shells + +neighbor 2.0 bin +comm_modify vel yes + +# ------------------------ FORCE FIELDS ------------------------------ + +pair_style born/coul/wolf/cs 0.1 20.0 20.0 # A, rho, sigma=0, C, D +pair_coeff * * 0.0 1.000 0.00 0.00 0.00 +pair_coeff 3 3 487.0 0.23768 0.00 1.05 0.50 #Na-Na +pair_coeff 3 4 145134.0 0.23768 0.00 6.99 8.70 #Na-Cl +pair_coeff 4 4 405774.0 0.23768 0.00 72.40 145.40 #Cl-Cl + +bond_style harmonic +bond_coeff 1 63.014 0.0 +bond_coeff 2 25.724 0.0 + +# ------------------------ Equilibration Run ------------------------------- + +reset_timestep 0 + +thermo 50 +thermo_style custom step etotal pe ke temp press epair evdwl ecoul elong ebond fnorm fmax vol + +compute CSequ all temp/cs cores shells + +# output via chunk method + +#compute prop all property/atom i_CSID +#compute cs_chunk all chunk/atom c_prop +#compute cstherm all temp/chunk cs_chunk temp internal com yes cdof 3.0 +#fix ave_chunk all ave/time 100 1 100 c_cstherm file chunk.dump mode vector + +thermo_modify temp CSequ + +# velocity bias option + +velocity all create 1427 134 dist gaussian mom yes rot no bias yes temp CSequ +Neighbor list info ... 
+ update every 1 steps, delay 10 steps, check yes + max neighbors/atom: 2000, page size: 100000 + master list distance cutoff = 22 + ghost atom cutoff = 22 + binsize = 11, bins = 3 3 3 + 1 neighbor lists, perpetual/occasional/extra = 1 0 0 + (1) pair born/coul/wolf/cs, perpetual + attributes: half, newton on + pair build: half/bin/newton + stencil: half/bin/3d/newton + bin: standard +velocity all scale 1427 temp CSequ + +fix thermoberendsen all temp/berendsen 1427 1427 0.4 +fix nve all nve +fix_modify thermoberendsen temp CSequ + +# 2 fmsec timestep + +timestep 0.002 +run 500 +Per MPI rank memory allocation (min/avg/max) = 6.881 | 6.881 | 6.881 Mbytes +Step TotEng PotEng KinEng Temp Press E_pair E_vdwl E_coul E_long E_bond Fnorm Fmax Volume + 0 -637.41039 -677.06805 39.657659 1427 -21302.622 -677.06805 1.6320365 -678.70009 0 0 1.3530839e-14 3.6429193e-15 13990.5 + 50 -635.62704 -667.67108 32.044047 1153.0397 -4532.6842 -669.45828 37.771018 -707.2293 0 1.787201 9.6848095 2.2028349 13990.5 + 100 -632.76343 -662.83014 30.066711 1081.8893 -3493.0827 -664.98212 39.195967 -704.17809 0 2.151977 11.060773 2.1578583 13990.5 + 150 -630.82533 -663.70172 32.876385 1182.9897 -75.706974 -666.12608 46.261038 -712.38712 0 2.4243598 11.746728 2.7663319 13990.5 + 200 -629.15463 -664.55009 35.395466 1273.6337 -1707.9185 -666.88117 41.794868 -708.67604 0 2.331082 10.596229 3.0031523 13990.5 + 250 -627.86625 -662.60876 34.742511 1250.1384 -1263.5726 -665.214 43.005742 -708.21974 0 2.6052329 10.572641 1.841604 13990.5 + 300 -627.10829 -664.12159 37.013298 1331.8481 -1884.8587 -666.39136 40.786141 -707.1775 0 2.2697693 9.4160685 1.2472271 13990.5 + 350 -626.2729 -665.01858 38.745682 1394.1844 -1433.1302 -667.44315 41.864785 -709.30793 0 2.4245625 10.312641 1.9815612 13990.5 + 400 -625.54274 -661.84438 36.301638 1306.2404 -355.45544 -664.4483 44.370671 -708.81897 0 2.6039276 11.076154 2.1819703 13990.5 + 450 -624.88802 -661.12299 36.234964 1303.8413 -2356.6101 -663.6463 39.57847 
-703.22477 0 2.523314 9.8995886 1.3113549 13990.5 + 500 -623.86344 -660.20235 36.338906 1307.5814 462.72862 -662.8515 45.983055 -708.83455 0 2.6491489 10.523732 1.6709561 13990.5 +Loop time of 4.63313 on 4 procs for 500 steps with 432 atoms + +Performance: 18.648 ns/day, 1.287 hours/ns, 107.919 timesteps/s +99.4% CPU use with 4 MPI tasks x no OpenMP threads + +MPI task timing breakdown: +Section | min time | avg time | max time |%varavg| %total +--------------------------------------------------------------- +Pair | 2.8133 | 3.5934 | 4.4255 | 36.3 | 77.56 +Bond | 0.00042245 | 0.00050305 | 0.00059825 | 0.0 | 0.01 +Neigh | 0.0411 | 0.041561 | 0.04202 | 0.2 | 0.90 +Comm | 0.15622 | 0.9884 | 1.7686 | 69.2 | 21.33 +Output | 0.00028311 | 0.00031345 | 0.00040011 | 0.0 | 0.01 +Modify | 0.0039899 | 0.00403 | 0.0040654 | 0.0 | 0.09 +Other | | 0.004911 | | | 0.11 + +Nlocal: 108 ave 112 max 106 min +Histogram: 2 0 0 1 0 0 0 0 0 1 +Nghost: 6531.75 ave 6611 max 6460 min +Histogram: 1 0 0 1 0 1 0 0 0 1 +Neighs: 74316.2 ave 75641 max 72780 min +Histogram: 1 0 0 1 0 0 0 0 1 1 + +Total # of neighbors = 297265 +Ave neighs/atom = 688.113 +Ave special neighs/atom = 1 +Neighbor list builds = 21 +Dangerous builds = 0 + +unfix thermoberendsen + +# ------------------------ Dynamic Run ------------------------------- + +run 1000 +Per MPI rank memory allocation (min/avg/max) = 6.884 | 7.01 | 7.138 Mbytes +Step TotEng PotEng KinEng Temp Press E_pair E_vdwl E_coul E_long E_bond Fnorm Fmax Volume + 500 -623.86344 -660.20235 36.338906 1307.5814 462.72862 -662.8515 45.983055 -708.83455 0 2.6491489 10.523732 1.6709561 13990.5 + 550 -623.95339 -660.07946 36.126069 1299.9229 -362.67288 -662.8047 44.180832 -706.98553 0 2.725239 11.07199 2.0461377 13990.5 + 600 -624.04508 -661.27298 37.227902 1339.5701 110.39823 -663.90927 45.166569 -709.07584 0 2.6362911 11.747923 2.1846828 13990.5 + 650 -623.9608 -661.50573 37.544934 1350.9779 -1594.4701 -663.91531 41.226571 -705.14188 0 2.4095736 10.022027 
1.6264014 13990.5 + 700 -624.00658 -663.55636 39.549777 1423.1181 -588.9804 -665.88666 43.124023 -709.01068 0 2.3302979 9.924587 2.1027837 13990.5 + 750 -623.99813 -659.97695 35.978816 1294.6243 -364.1415 -662.75959 43.973556 -706.73315 0 2.782646 11.487832 1.8799695 13990.5 + 800 -624.01235 -661.04908 37.036728 1332.6911 -85.655852 -663.49204 44.570117 -708.06215 0 2.4429547 11.213149 1.6792926 13990.5 + 850 -624.02581 -659.39933 35.373519 1272.844 -1090.9179 -662.11969 42.375064 -704.49476 0 2.7203673 11.214629 2.2907474 13990.5 + 900 -624.09244 -663.33386 39.241421 1412.0225 303.76207 -666.00593 45.332458 -711.33839 0 2.6720667 11.897188 2.0599033 13990.5 + 950 -624.04544 -661.11578 37.070341 1333.9007 1637.6438 -663.61305 48.65892 -712.27197 0 2.49727 12.343774 2.429225 13990.5 + 1000 -623.95457 -661.51816 37.563586 1351.6491 -851.77182 -663.83594 42.813468 -706.64941 0 2.317782 10.251422 1.6922864 13990.5 + 1050 -623.98731 -662.01988 38.032562 1368.5242 -2123.9075 -664.72609 39.917382 -704.64348 0 2.706218 9.600838 1.7365559 13990.5 + 1100 -623.9653 -659.32023 35.354929 1272.1751 -436.90576 -662.14454 44.057776 -706.20232 0 2.8243167 11.267546 2.6807602 13990.5 + 1150 -624.02273 -665.13902 41.116293 1479.486 -128.13268 -667.4769 44.013761 -711.49066 0 2.3378768 10.406604 1.5102324 13990.5 + 1200 -623.97328 -663.08042 39.107135 1407.1905 -539.63284 -665.67946 43.319308 -708.99877 0 2.599042 10.632657 1.4608707 13990.5 + 1250 -623.92529 -661.81902 37.893723 1363.5284 1708.0621 -664.63624 48.396874 -713.03311 0 2.8172251 11.369352 1.8836979 13990.5 + 1300 -623.99283 -662.19734 38.204509 1374.7114 -820.77291 -664.34556 42.656202 -707.00176 0 2.1482151 10.186898 1.7081329 13990.5 + 1350 -623.99798 -658.70752 34.709535 1248.9519 776.28486 -662.01647 46.925412 -708.94188 0 3.3089536 12.457581 2.5027978 13990.5 + 1400 -623.96941 -662.15959 38.190183 1374.1959 880.21756 -664.90452 46.628183 -711.5327 0 2.7449282 11.918894 3.0466188 13990.5 + 1450 -623.95068 -662.25435 
38.303671 1378.2795 -391.56711 -664.7631 43.644066 -708.40716 0 2.5087493 10.465803 1.8744864 13990.5 + 1500 -624.00637 -661.48756 37.481185 1348.684 430.69453 -664.32151 45.704366 -710.02587 0 2.8339501 11.650821 2.0752813 13990.5 +Loop time of 6.7003 on 4 procs for 1000 steps with 432 atoms + +Performance: 25.790 ns/day, 0.931 hours/ns, 149.247 timesteps/s +99.1% CPU use with 4 MPI tasks x no OpenMP threads + +MPI task timing breakdown: +Section | min time | avg time | max time |%varavg| %total +--------------------------------------------------------------- +Pair | 5.9613 | 6.0752 | 6.2064 | 4.1 | 90.67 +Bond | 0.00087095 | 0.00088993 | 0.00091671 | 0.0 | 0.01 +Neigh | 0.06494 | 0.064964 | 0.064987 | 0.0 | 0.97 +Comm | 0.41743 | 0.54863 | 0.6626 | 13.8 | 8.19 +Output | 0.00053867 | 0.00059268 | 0.00074867 | 0.0 | 0.01 +Modify | 0.0013288 | 0.0013592 | 0.0013841 | 0.1 | 0.02 +Other | | 0.008613 | | | 0.13 + +Nlocal: 108 ave 116 max 102 min +Histogram: 2 0 0 0 0 0 0 1 0 1 +Nghost: 6504.5 ave 6624 max 6380 min +Histogram: 1 1 0 0 0 0 0 0 1 1 +Neighs: 74267 ave 79230 max 70993 min +Histogram: 2 0 0 0 0 1 0 0 0 1 + +Total # of neighbors = 297068 +Ave neighs/atom = 687.657 +Ave special neighs/atom = 1 +Neighbor list builds = 45 +Dangerous builds = 0 +Total wall time: 0:00:11 diff --git a/examples/melt/in.melt b/examples/melt/in.melt deleted file mode 100644 index bf6eb735472ef5283346090c472167e01e540ed8..0000000000000000000000000000000000000000 --- a/examples/melt/in.melt +++ /dev/null @@ -1,33 +0,0 @@ -# 3d Lennard-Jones melt - -units lj -atom_style atomic - -lattice fcc 0.8442 -region box block 0 $L 0 $L 0 $L -create_box 1 box -create_atoms 1 box -mass 1 1.0 - -velocity all create 3.0 87287 - -pair_style lj/cut 2.5 -pair_coeff 1 1 1.0 1.0 2.5 - -neighbor 0.3 bin -neigh_modify every 20 delay 0 check no - -fix 1 all nve - -#dump id all atom 50 dump.melt - -#dump 2 all image 25 image.*.jpg type type & -# axes yes 0.8 0.02 view 60 -30 -#dump_modify 2 pad 3 - -#dump 3 
all movie 25 movie.mpg type type & -# axes yes 0.8 0.02 view 60 -30 -#dump_modify 3 pad 3 - -thermo 50 -run 250 diff --git a/examples/peptide/in.peptide b/examples/peptide/in.peptide deleted file mode 100644 index cdf21636943726560d31b7f38982d6243bac62b8..0000000000000000000000000000000000000000 --- a/examples/peptide/in.peptide +++ /dev/null @@ -1,42 +0,0 @@ -# Solvated 5-mer peptide - -units real -atom_style full - -pair_style lj/charmm/coul/long 8.0 10.0 10.0 -bond_style harmonic -angle_style charmm -dihedral_style charmm -improper_style harmonic -kspace_style pppm 0.0001 - -read_data data.peptide - -neighbor 2.0 bin -neigh_modify delay 5 - -timestep 2.0 - -thermo_style multi -thermo 50 - -fix 1 all nvt temp 275.0 275.0 100.0 tchain 1 -fix 2 all shake 0.0001 10 100 b 4 6 8 10 12 14 18 a 31 - -group peptide type <= 12 - -dump 1 peptide atom 10 dump.peptide - -#dump 2 peptide image 25 image.*.jpg type type & -# axes yes 0.8 0.02 view 60 -30 bond atom 0.5 -#dump_modify 2 pad 3 - -#dump 3 peptide movie 25 movie.mpg type type & -# axes yes 0.8 0.02 view 60 -30 bond atom 0.5 -#dump_modify 3 pad 3 - -#compute bnd all property/local btype batom1 batom2 -#dump 2 peptide local 300 dump.bond index c_bnd[1] c_bnd[2] c_bnd[3] - -run 300 - diff --git a/examples/rigid/bodies.txt b/examples/rigid/bodies.txt new file mode 100644 index 0000000000000000000000000000000000000000..ddca2c7539b57a1eed2109d954bd5903bc7a765d --- /dev/null +++ b/examples/rigid/bodies.txt @@ -0,0 +1,83 @@ +81 +1 1 +2 1 +3 1 +4 1 +5 1 +6 1 +7 1 +8 1 +9 1 +10 2 +11 2 +12 2 +13 2 +14 2 +15 2 +16 2 +17 2 +18 2 +19 3 +20 3 +21 3 +22 3 +23 3 +24 3 +25 3 +26 3 +27 3 +28 4 +29 4 +30 4 +31 4 +32 4 +33 4 +34 4 +35 4 +36 4 +37 5 +38 5 +39 5 +40 5 +41 5 +42 5 +43 5 +44 5 +45 5 +46 6 +47 6 +48 6 +49 6 +50 6 +51 6 +52 6 +53 6 +54 6 +55 7 +56 7 +57 7 +58 7 +59 7 +60 7 +61 7 +62 7 +63 7 +64 8 +65 8 +66 8 +67 8 +68 8 +69 8 +70 8 +71 8 +72 8 +73 9 +74 9 +75 9 +76 9 +77 9 +78 9 +79 9 +80 9 +81 9 + diff --git 
a/examples/rigid/data.rigid-property b/examples/rigid/data.rigid-property new file mode 100644 index 0000000000000000000000000000000000000000..66fc42d2d2cf6b0e27a1ca0cb4cedbac889b0d75 --- /dev/null +++ b/examples/rigid/data.rigid-property @@ -0,0 +1,185 @@ +# LAMMPS data file for rigid bodies + +81 atoms +1 atom types + +-12 12 xlo xhi +-12 12 ylo yhi +-12 12 zlo zhi + +Masses + +1 1 + +Pair Coeffs + +1 1 1 + +Atoms + +1 1 0 0 0 +2 1 0 1 0 +3 1 0 0.5 0 +4 1 0.5 0 0 +5 1 0.5 0.5 1 +6 1 1 0.5 0 +7 1 0.5 1 0 +8 1 1 0 0 +9 1 1 1 0 +10 1 2 1 0 +11 1 1 2 0 +12 1 1.5 2 0 +13 1 1.5 1 0 +14 1 1 1.5 0 +15 1 1.5 1.5 1 +16 1 2 1.5 0 +17 1 2 2 0 +18 1 2 3 0 +19 1 2 2.5 0 +20 1 2.5 2 0 +21 1 2.5 2.5 1 +22 1 3 2.5 0 +23 1 2.5 3 0 +24 1 3 2 0 +25 1 3 3 0 +26 1 4 3 0 +27 1 3 4 0 +28 1 3.5 4 0 +29 1 3.5 3 0 +30 1 3 3.5 0 +31 1 3.5 3.5 1 +32 1 4 3.5 0 +33 1 4 4 0 +34 1 4 5 0 +35 1 4 4.5 0 +36 1 4.5 4 0 +37 1 4.5 4.5 1 +38 1 5 4.5 0 +39 1 4.5 5 0 +40 1 5 4 0 +41 1 5 5 0 +42 1 6 5 0 +43 1 5 6 0 +44 1 5.5 6 0 +45 1 5.5 5 0 +46 1 5 5.5 0 +47 1 5.5 5.5 1 +48 1 6 5.5 0 +49 1 6 6 0 +50 1 6 7 0 +51 1 6 6.5 0 +52 1 6.5 6 0 +53 1 6.5 6.5 1 +54 1 7 6.5 0 +55 1 6.5 7 0 +56 1 7 6 0 +57 1 7 7 0 +58 1 8 7 0 +59 1 7 8 0 +60 1 7.5 8 0 +61 1 7.5 7 0 +62 1 7 7.5 0 +63 1 7.5 7.5 1 +64 1 8 7.5 0 +65 1 8 8 0 +66 1 8 9 0 +67 1 8 8.5 0 +68 1 8.5 8 0 +69 1 8.5 8.5 1 +70 1 9 8.5 0 +71 1 8.5 9 0 +72 1 9 8 0 +73 1 9 9 0 +74 1 10 9 0 +75 1 9 10 0 +76 1 9.5 10 0 +77 1 9.5 9 0 +78 1 9 9.5 0 +79 1 9.5 9.5 1 +80 1 10 9.5 0 +81 1 10 10 0 + +Bodies + +1 1 +2 1 +3 1 +4 1 +5 1 +6 1 +7 1 +8 1 +9 1 +10 2 +11 2 +12 2 +13 2 +14 2 +15 2 +16 2 +17 2 +18 2 +19 3 +20 3 +21 3 +22 3 +23 3 +24 3 +25 3 +26 3 +27 3 +28 4 +29 4 +30 4 +31 4 +32 4 +33 4 +34 4 +35 4 +36 4 +37 5 +38 5 +39 5 +40 5 +41 5 +42 5 +43 5 +44 5 +45 5 +46 6 +47 6 +48 6 +49 6 +50 6 +51 6 +52 6 +53 6 +54 6 +55 7 +56 7 +57 7 +58 7 +59 7 +60 7 +61 7 +62 7 +63 7 +64 8 +65 8 +66 8 +67 8 +68 8 +69 8 +70 8 +71 8 +72 8 +73 9 +74 9 +75 9 +76 9 +77 9 +78 9 +79 9 +80 9 +81 9 
+ diff --git a/examples/rigid/in.rigid b/examples/rigid/in.rigid-atomfile similarity index 86% rename from examples/rigid/in.rigid rename to examples/rigid/in.rigid-atomfile index 4482395a6c1f7a918cea7c4eb98c386c1da3a989..8817386c79f7df174b1a538b107b88da5897d5b4 100644 --- a/examples/rigid/in.rigid +++ b/examples/rigid/in.rigid-atomfile @@ -2,6 +2,7 @@ units lj atom_style atomic +atom_modify map array pair_style lj/cut 2.5 @@ -9,6 +10,7 @@ read_data data.rigid velocity all create 100.0 4928459 + # unconnected bodies group clump1 id <> 1 9 @@ -21,11 +23,8 @@ group clump7 id <> 55 63 group clump8 id <> 64 72 group clump9 id <> 73 81 -#fix 1 all rigid group 9 clump1 clump2 clump3 clump4 clump5 & -# clump6 clump7 clump8 clump9 - -fix 1 all rigid/small group 9 clump1 clump2 clump3 clump4 clump5 & - clump6 clump7 clump8 clump9 +variable bodies atomfile bodies.txt +fix 1 all rigid custom v_bodies # 1 chain of connected bodies @@ -67,9 +66,9 @@ neigh_modify exclude group clump7 clump7 neigh_modify exclude group clump8 clump8 neigh_modify exclude group clump9 clump9 -thermo 10000 +thermo 100 -dump 1 all atom 50 dump.rigid +#dump 1 all atom 50 dump.rigid #dump 2 all image 100 image.*.jpg type type & # axes yes 0.8 0.02 view 60 -30 @@ -80,5 +79,5 @@ dump 1 all atom 50 dump.rigid #dump_modify 3 pad 5 timestep 0.0001 -thermo 50000 -run 100000 +thermo 50 +run 10000 diff --git a/examples/rigid/in.rigid-atomvar b/examples/rigid/in.rigid-atomvar new file mode 100644 index 0000000000000000000000000000000000000000..994ab1d28432db3a934cac421f419d6d5adbf127 --- /dev/null +++ b/examples/rigid/in.rigid-atomvar @@ -0,0 +1,83 @@ +# Simple rigid body system + +units lj +atom_style atomic +atom_modify map array + +pair_style lj/cut 2.5 + +read_data data.rigid + +velocity all create 100.0 4928459 + + +# unconnected bodies + +group clump1 id <> 1 9 +group clump2 id <> 10 18 +group clump3 id <> 19 27 +group clump4 id <> 28 36 +group clump5 id <> 37 45 +group clump6 id <> 46 54 +group clump7 id 
<> 55 63 +group clump8 id <> 64 72 +group clump9 id <> 73 81 + +variable bodies atom 1.0*gmask(clump1)+2.0*gmask(clump2)+3.0*gmask(clump3)+4.0*gmask(clump4)+5.0*gmask(clump5)+6.0*gmask(clump6)+7.0*gmask(clump7)+8.0*gmask(clump8)+9.0*gmask(clump9) +fix 1 all rigid custom v_bodies + +# 1 chain of connected bodies + +#group clump1 id <> 1 9 +#group clump2 id <> 9 18 +#group clump3 id <> 18 27 +#group clump4 id <> 27 36 +#group clump5 id <> 36 45 +#group clump6 id <> 45 54 +#group clump7 id <> 54 63 +#group clump8 id <> 63 72 +#group clump9 id <> 72 81 + +#fix 1 all poems group clump1 clump2 clump3 clump4 clump5 & +# clump6 clump7 clump8 clump9 + +# 2 chains of connected bodies + +#group clump1 id <> 1 9 +#group clump2 id <> 9 18 +#group clump3 id <> 18 27 +#group clump4 id <> 27 36 +#group clump5 id <> 37 45 +#group clump6 id <> 45 54 +#group clump7 id <> 54 63 +#group clump8 id <> 63 72 +#group clump9 id <> 72 81 + +#fix 1 all poems group clump1 clump2 clump3 clump4 +#fix 2 all poems group clump5 clump6 clump7 clump8 clump9 + +neigh_modify exclude group clump1 clump1 +neigh_modify exclude group clump2 clump2 +neigh_modify exclude group clump3 clump3 +neigh_modify exclude group clump4 clump4 +neigh_modify exclude group clump5 clump5 +neigh_modify exclude group clump6 clump6 +neigh_modify exclude group clump7 clump7 +neigh_modify exclude group clump8 clump8 +neigh_modify exclude group clump9 clump9 + +thermo 100 + +#dump 1 all atom 50 dump.rigid + +#dump 2 all image 100 image.*.jpg type type & +# axes yes 0.8 0.02 view 60 -30 +#dump_modify 2 pad 5 + +#dump 3 all movie 100 movie.mpg type type & +# axes yes 0.8 0.02 view 60 -30 +#dump_modify 3 pad 5 + +timestep 0.0001 +thermo 50 +run 10000 diff --git a/examples/rigid/in.rigid-property b/examples/rigid/in.rigid-property new file mode 100644 index 0000000000000000000000000000000000000000..53d62776e6764c4e22573324f4bc69da8cb79d7a --- /dev/null +++ b/examples/rigid/in.rigid-property @@ -0,0 +1,85 @@ +# Simple rigid body 
system + +units lj +atom_style atomic +atom_modify map array + +pair_style lj/cut 2.5 + +fix 0 all property/atom i_bodies + +read_data data.rigid-property fix 0 NULL Bodies + +velocity all create 100.0 4928459 + + +# unconnected bodies + +group clump1 id <> 1 9 +group clump2 id <> 10 18 +group clump3 id <> 19 27 +group clump4 id <> 28 36 +group clump5 id <> 37 45 +group clump6 id <> 46 54 +group clump7 id <> 55 63 +group clump8 id <> 64 72 +group clump9 id <> 73 81 + +# assemble bodies from per-atom custom integer property bodies +fix 1 all rigid custom i_bodies + +# 1 chain of connected bodies + +#group clump1 id <> 1 9 +#group clump2 id <> 9 18 +#group clump3 id <> 18 27 +#group clump4 id <> 27 36 +#group clump5 id <> 36 45 +#group clump6 id <> 45 54 +#group clump7 id <> 54 63 +#group clump8 id <> 63 72 +#group clump9 id <> 72 81 + +#fix 1 all poems group clump1 clump2 clump3 clump4 clump5 & +# clump6 clump7 clump8 clump9 + +# 2 chains of connected bodies + +#group clump1 id <> 1 9 +#group clump2 id <> 9 18 +#group clump3 id <> 18 27 +#group clump4 id <> 27 36 +#group clump5 id <> 37 45 +#group clump6 id <> 45 54 +#group clump7 id <> 54 63 +#group clump8 id <> 63 72 +#group clump9 id <> 72 81 + +#fix 1 all poems group clump1 clump2 clump3 clump4 +#fix 2 all poems group clump5 clump6 clump7 clump8 clump9 + +neigh_modify exclude group clump1 clump1 +neigh_modify exclude group clump2 clump2 +neigh_modify exclude group clump3 clump3 +neigh_modify exclude group clump4 clump4 +neigh_modify exclude group clump5 clump5 +neigh_modify exclude group clump6 clump6 +neigh_modify exclude group clump7 clump7 +neigh_modify exclude group clump8 clump8 +neigh_modify exclude group clump9 clump9 + +thermo 100 + +#dump 1 all atom 50 dump.rigid + +#dump 2 all image 100 image.*.jpg type type & +# axes yes 0.8 0.02 view 60 -30 +#dump_modify 2 pad 5 + +#dump 3 all movie 100 movie.mpg type type & +# axes yes 0.8 0.02 view 60 -30 +#dump_modify 3 pad 5 + +timestep 0.0001 +thermo 50 +run 
10000 diff --git a/lib/colvars/Makefile.common b/lib/colvars/Makefile.common index f47403f771f2095928d528d6872b3644a9a164d3..e3fa4662e638ef1668a21259f7fb5c021c599e9a 100644 --- a/lib/colvars/Makefile.common +++ b/lib/colvars/Makefile.common @@ -12,7 +12,7 @@ ifeq ($(COLVARS_DEBUG),) COLVARS_DEBUG_INCFLAGS = else -COLVARS_DEBUG_INCFLAGS= -DCOLVARS_DEBUG +COLVARS_DEBUG_INCFLAGS = -DCOLVARS_DEBUG endif COLVARS_INCFLAGS = $(COLVARS_DEBUG_INCFLAGS) $(COLVARS_PYTHON_INCFLAGS) @@ -21,6 +21,7 @@ COLVARS_INCFLAGS = $(COLVARS_DEBUG_INCFLAGS) $(COLVARS_PYTHON_INCFLAGS) .SUFFIXES: .SUFFIXES: .cpp .o + COLVARS_SRCS = \ colvaratoms.cpp \ colvarbias_abf.cpp \ @@ -45,21 +46,32 @@ COLVARS_SRCS = \ colvartypes.cpp \ colvarvalue.cpp -COLVARS_OBJS = $(COLVARS_SRCS:.cpp=.o) +LEPTON_SRCS = \ + lepton/src/CompiledExpression.cpp lepton/src/ExpressionTreeNode.cpp \ + lepton/src/ParsedExpression.cpp lepton/src/ExpressionProgram.cpp \ + lepton/src/Operation.cpp lepton/src/Parser.cpp + +LEPTON_OBJS = \ + lepton/src/CompiledExpression.o lepton/src/ExpressionTreeNode.o \ + lepton/src/ParsedExpression.o lepton/src/ExpressionProgram.o \ + lepton/src/Operation.o lepton/src/Parser.o + +COLVARS_OBJS = $(COLVARS_SRCS:.cpp=.o) $(LEPTON_OBJS) -.cpp.o: - $(CXX) $(CXXFLAGS) $(COLVARS_INCFLAGS) -c $< +%.o: %.cpp + $(CXX) $(CXXFLAGS) $(COLVARS_INCFLAGS) -Ilepton/include -DLEPTON -c -o $@ $< $(COLVARS_LIB): Makefile.deps $(COLVARS_OBJS) - $(AR) $(ARFLAGS) $(COLVARS_LIB) $(COLVARS_OBJS) + $(AR) $(ARFLAGS) $(COLVARS_LIB) $(COLVARS_OBJS) $(LEPTON_OBJS) Makefile.deps: $(COLVARS_SRCS) @echo > $@ @for src in $^ ; do \ obj=`basename $$src .cpp`.o ; \ - $(CXX) -MM $(COLVARS_INCFLAGS) \ + $(CXX) -MM $(COLVARS_INCFLAGS) -Ilepton/include -DLEPTON \ -MT '$$(COLVARS_OBJ_DIR)'$$obj $$src >> $@ ; \ done include Makefile.deps +include Makefile.lepton.deps # Hand-generated diff --git a/lib/colvars/Makefile.deps b/lib/colvars/Makefile.deps index 
f463da5f86f2b40e5a42230fff856c32cd0f2349..e0c768dbc9b042b3ae14731b6319929af9a20705 100644 --- a/lib/colvars/Makefile.deps +++ b/lib/colvars/Makefile.deps @@ -4,73 +4,232 @@ $(COLVARS_OBJ_DIR)colvaratoms.o: colvaratoms.cpp colvarmodule.h \ colvarparse.h colvaratoms.h colvardeps.h $(COLVARS_OBJ_DIR)colvarbias_abf.o: colvarbias_abf.cpp colvarmodule.h \ colvars_version.h colvartypes.h colvarproxy.h colvarvalue.h colvar.h \ - colvarparse.h colvardeps.h colvarbias_abf.h colvarbias.h colvargrid.h + colvarparse.h colvardeps.h lepton/include/Lepton.h \ + lepton/include/lepton/CompiledExpression.h \ + lepton/include/lepton/ExpressionTreeNode.h \ + lepton/include/lepton/windowsIncludes.h \ + lepton/include/lepton/CustomFunction.h \ + lepton/include/lepton/ExpressionProgram.h \ + lepton/include/lepton/ExpressionTreeNode.h \ + lepton/include/lepton/Operation.h lepton/include/lepton/CustomFunction.h \ + lepton/include/lepton/Exception.h \ + lepton/include/lepton/ParsedExpression.h lepton/include/lepton/Parser.h \ + colvarbias_abf.h colvarbias.h colvargrid.h colvar_UIestimator.h $(COLVARS_OBJ_DIR)colvarbias_alb.o: colvarbias_alb.cpp colvarmodule.h \ colvars_version.h colvartypes.h colvarproxy.h colvarvalue.h \ - colvarbias_alb.h colvar.h colvarparse.h colvardeps.h colvarbias.h + colvarbias_alb.h colvar.h colvarparse.h colvardeps.h \ + lepton/include/Lepton.h lepton/include/lepton/CompiledExpression.h \ + lepton/include/lepton/ExpressionTreeNode.h \ + lepton/include/lepton/windowsIncludes.h \ + lepton/include/lepton/CustomFunction.h \ + lepton/include/lepton/ExpressionProgram.h \ + lepton/include/lepton/ExpressionTreeNode.h \ + lepton/include/lepton/Operation.h lepton/include/lepton/CustomFunction.h \ + lepton/include/lepton/Exception.h \ + lepton/include/lepton/ParsedExpression.h lepton/include/lepton/Parser.h \ + colvarbias.h $(COLVARS_OBJ_DIR)colvarbias.o: colvarbias.cpp colvarmodule.h \ colvars_version.h colvartypes.h colvarproxy.h colvarvalue.h colvarbias.h \ - colvar.h 
colvarparse.h colvardeps.h + colvar.h colvarparse.h colvardeps.h lepton/include/Lepton.h \ + lepton/include/lepton/CompiledExpression.h \ + lepton/include/lepton/ExpressionTreeNode.h \ + lepton/include/lepton/windowsIncludes.h \ + lepton/include/lepton/CustomFunction.h \ + lepton/include/lepton/ExpressionProgram.h \ + lepton/include/lepton/ExpressionTreeNode.h \ + lepton/include/lepton/Operation.h lepton/include/lepton/CustomFunction.h \ + lepton/include/lepton/Exception.h \ + lepton/include/lepton/ParsedExpression.h lepton/include/lepton/Parser.h \ + colvargrid.h $(COLVARS_OBJ_DIR)colvarbias_histogram.o: colvarbias_histogram.cpp \ colvarmodule.h colvars_version.h colvartypes.h colvarproxy.h \ - colvarvalue.h colvar.h colvarparse.h colvardeps.h colvarbias_histogram.h \ - colvarbias.h colvargrid.h + colvarvalue.h colvar.h colvarparse.h colvardeps.h \ + lepton/include/Lepton.h lepton/include/lepton/CompiledExpression.h \ + lepton/include/lepton/ExpressionTreeNode.h \ + lepton/include/lepton/windowsIncludes.h \ + lepton/include/lepton/CustomFunction.h \ + lepton/include/lepton/ExpressionProgram.h \ + lepton/include/lepton/ExpressionTreeNode.h \ + lepton/include/lepton/Operation.h lepton/include/lepton/CustomFunction.h \ + lepton/include/lepton/Exception.h \ + lepton/include/lepton/ParsedExpression.h lepton/include/lepton/Parser.h \ + colvarbias_histogram.h colvarbias.h colvargrid.h $(COLVARS_OBJ_DIR)colvarbias_meta.o: colvarbias_meta.cpp colvar.h \ colvarmodule.h colvars_version.h colvartypes.h colvarproxy.h \ - colvarvalue.h colvarparse.h colvardeps.h colvarbias_meta.h colvarbias.h \ - colvargrid.h + colvarvalue.h colvarparse.h colvardeps.h lepton/include/Lepton.h \ + lepton/include/lepton/CompiledExpression.h \ + lepton/include/lepton/ExpressionTreeNode.h \ + lepton/include/lepton/windowsIncludes.h \ + lepton/include/lepton/CustomFunction.h \ + lepton/include/lepton/ExpressionProgram.h \ + lepton/include/lepton/ExpressionTreeNode.h \ + 
lepton/include/lepton/Operation.h lepton/include/lepton/CustomFunction.h \ + lepton/include/lepton/Exception.h \ + lepton/include/lepton/ParsedExpression.h lepton/include/lepton/Parser.h \ + colvarbias_meta.h colvarbias.h colvargrid.h $(COLVARS_OBJ_DIR)colvarbias_restraint.o: colvarbias_restraint.cpp \ colvarmodule.h colvars_version.h colvartypes.h colvarproxy.h \ colvarvalue.h colvarbias_restraint.h colvarbias.h colvar.h colvarparse.h \ - colvardeps.h + colvardeps.h lepton/include/Lepton.h \ + lepton/include/lepton/CompiledExpression.h \ + lepton/include/lepton/ExpressionTreeNode.h \ + lepton/include/lepton/windowsIncludes.h \ + lepton/include/lepton/CustomFunction.h \ + lepton/include/lepton/ExpressionProgram.h \ + lepton/include/lepton/ExpressionTreeNode.h \ + lepton/include/lepton/Operation.h lepton/include/lepton/CustomFunction.h \ + lepton/include/lepton/Exception.h \ + lepton/include/lepton/ParsedExpression.h lepton/include/lepton/Parser.h $(COLVARS_OBJ_DIR)colvarcomp_angles.o: colvarcomp_angles.cpp \ colvarmodule.h colvars_version.h colvartypes.h colvarproxy.h \ - colvarvalue.h colvar.h colvarparse.h colvardeps.h colvarcomp.h \ - colvaratoms.h + colvarvalue.h colvar.h colvarparse.h colvardeps.h \ + lepton/include/Lepton.h lepton/include/lepton/CompiledExpression.h \ + lepton/include/lepton/ExpressionTreeNode.h \ + lepton/include/lepton/windowsIncludes.h \ + lepton/include/lepton/CustomFunction.h \ + lepton/include/lepton/ExpressionProgram.h \ + lepton/include/lepton/ExpressionTreeNode.h \ + lepton/include/lepton/Operation.h lepton/include/lepton/CustomFunction.h \ + lepton/include/lepton/Exception.h \ + lepton/include/lepton/ParsedExpression.h lepton/include/lepton/Parser.h \ + colvarcomp.h colvaratoms.h $(COLVARS_OBJ_DIR)colvarcomp_coordnums.o: colvarcomp_coordnums.cpp \ colvarmodule.h colvars_version.h colvartypes.h colvarproxy.h \ colvarvalue.h colvarparse.h colvaratoms.h colvardeps.h colvar.h \ + lepton/include/Lepton.h 
lepton/include/lepton/CompiledExpression.h \ + lepton/include/lepton/ExpressionTreeNode.h \ + lepton/include/lepton/windowsIncludes.h \ + lepton/include/lepton/CustomFunction.h \ + lepton/include/lepton/ExpressionProgram.h \ + lepton/include/lepton/ExpressionTreeNode.h \ + lepton/include/lepton/Operation.h lepton/include/lepton/CustomFunction.h \ + lepton/include/lepton/Exception.h \ + lepton/include/lepton/ParsedExpression.h lepton/include/lepton/Parser.h \ colvarcomp.h $(COLVARS_OBJ_DIR)colvarcomp.o: colvarcomp.cpp colvarmodule.h \ colvars_version.h colvartypes.h colvarproxy.h colvarvalue.h colvar.h \ - colvarparse.h colvardeps.h colvarcomp.h colvaratoms.h + colvarparse.h colvardeps.h lepton/include/Lepton.h \ + lepton/include/lepton/CompiledExpression.h \ + lepton/include/lepton/ExpressionTreeNode.h \ + lepton/include/lepton/windowsIncludes.h \ + lepton/include/lepton/CustomFunction.h \ + lepton/include/lepton/ExpressionProgram.h \ + lepton/include/lepton/ExpressionTreeNode.h \ + lepton/include/lepton/Operation.h lepton/include/lepton/CustomFunction.h \ + lepton/include/lepton/Exception.h \ + lepton/include/lepton/ParsedExpression.h lepton/include/lepton/Parser.h \ + colvarcomp.h colvaratoms.h $(COLVARS_OBJ_DIR)colvarcomp_distances.o: colvarcomp_distances.cpp \ colvarmodule.h colvars_version.h colvartypes.h colvarproxy.h \ - colvarvalue.h colvarparse.h colvar.h colvardeps.h colvarcomp.h \ - colvaratoms.h + colvarvalue.h colvarparse.h colvar.h colvardeps.h \ + lepton/include/Lepton.h lepton/include/lepton/CompiledExpression.h \ + lepton/include/lepton/ExpressionTreeNode.h \ + lepton/include/lepton/windowsIncludes.h \ + lepton/include/lepton/CustomFunction.h \ + lepton/include/lepton/ExpressionProgram.h \ + lepton/include/lepton/ExpressionTreeNode.h \ + lepton/include/lepton/Operation.h lepton/include/lepton/CustomFunction.h \ + lepton/include/lepton/Exception.h \ + lepton/include/lepton/ParsedExpression.h lepton/include/lepton/Parser.h \ + colvarcomp.h 
colvaratoms.h $(COLVARS_OBJ_DIR)colvarcomp_protein.o: colvarcomp_protein.cpp \ colvarmodule.h colvars_version.h colvartypes.h colvarproxy.h \ - colvarvalue.h colvarparse.h colvar.h colvardeps.h colvarcomp.h \ - colvaratoms.h + colvarvalue.h colvarparse.h colvar.h colvardeps.h \ + lepton/include/Lepton.h lepton/include/lepton/CompiledExpression.h \ + lepton/include/lepton/ExpressionTreeNode.h \ + lepton/include/lepton/windowsIncludes.h \ + lepton/include/lepton/CustomFunction.h \ + lepton/include/lepton/ExpressionProgram.h \ + lepton/include/lepton/ExpressionTreeNode.h \ + lepton/include/lepton/Operation.h lepton/include/lepton/CustomFunction.h \ + lepton/include/lepton/Exception.h \ + lepton/include/lepton/ParsedExpression.h lepton/include/lepton/Parser.h \ + colvarcomp.h colvaratoms.h $(COLVARS_OBJ_DIR)colvarcomp_rotations.o: colvarcomp_rotations.cpp \ colvarmodule.h colvars_version.h colvartypes.h colvarproxy.h \ - colvarvalue.h colvarparse.h colvar.h colvardeps.h colvarcomp.h \ - colvaratoms.h + colvarvalue.h colvarparse.h colvar.h colvardeps.h \ + lepton/include/Lepton.h lepton/include/lepton/CompiledExpression.h \ + lepton/include/lepton/ExpressionTreeNode.h \ + lepton/include/lepton/windowsIncludes.h \ + lepton/include/lepton/CustomFunction.h \ + lepton/include/lepton/ExpressionProgram.h \ + lepton/include/lepton/ExpressionTreeNode.h \ + lepton/include/lepton/Operation.h lepton/include/lepton/CustomFunction.h \ + lepton/include/lepton/Exception.h \ + lepton/include/lepton/ParsedExpression.h lepton/include/lepton/Parser.h \ + colvarcomp.h colvaratoms.h $(COLVARS_OBJ_DIR)colvar.o: colvar.cpp colvarmodule.h colvars_version.h \ colvartypes.h colvarproxy.h colvarvalue.h colvarparse.h colvar.h \ - colvardeps.h colvarcomp.h colvaratoms.h colvarscript.h colvarbias.h + colvardeps.h lepton/include/Lepton.h \ + lepton/include/lepton/CompiledExpression.h \ + lepton/include/lepton/ExpressionTreeNode.h \ + lepton/include/lepton/windowsIncludes.h \ + 
lepton/include/lepton/CustomFunction.h \ + lepton/include/lepton/ExpressionProgram.h \ + lepton/include/lepton/ExpressionTreeNode.h \ + lepton/include/lepton/Operation.h lepton/include/lepton/CustomFunction.h \ + lepton/include/lepton/Exception.h \ + lepton/include/lepton/ParsedExpression.h lepton/include/lepton/Parser.h \ + colvarcomp.h colvaratoms.h colvarscript.h colvarbias.h $(COLVARS_OBJ_DIR)colvardeps.o: colvardeps.cpp colvardeps.h \ colvarmodule.h colvars_version.h colvartypes.h colvarproxy.h \ colvarvalue.h colvarparse.h $(COLVARS_OBJ_DIR)colvargrid.o: colvargrid.cpp colvarmodule.h \ colvars_version.h colvartypes.h colvarproxy.h colvarvalue.h \ - colvarparse.h colvar.h colvardeps.h colvarcomp.h colvaratoms.h \ - colvargrid.h + colvarparse.h colvar.h colvardeps.h lepton/include/Lepton.h \ + lepton/include/lepton/CompiledExpression.h \ + lepton/include/lepton/ExpressionTreeNode.h \ + lepton/include/lepton/windowsIncludes.h \ + lepton/include/lepton/CustomFunction.h \ + lepton/include/lepton/ExpressionProgram.h \ + lepton/include/lepton/ExpressionTreeNode.h \ + lepton/include/lepton/Operation.h lepton/include/lepton/CustomFunction.h \ + lepton/include/lepton/Exception.h \ + lepton/include/lepton/ParsedExpression.h lepton/include/lepton/Parser.h \ + colvarcomp.h colvaratoms.h colvargrid.h $(COLVARS_OBJ_DIR)colvarmodule.o: colvarmodule.cpp colvarmodule.h \ colvars_version.h colvartypes.h colvarproxy.h colvarvalue.h \ - colvarparse.h colvar.h colvardeps.h colvarbias.h colvarbias_abf.h \ - colvargrid.h colvarbias_alb.h colvarbias_histogram.h colvarbias_meta.h \ - colvarbias_restraint.h colvarscript.h colvaratoms.h + colvarparse.h colvar.h colvardeps.h lepton/include/Lepton.h \ + lepton/include/lepton/CompiledExpression.h \ + lepton/include/lepton/ExpressionTreeNode.h \ + lepton/include/lepton/windowsIncludes.h \ + lepton/include/lepton/CustomFunction.h \ + lepton/include/lepton/ExpressionProgram.h \ + lepton/include/lepton/ExpressionTreeNode.h \ + 
lepton/include/lepton/Operation.h lepton/include/lepton/CustomFunction.h \ + lepton/include/lepton/Exception.h \ + lepton/include/lepton/ParsedExpression.h lepton/include/lepton/Parser.h \ + colvarbias.h colvarbias_abf.h colvargrid.h colvar_UIestimator.h \ + colvarbias_alb.h colvarbias_histogram.h colvarbias_meta.h \ + colvarbias_restraint.h colvarscript.h colvaratoms.h colvarcomp.h $(COLVARS_OBJ_DIR)colvarparse.o: colvarparse.cpp colvarmodule.h \ colvars_version.h colvartypes.h colvarproxy.h colvarvalue.h \ colvarparse.h $(COLVARS_OBJ_DIR)colvarproxy.o: colvarproxy.cpp colvarmodule.h \ colvars_version.h colvartypes.h colvarproxy.h colvarvalue.h \ colvarscript.h colvarbias.h colvar.h colvarparse.h colvardeps.h \ + lepton/include/Lepton.h lepton/include/lepton/CompiledExpression.h \ + lepton/include/lepton/ExpressionTreeNode.h \ + lepton/include/lepton/windowsIncludes.h \ + lepton/include/lepton/CustomFunction.h \ + lepton/include/lepton/ExpressionProgram.h \ + lepton/include/lepton/ExpressionTreeNode.h \ + lepton/include/lepton/Operation.h lepton/include/lepton/CustomFunction.h \ + lepton/include/lepton/Exception.h \ + lepton/include/lepton/ParsedExpression.h lepton/include/lepton/Parser.h \ colvaratoms.h $(COLVARS_OBJ_DIR)colvarscript.o: colvarscript.cpp colvarscript.h \ colvarmodule.h colvars_version.h colvartypes.h colvarproxy.h \ - colvarvalue.h colvarbias.h colvar.h colvarparse.h colvardeps.h + colvarvalue.h colvarbias.h colvar.h colvarparse.h colvardeps.h \ + lepton/include/Lepton.h lepton/include/lepton/CompiledExpression.h \ + lepton/include/lepton/ExpressionTreeNode.h \ + lepton/include/lepton/windowsIncludes.h \ + lepton/include/lepton/CustomFunction.h \ + lepton/include/lepton/ExpressionProgram.h \ + lepton/include/lepton/ExpressionTreeNode.h \ + lepton/include/lepton/Operation.h lepton/include/lepton/CustomFunction.h \ + lepton/include/lepton/Exception.h \ + lepton/include/lepton/ParsedExpression.h lepton/include/lepton/Parser.h 
$(COLVARS_OBJ_DIR)colvartypes.o: colvartypes.cpp colvarmodule.h \ colvars_version.h colvartypes.h colvarproxy.h colvarvalue.h \ colvarparse.h diff --git a/lib/colvars/Makefile.lepton.deps b/lib/colvars/Makefile.lepton.deps new file mode 100644 index 0000000000000000000000000000000000000000..93c3912384b7bc41f4900b481903aa0e6740dd3b --- /dev/null +++ b/lib/colvars/Makefile.lepton.deps @@ -0,0 +1,40 @@ +lepton/src/CompiledExpression.o: lepton/src/CompiledExpression.cpp \ + lepton/include/lepton/CompiledExpression.h \ + lepton/include/lepton/ExpressionTreeNode.h \ + lepton/include/lepton/windowsIncludes.h \ + lepton/include/lepton/Operation.h lepton/include/lepton/CustomFunction.h \ + lepton/include/lepton/Exception.h \ + lepton/include/lepton/ParsedExpression.h +lepton/src/ExpressionProgram.o: lepton/src/ExpressionProgram.cpp \ + lepton/include/lepton/ExpressionProgram.h \ + lepton/include/lepton/ExpressionTreeNode.h \ + lepton/include/lepton/windowsIncludes.h \ + lepton/include/lepton/Operation.h lepton/include/lepton/CustomFunction.h \ + lepton/include/lepton/Exception.h \ + lepton/include/lepton/ParsedExpression.h +lepton/src/ExpressionTreeNode.o: lepton/src/ExpressionTreeNode.cpp \ + lepton/include/lepton/ExpressionTreeNode.h \ + lepton/include/lepton/windowsIncludes.h \ + lepton/include/lepton/Exception.h lepton/include/lepton/Operation.h \ + lepton/include/lepton/CustomFunction.h lepton/include/lepton/Exception.h +lepton/src/Operation.o: lepton/src/Operation.cpp \ + lepton/include/lepton/Operation.h \ + lepton/include/lepton/windowsIncludes.h \ + lepton/include/lepton/CustomFunction.h lepton/include/lepton/Exception.h \ + lepton/include/lepton/ExpressionTreeNode.h lepton/src/MSVC_erfc.h +lepton/src/ParsedExpression.o: lepton/src/ParsedExpression.cpp \ + lepton/include/lepton/ParsedExpression.h \ + lepton/include/lepton/ExpressionTreeNode.h \ + lepton/include/lepton/windowsIncludes.h \ + lepton/include/lepton/CompiledExpression.h \ + 
lepton/include/lepton/ExpressionProgram.h \ + lepton/include/lepton/Operation.h lepton/include/lepton/CustomFunction.h \ + lepton/include/lepton/Exception.h +lepton/src/Parser.o: lepton/src/Parser.cpp \ + lepton/include/lepton/Parser.h lepton/include/lepton/windowsIncludes.h \ + lepton/include/lepton/CustomFunction.h lepton/include/lepton/Exception.h \ + lepton/include/lepton/ExpressionTreeNode.h \ + lepton/include/lepton/Operation.h lepton/include/lepton/CustomFunction.h \ + lepton/include/lepton/Exception.h \ + lepton/include/lepton/ParsedExpression.h \ + lepton/include/lepton/ExpressionTreeNode.h diff --git a/lib/colvars/README b/lib/colvars/README index 5df9612dfa3b6f73af79137701fdfae4bb90158b..087528748b6b2e80b6d76adcd1bb95b21b42c020 100644 --- a/lib/colvars/README +++ b/lib/colvars/README @@ -47,6 +47,10 @@ correct for your system, else the LAMMPS build will likely fail. If you want to set a debug flag recognized by the library, the settings in Makefile.common should work. +Note: some Colvars functions use the Lepton mathematical expression parser, +which is here included (no additional steps required). 
For more details, see: + https://simtk.org/projects/lepton + ## Documentation diff --git a/lib/colvars/colvar.cpp b/lib/colvars/colvar.cpp index d23bd852aaf8be1fe81c41039c9d577af87c7384..ce76b3b9eb2c8bd5f37b85af332611fa7ca963b3 100644 --- a/lib/colvars/colvar.cpp +++ b/lib/colvars/colvar.cpp @@ -1008,6 +1008,8 @@ int colvar::calc() int colvar::calc_cvcs(int first_cvc, size_t num_cvcs) { + colvarproxy *proxy = cvm::main()->proxy; + int error_code = COLVARS_OK; if (cvm::debug()) cvm::log("Calculating colvar \""+this->name+"\", components "+ @@ -1018,14 +1020,18 @@ int colvar::calc_cvcs(int first_cvc, size_t num_cvcs) return error_code; } - if (cvm::step_relative() > 0) { - // Total force depends on Jacobian derivative from previous timestep + if ((cvm::step_relative() > 0) && (!proxy->total_forces_same_step())){ + // Use Jacobian derivative from previous timestep error_code |= calc_cvc_total_force(first_cvc, num_cvcs); } // atom coordinates are updated by the next line error_code |= calc_cvc_values(first_cvc, num_cvcs); error_code |= calc_cvc_gradients(first_cvc, num_cvcs); error_code |= calc_cvc_Jacobians(first_cvc, num_cvcs); + if (proxy->total_forces_same_step()){ + // Use Jacobian derivative from this timestep + error_code |= calc_cvc_total_force(first_cvc, num_cvcs); + } if (cvm::debug()) cvm::log("Done calculating colvar \""+this->name+"\".\n"); @@ -1043,6 +1049,7 @@ int colvar::collect_cvc_data() if (cvm::step_relative() > 0) { // Total force depends on Jacobian derivative from previous timestep + // collect_cvc_total_forces() uses the previous value of jd error_code |= collect_cvc_total_forces(); } error_code |= collect_cvc_values(); @@ -1138,7 +1145,7 @@ int colvar::collect_cvc_values() if (!cvcs[i]->is_enabled()) continue; x += (cvcs[i])->sup_coeff * ( ((cvcs[i])->sup_np != 1) ? 
- std::pow((cvcs[i])->value().real_value, (cvcs[i])->sup_np) : + cvm::integer_power((cvcs[i])->value().real_value, (cvcs[i])->sup_np) : (cvcs[i])->value().real_value ); } } else { @@ -1219,7 +1226,7 @@ int colvar::collect_cvc_gradients() if (!cvcs[i]->is_enabled()) continue; // Coefficient: d(a * x^n) = a * n * x^(n-1) * dx cvm::real coeff = (cvcs[i])->sup_coeff * cvm::real((cvcs[i])->sup_np) * - std::pow((cvcs[i])->value().real_value, (cvcs[i])->sup_np-1); + cvm::integer_power((cvcs[i])->value().real_value, (cvcs[i])->sup_np-1); for (size_t j = 0; j < cvcs[i]->atom_groups.size(); j++) { @@ -1471,9 +1478,15 @@ cvm::real colvar::update_forces_energy() // Coupling force is a slow force, to be applied to atomic coords impulse-style f *= cvm::real(time_step_factor); - // The total force acting on the extended variable is f_ext - // This will be used in the next timestep - ft_reported = f_ext; + if (is_enabled(f_cv_subtract_applied_force)) { + // Report a "system" force without the biases on this colvar + // that is, just the spring force + ft_reported = (-0.5 * ext_force_k) * this->dist2_lgrad(xr, x); + } else { + // The total force acting on the extended variable is f_ext + // This will be used in the next timestep + ft_reported = f_ext; + } // leapfrog: starting from x_i, f_i, v_(i-1/2) vr += (0.5 * dt) * f_ext / ext_mass; @@ -1580,9 +1593,9 @@ void colvar::communicate_forces() for (i = 0; i < cvcs.size(); i++) { if (!cvcs[i]->is_enabled()) continue; (cvcs[i])->apply_force(f * (cvcs[i])->sup_coeff * - cvm::real((cvcs[i])->sup_np) * - (std::pow((cvcs[i])->value().real_value, - (cvcs[i])->sup_np-1)) ); + cvm::real((cvcs[i])->sup_np) * + (cvm::integer_power((cvcs[i])->value().real_value, + (cvcs[i])->sup_np-1)) ); } } else { diff --git a/lib/colvars/colvar.h b/lib/colvars/colvar.h index dfa9e093a537b797284909e2d7a468571cb8cb98..20dad2771b6c25afef9c4667ca23f24cfe61d46d 100644 --- a/lib/colvars/colvar.h +++ b/lib/colvars/colvar.h @@ -60,7 +60,10 @@ public: /// \brief 
Current actual value (not extended DOF) colvarvalue const & actual_value() const; - + + /// \brief Force constant of the spring + cvm::real const & force_constant() const; + /// \brief Current velocity (previously set by calc() or by read_traj()) colvarvalue const & velocity() const; @@ -96,6 +99,12 @@ public: { return cv_features; } + static void delete_features() { + for (size_t i=0; i < cv_features.size(); i++) { + delete cv_features[i]; + } + cv_features.clear(); + } /// Implements possible actions to be carried out /// when a given feature is enabled @@ -592,6 +601,10 @@ public: } }; +inline cvm::real const & colvar::force_constant() const +{ + return ext_force_k; +} inline colvarvalue const & colvar::value() const { diff --git a/lib/colvars/colvar_UIestimator.h b/lib/colvars/colvar_UIestimator.h new file mode 100644 index 0000000000000000000000000000000000000000..7fc7f870a10932d176525bb3c6c42d574a572fb1 --- /dev/null +++ b/lib/colvars/colvar_UIestimator.h @@ -0,0 +1,736 @@ +// -*- c++ -*- + +// This file is part of the Collective Variables module (Colvars). +// The original version of Colvars and its updates are located at: +// https://github.com/colvars/colvars +// Please update all Colvars source files before making any changes. +// If you wish to distribute your changes, please submit them to the +// Colvars repository at GitHub. + +#ifndef COLVAR_UIESTIMATOR_H +#define COLVAR_UIESTIMATOR_H + +#include <cmath> +#include <vector> +#include <iostream> +#include <fstream> +#include <string> + +#include <typeinfo> + +// only for colvar module! 
+// when integrated into other code, just remove this line and "...cvm::backup_file(...)" +#include "colvarmodule.h" + +namespace UIestimator { + const int Y_SIZE = 21; // defines the range of extended CV with respect to a given CV + // For example, CV=10, width=1, Y_SIZE=21, then eCV=[0-20], having a size of 21 + const int HALF_Y_SIZE = 10; + const int EXTENDED_X_SIZE = HALF_Y_SIZE; + const double EPSILON = 0.000001; // for comparison of float numbers + + class n_matrix { // Stores the distribution matrix of n(x,y) + + public: + n_matrix() {} + n_matrix(const std::vector<double> & lowerboundary, // lowerboundary of x + const std::vector<double> & upperboundary, // upperboundary of + const std::vector<double> & width, // width of x + const int y_size) { // size of y, for example, ysize=7, then when x=1, the distribution of y in [-2,4] is considered + + int i; + + this->lowerboundary = lowerboundary; + this->upperboundary = upperboundary; + this->width = width; + this->dimension = lowerboundary.size(); + this->y_size = y_size; // keep in mind the internal (spare) matrix is stored in diagonal form + this->y_total_size = int(pow(double(y_size), dimension) + EPSILON); + + // the range of the matrix is [lowerboundary, upperboundary] + x_total_size = 1; + for (i = 0; i < dimension; i++) { + x_size.push_back(int((upperboundary[i] - lowerboundary[i]) / width[i] + EPSILON)); + x_total_size *= x_size[i]; + } + + // initialize the internal matrix + matrix.reserve(x_total_size); + for (i = 0; i < x_total_size; i++) { + matrix.push_back(std::vector<int>(y_total_size, 0)); + } + + temp.resize(dimension); + } + + int inline get_value(const std::vector<double> & x, const std::vector<double> & y) { + return matrix[convert_x(x)][convert_y(x, y)]; + } + + void inline set_value(const std::vector<double> & x, const std::vector<double> & y, const int value) { + matrix[convert_x(x)][convert_y(x,y)] = value; + } + + void inline increase_value(const std::vector<double> & x, const 
std::vector<double> & y, const int value) { + matrix[convert_x(x)][convert_y(x,y)] += value; + } + + private: + std::vector<double> lowerboundary; + std::vector<double> upperboundary; + std::vector<double> width; + int dimension; + std::vector<int> x_size; // the size of x in each dimension + int x_total_size; // the size of x of the internal matrix + int y_size; // the size of y in each dimension + int y_total_size; // the size of y of the internal matrix + + std::vector<std::vector<int> > matrix; // the internal matrix + + std::vector<int> temp; // this vector is used in convert_x and convert_y to save computational resource + + int i, j; + + int convert_x(const std::vector<double> & x) { // convert real x value to its interal index + for (i = 0; i < dimension; i++) { + temp[i] = int((x[i] - lowerboundary[i]) / width[i] + EPSILON); + } + + int index = 0; + for (i = 0; i < dimension; i++) { + if (i + 1 < dimension) { + int x_temp = 1; + for (j = i + 1; j < dimension; j++) + x_temp *= x_size[j]; + index += temp[i] * x_temp; + } + else + index += temp[i]; + } + return index; + } + + int convert_y(const std::vector<double> & x, const std::vector<double> & y) { // convert real y value to its interal index + + int i; + + for (i = 0; i < dimension; i++) { + temp[i] = round((round(y[i] / width[i] + EPSILON) - round(x[i] / width[i] + EPSILON)) + (y_size - 1) / 2 + EPSILON); + } + + int index = 0; + for (i = 0; i < dimension; i++) { + if (i + 1 < dimension) + index += temp[i] * int(pow(double(y_size), dimension - i - 1) + EPSILON); + else + index += temp[i]; + } + return index; + } + + double round(double r) { + return (r > 0.0) ? 
floor(r + 0.5) : ceil(r - 0.5); + } + }; + + // vector, store the sum_x, sum_x_square, count_y + template <typename T> + class n_vector { + + public: + n_vector() {} + n_vector(const std::vector<double> & lowerboundary, // lowerboundary of x + const std::vector<double> & upperboundary, // upperboundary of + const std::vector<double> & width, // width of x + const int y_size, // size of y, for example, ysize=7, then when x=1, the distribution of y in [-2,4] is considered + const T & default_value) { // the default value of T + + this->width = width; + this->dimension = lowerboundary.size(); + + x_total_size = 1; + for (int i = 0; i < dimension; i++) { + this->lowerboundary.push_back(lowerboundary[i] - (y_size - 1) / 2 * width[i] - EPSILON); + this->upperboundary.push_back(upperboundary[i] + (y_size - 1) / 2 * width[i] + EPSILON); + + x_size.push_back(int((this->upperboundary[i] - this->lowerboundary[i]) / this->width[i] + EPSILON)); + x_total_size *= x_size[i]; + } + + // initialize the internal vector + vector.resize(x_total_size, default_value); + + temp.resize(dimension); + } + + const T inline get_value(const std::vector<double> & x) { + return vector[convert_x(x)]; + } + + void inline set_value(const std::vector<double> & x, const T value) { + vector[convert_x(x)] = value; + } + + void inline increase_value(const std::vector<double> & x, const T value) { + vector[convert_x(x)] += value; + } + private: + std::vector<double> lowerboundary; + std::vector<double> upperboundary; + std::vector<double> width; + int dimension; + std::vector<int> x_size; // the size of x in each dimension + int x_total_size; // the size of x of the internal matrix + + std::vector<T> vector; // the internal vector + + std::vector<int> temp; // this vector is used in convert_x and convert_y to save computational resource + + int convert_x(const std::vector<double> & x) { // convert real x value to its interal index + + int i, j; + + for (i = 0; i < dimension; i++) { + temp[i] = int((x[i] 
- lowerboundary[i]) / width[i] + EPSILON); + } + + int index = 0; + for (i = 0; i < dimension; i++) { + if (i + 1 < dimension) { + int x_temp = 1; + for (j = i + 1; j < dimension; j++) + x_temp *= x_size[j]; + index += temp[i] * x_temp; + } + else + index += temp[i]; + } + return index; + } + }; + + class UIestimator { // the implemension of UI estimator + + public: + UIestimator() {} + + //called when (re)start an eabf simulation + UIestimator(const std::vector<double> & lowerboundary, + const std::vector<double> & upperboundary, + const std::vector<double> & width, + const std::vector<double> & krestr, // force constant in eABF + const std::string & output_filename, // the prefix of output files + const int output_freq, + const bool restart, // whether restart from a .count and a .grad file + const std::vector<std::string> & input_filename, // the prefixes of input files + const double temperature) { + + // initialize variables + this->lowerboundary = lowerboundary; + this->upperboundary = upperboundary; + this->width = width; + this->krestr = krestr; + this->output_filename = output_filename; + this->output_freq = output_freq; + this->restart = restart; + this->input_filename = input_filename; + this->temperature = temperature; + + int i, j; + + dimension = lowerboundary.size(); + + for (i = 0; i < dimension; i++) { + sum_x.push_back(n_vector<double>(lowerboundary, upperboundary, width, Y_SIZE, 0.0)); + sum_x_square.push_back(n_vector<double>(lowerboundary, upperboundary, width, Y_SIZE, 0.0)); + + x_av.push_back(n_vector<double>(lowerboundary, upperboundary, width, Y_SIZE, 0.0)); + sigma_square.push_back(n_vector<double>(lowerboundary, upperboundary, width, Y_SIZE, 0.0)); + } + + count_y = n_vector<int>(lowerboundary, upperboundary, width, Y_SIZE, 0); + distribution_x_y = n_matrix(lowerboundary, upperboundary, width, Y_SIZE); + + grad = n_vector<std::vector<double> >(lowerboundary, upperboundary, width, 1, std::vector<double>(dimension, 0.0)); + count = 
n_vector<int>(lowerboundary, upperboundary, width, 1, 0); + + written = false; + written_1D = false; + + if (dimension == 1) { + std::vector<double> upperboundary_temp = upperboundary; + upperboundary_temp[0] = upperboundary[0] + width[0]; + oneD_pmf = n_vector<double>(lowerboundary, upperboundary_temp, width, 1, 0.0); + } + + if (restart == true) { + input_grad = n_vector<std::vector<double> >(lowerboundary, upperboundary, width, 1, std::vector<double>(dimension, 0.0)); + input_count = n_vector<int>(lowerboundary, upperboundary, width, 1, 0); + + // initialize input_Grad and input_count + // the loop_flag is a n-dimensional vector, increae from lowerboundary to upperboundary when looping + std::vector<double> loop_flag(dimension, 0); + for (i = 0; i < dimension; i++) { + loop_flag[i] = lowerboundary[i]; + } + + i = 0; + while (i >= 0) { + for (j = 0; j < dimension; j++) { + input_grad.set_value(loop_flag, std::vector<double>(dimension,0)); + } + input_count.set_value(loop_flag, 0); + + // iterate over any dimensions + i = dimension - 1; + while (i >= 0) { + loop_flag[i] += width[i]; + if (loop_flag[i] > upperboundary[i] - width[i] + EPSILON) { + loop_flag[i] = lowerboundary[i]; + i--; + } + else + break; + } + } + read_inputfiles(input_filename); + } + } + + ~UIestimator() {} + + // called from MD engine every step + bool update(const int step, std::vector<double> x, std::vector<double> y) { + + int i; + + if (step % output_freq == 0) { + calc_pmf(); + write_files(); + //write_interal_data(); + } + + for (i = 0; i < dimension; i++) { + // for dihedral RC, it is possible that x = 179 and y = -179, should correct it + // may have problem, need to fix + if (x[i] > 150 && y[i] < -150) { + y[i] += 360; + } + if (x[i] < -150 && y[i] > 150) { + y[i] -= 360; + } + + if (x[i] < lowerboundary[i] - EXTENDED_X_SIZE * width[i] + EPSILON || x[i] > upperboundary[i] + EXTENDED_X_SIZE * width[i] - EPSILON \ + || y[i] - x[i] < -HALF_Y_SIZE * width[i] + EPSILON || y[i] - x[i] > 
HALF_Y_SIZE * width[i] - EPSILON \ + || y[i] - lowerboundary[i] < -HALF_Y_SIZE * width[i] + EPSILON || y[i] - upperboundary[i] > HALF_Y_SIZE * width[i] - EPSILON) + return false; + } + + for (i = 0; i < dimension; i++) { + sum_x[i].increase_value(y, x[i]); + sum_x_square[i].increase_value(y, x[i] * x[i]); + } + count_y.increase_value(y, 1); + + for (i = 0; i < dimension; i++) { + // adapt colvars precision + if (x[i] < lowerboundary[i] + EPSILON || x[i] > upperboundary[i] - EPSILON) + return false; + } + distribution_x_y.increase_value(x, y, 1); + + return true; + } + + // update the output_filename + void update_output_filename(const std::string& filename) { + output_filename = filename; + } + + private: + std::vector<n_vector<double> > sum_x; // the sum of x in each y bin + std::vector<n_vector<double> > sum_x_square; // the sum of x in each y bin + n_vector<int> count_y; // the distribution of y + n_matrix distribution_x_y; // the distribution of <x, y> pair + + int dimension; + + std::vector<double> lowerboundary; + std::vector<double> upperboundary; + std::vector<double> width; + std::vector<double> krestr; + std::string output_filename; + int output_freq; + bool restart; + std::vector<std::string> input_filename; + double temperature; + + n_vector<std::vector<double> > grad; + n_vector<int> count; + + n_vector<double> oneD_pmf; + + n_vector<std::vector<double> > input_grad; + n_vector<int> input_count; + + // used in double integration + std::vector<n_vector<double> > x_av; + std::vector<n_vector<double> > sigma_square; + + bool written; + bool written_1D; + + // calculate gradients from the internal variables + void calc_pmf() { + int norm; + int i, j, k; + + std::vector<double> loop_flag(dimension, 0); + for (i = 0; i < dimension; i++) { + loop_flag[i] = lowerboundary[i] - HALF_Y_SIZE * width[i]; + } + + i = 0; + while (i >= 0) { + norm = count_y.get_value(loop_flag) > 0 ? 
count_y.get_value(loop_flag) : 1; + for (j = 0; j < dimension; j++) { + x_av[j].set_value(loop_flag, sum_x[j].get_value(loop_flag) / norm); + sigma_square[j].set_value(loop_flag, sum_x_square[j].get_value(loop_flag) / norm - x_av[j].get_value(loop_flag) * x_av[j].get_value(loop_flag)); + } + + // iterate over any dimensions + i = dimension - 1; + while (i >= 0) { + loop_flag[i] += width[i]; + if (loop_flag[i] > upperboundary[i] + HALF_Y_SIZE * width[i] - width[i] + EPSILON) { + loop_flag[i] = lowerboundary[i] - HALF_Y_SIZE * width[i]; + i--; + } + else + break; + } + } + + // double integration + std::vector<double> av(dimension, 0); + std::vector<double> diff_av(dimension, 0); + + std::vector<double> loop_flag_x(dimension, 0); + std::vector<double> loop_flag_y(dimension, 0); + for (i = 0; i < dimension; i++) { + loop_flag_x[i] = lowerboundary[i]; + loop_flag_y[i] = loop_flag_x[i] - HALF_Y_SIZE * width[i]; + } + + i = 0; + while (i >= 0) { + norm = 0; + for (k = 0; k < dimension; k++) { + av[k] = 0; + diff_av[k] = 0; + loop_flag_y[k] = loop_flag_x[k] - HALF_Y_SIZE * width[k]; + } + + int j = 0; + while (j >= 0) { + norm += distribution_x_y.get_value(loop_flag_x, loop_flag_y); + for (k = 0; k < dimension; k++) { + if (sigma_square[k].get_value(loop_flag_y) > EPSILON || sigma_square[k].get_value(loop_flag_y) < -EPSILON) + av[k] += distribution_x_y.get_value(loop_flag_x, loop_flag_y) * ( (loop_flag_x[k] + 0.5 * width[k]) - x_av[k].get_value(loop_flag_y)) / sigma_square[k].get_value(loop_flag_y); + + diff_av[k] += distribution_x_y.get_value(loop_flag_x, loop_flag_y) * (loop_flag_x[k] - loop_flag_y[k]); + } + + // iterate over any dimensions + j = dimension - 1; + while (j >= 0) { + loop_flag_y[j] += width[j]; + if (loop_flag_y[j] > loop_flag_x[j] + HALF_Y_SIZE * width[j] - width[j] + EPSILON) { + loop_flag_y[j] = loop_flag_x[j] - HALF_Y_SIZE * width[j]; + j--; + } + else + break; + } + } + + std::vector<double> grad_temp(dimension, 0); + for (k = 0; k < dimension; k++) 
{ + diff_av[k] /= (norm > 0 ? norm : 1); + av[k] = cvm::boltzmann() * temperature * av[k] / (norm > 0 ? norm : 1); + grad_temp[k] = av[k] - krestr[k] * diff_av[k]; + } + grad.set_value(loop_flag_x, grad_temp); + count.set_value(loop_flag_x, norm); + + // iterate over any dimensions + i = dimension - 1; + while (i >= 0) { + loop_flag_x[i] += width[i]; + if (loop_flag_x[i] > upperboundary[i] - width[i] + EPSILON) { + loop_flag_x[i] = lowerboundary[i]; + i--; + } + else + break; + } + } + } + + + // calculate 1D pmf + void calc_1D_pmf() + { + std::vector<double> last_position(1, 0); + std::vector<double> position(1, 0); + + double min = 0; + double dG = 0; + double i; + + oneD_pmf.set_value(lowerboundary, 0); + last_position = lowerboundary; + for (i = lowerboundary[0] + width[0]; i < upperboundary[0] + EPSILON; i += width[0]) { + position[0] = i + EPSILON; + if (restart == false || input_count.get_value(last_position) == 0) { + dG = oneD_pmf.get_value(last_position) + grad.get_value(last_position)[0] * width[0]; + } + else { + dG = oneD_pmf.get_value(last_position) + ((grad.get_value(last_position)[0] * count.get_value(last_position) + input_grad.get_value(last_position)[0] * input_count.get_value(last_position)) / (count.get_value(last_position) + input_count.get_value(last_position))) * width[0]; + } + if (dG < min) + min = dG; + oneD_pmf.set_value(position, dG); + last_position[0] = i + EPSILON; + } + + for (i = lowerboundary[0]; i < upperboundary[0] + EPSILON; i += width[0]) { + position[0] = i + EPSILON; + oneD_pmf.set_value(position, oneD_pmf.get_value(position) - min); + } + } + + // write 1D pmf + void write_1D_pmf() { + std::string pmf_filename = output_filename + ".UI.pmf"; + + // only for colvars module! 
+ if (written_1D) cvm::backup_file(pmf_filename.c_str()); + + std::ostream* ofile_pmf = cvm::proxy->output_stream(pmf_filename.c_str()); + + std::vector<double> position(1, 0); + for (double i = lowerboundary[0]; i < upperboundary[0] + EPSILON; i += width[0]) { + *ofile_pmf << i << " "; + position[0] = i + EPSILON; + *ofile_pmf << oneD_pmf.get_value(position) << std::endl; + } + cvm::proxy->close_output_stream(pmf_filename.c_str()); + + written_1D = true; + } + + // write heads of the output files + void writehead(std::ostream& os) const { + os << "# " << dimension << std::endl; + for (int i = 0; i < dimension; i++) { + os << "# " << lowerboundary[i] << " " << width[i] << " " << int((upperboundary[i] - lowerboundary[i]) / width[i] + EPSILON) << " " << 0 << std::endl; + } + os << std::endl; + } + + // write interal data, used for testing + void write_interal_data() { + std::string internal_filename = output_filename + ".UI.internal"; + + std::ostream* ofile_internal = cvm::proxy->output_stream(internal_filename.c_str()); + + std::vector<double> loop_flag(dimension, 0); + for (int i = 0; i < dimension; i++) { + loop_flag[i] = lowerboundary[i]; + } + + int n = 0; + while (n >= 0) { + for (int j = 0; j < dimension; j++) { + *ofile_internal << loop_flag[j] + 0.5 * width[j] << " "; + } + + for (int k = 0; k < dimension; k++) { + *ofile_internal << grad.get_value(loop_flag)[k] << " "; + } + + std::vector<double> ii(dimension,0); + for (double i = loop_flag[0] - 10; i < loop_flag[0] + 10 + EPSILON; i+= width[0]) { + for (double j = loop_flag[1] - 10; j< loop_flag[1] + 10 + EPSILON; j+=width[1]) { + ii[0] = i; + ii[1] = j; + *ofile_internal << i <<" "<<j<<" "<< distribution_x_y.get_value(loop_flag,ii)<< " "; + } + } + *ofile_internal << std::endl; + + // iterate over any dimensions + n = dimension - 1; + while (n >= 0) { + loop_flag[n] += width[n]; + if (loop_flag[n] > upperboundary[n] - width[n] + EPSILON) { + loop_flag[n] = lowerboundary[n]; + n--; + } + else + break; + } 
+ } + cvm::proxy->close_output_stream(internal_filename.c_str()); + } + + // write output files + void write_files() { + std::string grad_filename = output_filename + ".UI.grad"; + std::string hist_filename = output_filename + ".UI.hist.grad"; + std::string count_filename = output_filename + ".UI.count"; + + int i, j; +// + // only for colvars module! + if (written) cvm::backup_file(grad_filename.c_str()); + //if (written) cvm::backup_file(hist_filename.c_str()); + if (written) cvm::backup_file(count_filename.c_str()); + + std::ostream* ofile = cvm::proxy->output_stream(grad_filename.c_str()); + std::ostream* ofile_hist = cvm::proxy->output_stream(hist_filename.c_str(), std::ios::app); + std::ostream* ofile_count = cvm::proxy->output_stream(count_filename.c_str()); + + writehead(*ofile); + writehead(*ofile_hist); + writehead(*ofile_count); + + if (dimension == 1) { + calc_1D_pmf(); + write_1D_pmf(); + } + + std::vector<double> loop_flag(dimension, 0); + for (i = 0; i < dimension; i++) { + loop_flag[i] = lowerboundary[i]; + } + + i = 0; + while (i >= 0) { + for (j = 0; j < dimension; j++) { + *ofile << loop_flag[j] + 0.5 * width[j] << " "; + *ofile_hist << loop_flag[j] + 0.5 * width[j] << " "; + *ofile_count << loop_flag[j] + 0.5 * width[j] << " "; + } + + if (restart == false) { + for (j = 0; j < dimension; j++) { + *ofile << grad.get_value(loop_flag)[j] << " "; + *ofile_hist << grad.get_value(loop_flag)[j] << " "; + } + *ofile << std::endl; + *ofile_hist << std::endl; + *ofile_count << count.get_value(loop_flag) << " " <<std::endl; + } + else { + double final_grad = 0; + for (j = 0; j < dimension; j++) { + int total_count_temp = (count.get_value(loop_flag) + input_count.get_value(loop_flag)); + if (input_count.get_value(loop_flag) == 0) + final_grad = grad.get_value(loop_flag)[j]; + else + final_grad = ((grad.get_value(loop_flag)[j] * count.get_value(loop_flag) + input_grad.get_value(loop_flag)[j] * input_count.get_value(loop_flag)) / total_count_temp); + *ofile 
<< final_grad << " "; + *ofile_hist << final_grad << " "; + } + *ofile << std::endl; + *ofile_hist << std::endl; + *ofile_count << (count.get_value(loop_flag) + input_count.get_value(loop_flag)) << " " <<std::endl; + } + + // iterate over any dimensions + i = dimension - 1; + while (i >= 0) { + loop_flag[i] += width[i]; + if (loop_flag[i] > upperboundary[i] - width[i] + EPSILON) { + loop_flag[i] = lowerboundary[i]; + i--; + *ofile << std::endl; + *ofile_hist << std::endl; + *ofile_count << std::endl; + } + else + break; + } + } + cvm::proxy->close_output_stream(grad_filename.c_str()); + cvm::proxy->close_output_stream(hist_filename.c_str()); + cvm::proxy->close_output_stream(count_filename.c_str()); + + written = true; + } + + // read input files + void read_inputfiles(const std::vector<std::string> input_filename) + { + char sharp; + double nothing; + int dimension_temp; + int i, j, k, l, m; + + std::vector<double> loop_bin_size(dimension, 0); + std::vector<double> position_temp(dimension, 0); + std::vector<double> grad_temp(dimension, 0); + int count_temp = 0; + for (i = 0; i < int(input_filename.size()); i++) { + int size = 1 , size_temp = 0; + + std::string count_filename = input_filename[i] + ".UI.count"; + std::string grad_filename = input_filename[i] + ".UI.grad"; + + std::ifstream count_file(count_filename.c_str(), std::ios::in); + std::ifstream grad_file(grad_filename.c_str(), std::ios::in); + + count_file >> sharp >> dimension_temp; + grad_file >> sharp >> dimension_temp; + + for (j = 0; j < dimension; j++) { + count_file >> sharp >> nothing >> nothing >> size_temp >> nothing; + grad_file >> sharp >> nothing >> nothing >> nothing >> nothing; + size *= size_temp; + } + + for (j = 0; j < size; j++) { + do { + for (k = 0; k < dimension; k++) { + count_file >> position_temp[k]; + grad_file >> nothing; + } + + for (l = 0; l < dimension; l++) { + grad_file >> grad_temp[l]; + } + count_file >> count_temp; + } + while (position_temp[i] < lowerboundary[i] - 
EPSILON || position_temp[i] > upperboundary[i] + EPSILON); + + if (count_temp == 0) { + continue; + } + + for (m = 0; m < dimension; m++) { + grad_temp[m] = (grad_temp[m] * count_temp + input_grad.get_value(position_temp)[m] * input_count.get_value(position_temp)) / (count_temp + input_count.get_value(position_temp)); + } + input_grad.set_value(position_temp, grad_temp); + input_count.increase_value(position_temp, count_temp); + } + + count_file.close(); + grad_file.close(); + } + } + }; +}; + +#endif diff --git a/lib/colvars/colvaratoms.cpp b/lib/colvars/colvaratoms.cpp index 9b4a922e3fbb2322455c17adca9194d2032ab6a1..d2a0f0a807d2727500d96e6a19a56eaa0af23f39 100644 --- a/lib/colvars/colvaratoms.cpp +++ b/lib/colvars/colvaratoms.cpp @@ -817,6 +817,18 @@ int cvm::atom_group::create_sorted_ids(void) } +int cvm::atom_group::overlap(const atom_group &g1, const atom_group &g2){ + for (cvm::atom_const_iter ai1 = g1.begin(); ai1 != g1.end(); ai1++) { + for (cvm::atom_const_iter ai2 = g2.begin(); ai2 != g2.end(); ai2++) { + if (ai1->id == ai2->id) { + return (ai1->id + 1); // 1-based index to allow boolean usage + } + } + } + return 0; +} + + void cvm::atom_group::center_ref_pos() { ref_pos_cog = cvm::atom_pos(0.0, 0.0, 0.0); diff --git a/lib/colvars/colvaratoms.h b/lib/colvars/colvaratoms.h index 6113fb38a938b37a56d54825c51d43252d5b54db..71c587e23084e516c6b66358e97327c8d404822f 100644 --- a/lib/colvars/colvaratoms.h +++ b/lib/colvars/colvaratoms.h @@ -214,6 +214,12 @@ public: { return ag_features; } + static void delete_features() { + for (size_t i=0; i < ag_features.size(); i++) { + delete ag_features[i]; + } + ag_features.clear(); + } protected: @@ -280,6 +286,10 @@ public: /// Allocates and populates the sorted list of atom ids int create_sorted_ids(void); + /// Detect whether two groups share atoms + /// If yes, returns 1-based number of a common atom; else, returns 0 + static int overlap(const atom_group &g1, const atom_group &g2); + /// \brief When updating atomic 
coordinates, translate them to align with the /// center of mass of the reference coordinates bool b_center; diff --git a/lib/colvars/colvarbias.cpp b/lib/colvars/colvarbias.cpp index 636727ca39b4f90b8b9686a64b51845857843c13..301e83e73015a91f14ff75a82c045954083d4074 100644 --- a/lib/colvars/colvarbias.cpp +++ b/lib/colvars/colvarbias.cpp @@ -10,6 +10,7 @@ #include "colvarmodule.h" #include "colvarvalue.h" #include "colvarbias.h" +#include "colvargrid.h" colvarbias::colvarbias(char const *key) @@ -31,12 +32,14 @@ int colvarbias::init(std::string const &conf) { colvarparse::init(conf); + size_t i = 0; + if (name.size() == 0) { // first initialization cvm::log("Initializing a new \""+bias_type+"\" instance.\n"); - rank = cvm::num_biases_type(bias_type); + rank = cvm::main()->num_biases_type(bias_type); get_keyval(conf, "name", name, bias_type+cvm::to_str(rank)); { @@ -62,7 +65,7 @@ int colvarbias::init(std::string const &conf) INPUT_ERROR); return INPUT_ERROR; } - for (size_t i = 0; i < colvar_names.size(); i++) { + for (i = 0; i < colvar_names.size(); i++) { add_colvar(colvar_names[i]); } } @@ -148,6 +151,13 @@ int colvarbias::clear() } +int colvarbias::clear_state_data() +{ + // no mutable content to delete for base class + return COLVARS_OK; +} + + int colvarbias::add_colvar(std::string const &cv_name) { if (colvar *cv = cvm::colvar_by_name(cv_name)) { @@ -164,6 +174,8 @@ int colvarbias::add_colvar(std::string const &cv_name) colvar_forces.back().is_derivative(); // colvar constraints are not applied to the force colvar_forces.back().reset(); + previous_colvar_forces.push_back(colvar_forces.back()); + cv->biases.push_back(this); // add back-reference to this bias to colvar if (is_enabled(f_cvb_apply_force)) { @@ -204,7 +216,8 @@ int colvarbias::update() void colvarbias::communicate_forces() { - for (size_t i = 0; i < num_variables(); i++) { + size_t i = 0; + for (i = 0; i < num_variables(); i++) { if (cvm::debug()) { cvm::log("Communicating a force to colvar \""+ 
variables(i)->name+"\".\n"); @@ -216,6 +229,9 @@ void colvarbias::communicate_forces() // aware of this bias' time_step_factor variables(i)->add_bias_force(cvm::real(time_step_factor) * colvar_forces[i]); } + for (i = 0; i < num_variables(); i++) { + previous_colvar_forces[i] = colvar_forces[i]; + } } @@ -389,6 +405,259 @@ std::ostream & colvarbias::write_traj(std::ostream &os) return os; } + + +colvarbias_ti::colvarbias_ti(char const *key) + : colvarbias(key) +{ + provide(f_cvb_calc_ti_samples); + ti_avg_forces = NULL; + ti_count = NULL; +} + + +colvarbias_ti::~colvarbias_ti() +{ + colvarbias_ti::clear_state_data(); +} + + +int colvarbias_ti::clear_state_data() +{ + if (ti_avg_forces != NULL) { + delete ti_avg_forces; + ti_avg_forces = NULL; + } + if (ti_count != NULL) { + delete ti_count; + ti_count = NULL; + } + return COLVARS_OK; +} + + +int colvarbias_ti::init(std::string const &conf) +{ + int error_code = COLVARS_OK; + + get_keyval_feature(this, conf, "writeTISamples", + f_cvb_write_ti_samples, + is_enabled(f_cvb_write_ti_samples)); + + get_keyval_feature(this, conf, "writeTIPMF", + f_cvb_write_ti_pmf, + is_enabled(f_cvb_write_ti_pmf)); + + if ((num_variables() > 1) && is_enabled(f_cvb_write_ti_pmf)) { + return cvm::error("Error: only 1-dimensional PMFs can be written " + "on the fly.\n" + "Consider using writeTISamples instead and " + "post-processing the sampled free-energy gradients.\n", + COLVARS_NOT_IMPLEMENTED); + } else { + error_code |= init_grids(); + } + + if (is_enabled(f_cvb_write_ti_pmf)) { + enable(f_cvb_write_ti_samples); + } + + if (is_enabled(f_cvb_calc_ti_samples)) { + std::vector<std::string> const time_biases = + cvm::main()->time_dependent_biases(); + if (time_biases.size() > 0) { + if ((time_biases.size() > 1) || (time_biases[0] != this->name)) { + for (size_t i = 0; i < num_variables(); i++) { + if (! 
variables(i)->is_enabled(f_cv_subtract_applied_force)) { + return cvm::error("Error: cannot collect TI samples while other " + "time-dependent biases are active and not all " + "variables have subtractAppliedForces on.\n", + INPUT_ERROR); + } + } + } + } + } + + return error_code; +} + + +int colvarbias_ti::init_grids() +{ + if (is_enabled(f_cvb_calc_ti_samples)) { + if (ti_avg_forces == NULL) { + ti_bin.resize(num_variables()); + ti_system_forces.resize(num_variables()); + for (size_t icv = 0; icv < num_variables(); icv++) { + ti_system_forces[icv].type(variables(icv)->value()); + ti_system_forces[icv].is_derivative(); + ti_system_forces[icv].reset(); + } + ti_avg_forces = new colvar_grid_gradient(colvars); + ti_count = new colvar_grid_count(colvars); + ti_avg_forces->samples = ti_count; + ti_count->has_parent_data = true; + } + } + + return COLVARS_OK; +} + + +int colvarbias_ti::update() +{ + return update_system_forces(NULL); +} + + +int colvarbias_ti::update_system_forces(std::vector<colvarvalue> const + *subtract_forces) +{ + if (! is_enabled(f_cvb_calc_ti_samples)) { + return COLVARS_OK; + } + + has_data = true; + + if (cvm::debug()) { + cvm::log("Updating system forces for bias "+this->name+"\n"); + } + + colvarproxy *proxy = cvm::main()->proxy; + + size_t i; + + if (proxy->total_forces_same_step()) { + for (i = 0; i < num_variables(); i++) { + ti_bin[i] = ti_avg_forces->current_bin_scalar(i); + } + } + + // Collect total colvar forces + if ((cvm::step_relative() > 0) || proxy->total_forces_same_step()) { + if (ti_avg_forces->index_ok(ti_bin)) { + for (i = 0; i < num_variables(); i++) { + if (variables(i)->is_enabled(f_cv_subtract_applied_force)) { + // this colvar is already subtracting all applied forces + ti_system_forces[i] = variables(i)->total_force(); + } else { + ti_system_forces[i] = variables(i)->total_force() - + ((subtract_forces != NULL) ? 
+ (*subtract_forces)[i] : previous_colvar_forces[i]); + } + } + ti_avg_forces->acc_value(ti_bin, ti_system_forces); + } + } + + if (!proxy->total_forces_same_step()) { + // Set the index for use in the next iteration, when total forces come in + for (i = 0; i < num_variables(); i++) { + ti_bin[i] = ti_avg_forces->current_bin_scalar(i); + } + } + + return COLVARS_OK; +} + + +std::string const colvarbias_ti::get_state_params() const +{ + return std::string(""); +} + + +int colvarbias_ti::set_state_params(std::string const &state_conf) +{ + return COLVARS_OK; +} + + +std::ostream & colvarbias_ti::write_state_data(std::ostream &os) +{ + if (! is_enabled(f_cvb_calc_ti_samples)) { + return os; + } + os << "\nhistogram\n"; + ti_count->write_raw(os); + os << "\nsystem_forces\n"; + ti_avg_forces->write_raw(os); + return os; +} + + +std::istream & colvarbias_ti::read_state_data(std::istream &is) +{ + if (! is_enabled(f_cvb_calc_ti_samples)) { + return is; + } + if (cvm::debug()) { + cvm::log("Reading state data for the TI estimator.\n"); + } + if (! read_state_data_key(is, "histogram")) { + return is; + } + if (! ti_count->read_raw(is)) { + return is; + } + if (! read_state_data_key(is, "system_forces")) { + return is; + } + if (! 
ti_avg_forces->read_raw(is)) { + return is; + } + if (cvm::debug()) { + cvm::log("Done reading state data for the TI estimator.\n"); + } + return is; +} + + +int colvarbias_ti::write_output_files() +{ + if (!has_data) { + // nothing to write + return COLVARS_OK; + } + + std::string const ti_output_prefix = cvm::output_prefix()+"."+this->name; + + std::ostream *os = NULL; + + if (is_enabled(f_cvb_write_ti_samples)) { + std::string const ti_count_file_name(ti_output_prefix+".ti.count"); + os = cvm::proxy->output_stream(ti_count_file_name); + if (os) { + ti_count->write_multicol(*os); + cvm::proxy->close_output_stream(ti_count_file_name); + } + + std::string const ti_grad_file_name(ti_output_prefix+".ti.grad"); + os = cvm::proxy->output_stream(ti_grad_file_name); + if (os) { + ti_avg_forces->write_multicol(*os); + cvm::proxy->close_output_stream(ti_grad_file_name); + } + } + + if (is_enabled(f_cvb_write_ti_pmf)) { + std::string const pmf_file_name(ti_output_prefix+".ti.pmf"); + cvm::log("Writing TI PMF to file \""+pmf_file_name+"\".\n"); + os = cvm::proxy->output_stream(pmf_file_name); + if (os) { + // get the FE gradient + ti_avg_forces->multiply_constant(-1.0); + ti_avg_forces->write_1D_integral(*os); + ti_avg_forces->multiply_constant(-1.0); + cvm::proxy->close_output_stream(pmf_file_name); + } + } + + return COLVARS_OK; +} + + // Static members std::vector<colvardeps::feature *> colvarbias::cvb_features; diff --git a/lib/colvars/colvarbias.h b/lib/colvars/colvarbias.h index a147cd3210486cc74d80f5e0c0e5f859e0ea1b70..083b9d73036da056a2c76d3cfb8cef600049c1f3 100644 --- a/lib/colvars/colvarbias.h +++ b/lib/colvars/colvarbias.h @@ -109,6 +109,9 @@ public: /// \brief Delete everything virtual int clear(); + /// \brief Delete only the allocatable data (save memory) + virtual int clear_state_data(); + /// Destructor virtual ~colvarbias(); @@ -183,6 +186,12 @@ public: { return cvb_features; } + static void delete_features() { + for (size_t i=0; i < cvb_features.size(); 
i++) { + delete cvb_features[i]; + } + cvb_features.clear(); + } protected: @@ -194,6 +203,9 @@ protected: /// \brief Current forces from this bias to the variables std::vector<colvarvalue> colvar_forces; + /// \brief Forces last applied by this bias to the variables + std::vector<colvarvalue> previous_colvar_forces; + /// \brief Current energy of this bias (colvar_forces should be obtained by deriving this) cvm::real bias_energy; @@ -209,4 +221,48 @@ protected: }; + +class colvar_grid_gradient; +class colvar_grid_count; + +/// \brief Base class for unconstrained thermodynamic-integration FE estimator +class colvarbias_ti : public virtual colvarbias { +public: + + colvarbias_ti(char const *key); + virtual ~colvarbias_ti(); + + virtual int clear_state_data(); + + virtual int init(std::string const &conf); + virtual int init_grids(); + virtual int update(); + + /// Subtract applied forces (either last forces or argument) from the total + /// forces + virtual int update_system_forces(std::vector<colvarvalue> const + *subtract_forces); + + virtual std::string const get_state_params() const; + virtual int set_state_params(std::string const &state_conf); + virtual std::ostream & write_state_data(std::ostream &os); + virtual std::istream & read_state_data(std::istream &is); + virtual int write_output_files(); + +protected: + + /// \brief Forces exerted from the system to the associated variables + std::vector<colvarvalue> ti_system_forces; + + /// Averaged system forces + colvar_grid_gradient *ti_avg_forces; + + /// Histogram of sampled data + colvar_grid_count *ti_count; + + /// Because total forces may be from the last simulation step, + /// store the index of the variables then + std::vector<int> ti_bin; +}; + #endif diff --git a/lib/colvars/colvarbias_abf.cpp b/lib/colvars/colvarbias_abf.cpp index a96fc21d644e750d1a574596e4c3f0b99c87930f..e4aea8eb86e7dcb6712fc6dc2c771adeeb6909f6 100644 --- a/lib/colvars/colvarbias_abf.cpp +++ b/lib/colvars/colvarbias_abf.cpp @@ -14,6 
+14,8 @@ colvarbias_abf::colvarbias_abf(char const *key) : colvarbias(key), + b_UI_estimator(false), + b_CZAR_estimator(false), system_force(NULL), gradients(NULL), samples(NULL), @@ -159,6 +161,7 @@ int colvarbias_abf::init(std::string const &conf) // Data for eABF z-based estimator if (b_extended) { + get_keyval(conf, "CZARestimator", b_CZAR_estimator, true); // CZAR output files for stratified eABF get_keyval(conf, "writeCZARwindowFile", b_czar_window_file, false, colvarparse::parse_silent); @@ -187,8 +190,38 @@ int colvarbias_abf::init(std::string const &conf) read_gradients_samples(); } - cvm::log("Finished ABF setup.\n"); + // if extendedLangrangian is on, then call UI estimator + if (b_extended) { + get_keyval(conf, "UIestimator", b_UI_estimator, false); + + if (b_UI_estimator) { + std::vector<double> UI_lowerboundary; + std::vector<double> UI_upperboundary; + std::vector<double> UI_width; + std::vector<double> UI_krestr; + + bool UI_restart = (input_prefix.size() > 0); + + for (size_t i = 0; i < colvars.size(); i++) + { + UI_lowerboundary.push_back(colvars[i]->lower_boundary); + UI_upperboundary.push_back(colvars[i]->upper_boundary); + UI_width.push_back(colvars[i]->width); + UI_krestr.push_back(colvars[i]->force_constant()); + } + eabf_UI = UIestimator::UIestimator(UI_lowerboundary, + UI_upperboundary, + UI_width, + UI_krestr, // force constant in eABF + output_prefix, // the prefix of output files + cvm::restart_out_freq, + UI_restart, // whether restart from a .count and a .grad file + input_prefix, // the prefixes of input files + cvm::temperature()); + } + } + cvm::log("Finished ABF setup.\n"); return COLVARS_OK; } @@ -271,6 +304,10 @@ int colvarbias_abf::update() // and subtract previous ABF force if necessary update_system_force(i); } + if (cvm::proxy->total_forces_same_step()) { + // e.g. 
in LAMMPS, total forces are current + force_bin = bin; + } gradients->acc_force(force_bin, system_force); } if ( z_gradients && update_bias ) { @@ -288,8 +325,11 @@ int colvarbias_abf::update() } } - // save bin for next timestep - force_bin = bin; + if (!cvm::proxy->total_forces_same_step()) { + // e.g. in NAMD, total forces will be available for next timestep + // hence we store the current colvar bin + force_bin = bin; + } // Reset biasing forces from previous timestep for (size_t i = 0; i < colvars.size(); i++) { @@ -332,7 +372,7 @@ int colvarbias_abf::update() } // update the output prefix; TODO: move later to setup_output() function - if (cvm::num_biases_feature(colvardeps::f_cvb_calc_pmf) == 1) { + if (cvm::main()->num_biases_feature(colvardeps::f_cvb_calc_pmf) == 1) { // This is the only bias computing PMFs output_prefix = cvm::output_prefix(); } else { @@ -364,6 +404,20 @@ int colvarbias_abf::update() cvm::log("Prepared sample and gradient buffers at step "+cvm::to_str(cvm::step_absolute())+"."); } + // update UI estimator every step + if (b_UI_estimator) + { + std::vector<double> x(colvars.size(),0); + std::vector<double> y(colvars.size(),0); + for (size_t i = 0; i < colvars.size(); i++) + { + x[i] = colvars[i]->actual_value(); + y[i] = colvars[i]->value(); + } + eabf_UI.update_output_filename(output_prefix); + eabf_UI.update(cvm::step_absolute(), x, y); + } + return COLVARS_OK; } @@ -479,8 +533,8 @@ void colvarbias_abf::write_gradients_samples(const std::string &prefix, bool app cvm::proxy->close_output_stream(pmf_out_name); } - if (z_gradients) { - // Write eABF-related quantities + if (b_CZAR_estimator) { + // Write eABF CZAR-related quantities std::string z_samples_out_name = prefix + ".zcount"; @@ -588,7 +642,7 @@ void colvarbias_abf::read_gradients_samples() is.close(); } - if (z_gradients) { + if (b_CZAR_estimator) { // Read eABF z-averaged data for CZAR cvm::log("Reading z-histogram from " + z_samples_in_name + " and z-gradient from " + 
z_gradients_in_name); @@ -621,7 +675,7 @@ std::ostream & colvarbias_abf::write_state_data(std::ostream& os) os << "\ngradient\n"; gradients->write_raw(os, 8); - if (z_gradients) { + if (b_CZAR_estimator) { os.setf(std::ios::fmtflags(0), std::ios::floatfield); // default floating-point format os << "\nz_samples\n"; z_samples->write_raw(os, 8); @@ -655,7 +709,7 @@ std::istream & colvarbias_abf::read_state_data(std::istream& is) return is; } - if (z_gradients) { + if (b_CZAR_estimator) { if (! read_state_data_key(is, "z_samples")) { return is; diff --git a/lib/colvars/colvarbias_abf.h b/lib/colvars/colvarbias_abf.h index 41a5475fa70087ca709e4ca2be9f0f118874a311..1defe72268c487e82a536d2f5e441e1067897272 100644 --- a/lib/colvars/colvarbias_abf.h +++ b/lib/colvars/colvarbias_abf.h @@ -17,6 +17,7 @@ #include "colvarbias.h" #include "colvargrid.h" +#include "colvar_UIestimator.h" typedef cvm::real* gradient_t; @@ -50,6 +51,12 @@ private: /// Write CZAR output file for stratified eABF (.zgrad) bool b_czar_window_file; size_t history_freq; + /// Umbrella Integration estimator of free energy from eABF + UIestimator::UIestimator eabf_UI; + // Run UI estimator? + bool b_UI_estimator; + // Run CZAR estimator? + bool b_CZAR_estimator; /// Cap applied biasing force? 
bool cap_force; diff --git a/lib/colvars/colvarbias_meta.cpp b/lib/colvars/colvarbias_meta.cpp index 66806fc9fca01d57a596344ff2fd62a120272442..b0d154dfc9ebf591f49b15ba517f4bcf7ba10317 100644 --- a/lib/colvars/colvarbias_meta.cpp +++ b/lib/colvars/colvarbias_meta.cpp @@ -33,7 +33,7 @@ colvarbias_meta::colvarbias_meta(char const *key) - : colvarbias(key) + : colvarbias(key), colvarbias_ti(key) { new_hills_begin = hills.end(); hills_traj_os = NULL; @@ -44,6 +44,7 @@ colvarbias_meta::colvarbias_meta(char const *key) int colvarbias_meta::init(std::string const &conf) { colvarbias::init(conf); + colvarbias_ti::init(conf); enable(f_cvb_calc_pmf); @@ -104,7 +105,7 @@ int colvarbias_meta::init(std::string const &conf) get_keyval(conf, "dumpFreeEnergyFile", dump_fes, true, colvarparse::parse_silent); if (get_keyval(conf, "saveFreeEnergyFile", dump_fes_save, false, colvarparse::parse_silent)) { cvm::log("Option \"saveFreeEnergyFile\" is deprecated, " - "please use \"keepFreeEnergyFile\" instead."); + "please use \"keepFreeEnergyFiles\" instead."); } get_keyval(conf, "keepFreeEnergyFiles", dump_fes_save, dump_fes_save); @@ -230,15 +231,7 @@ int colvarbias_meta::init_ebmeta_params(std::string const &conf) colvarbias_meta::~colvarbias_meta() { - if (hills_energy) { - delete hills_energy; - hills_energy = NULL; - } - - if (hills_energy_gradients) { - delete hills_energy_gradients; - hills_energy_gradients = NULL; - } + colvarbias_meta::clear_state_data(); if (replica_hills_os) { cvm::proxy->close_output_stream(replica_hills_file); @@ -250,13 +243,31 @@ colvarbias_meta::~colvarbias_meta() hills_traj_os = NULL; } - if(target_dist) { + if (target_dist) { delete target_dist; target_dist = NULL; } } +int colvarbias_meta::clear_state_data() +{ + if (hills_energy) { + delete hills_energy; + hills_energy = NULL; + } + + if (hills_energy_gradients) { + delete hills_energy_gradients; + hills_energy_gradients = NULL; + } + + hills.clear(); + hills_off_grid.clear(); + + return COLVARS_OK; +} 
+ // ********************************************************************** // Hill management member functions @@ -336,6 +347,9 @@ int colvarbias_meta::update() // update base class error_code |= colvarbias::update(); + // update the TI estimator (if defined) + error_code |= colvarbias_ti::update(); + // update grid definition, if needed error_code |= update_grid_params(); // add new biasing energy/forces @@ -1000,6 +1014,10 @@ void colvarbias_meta::update_replicas_registry() (replicas.back())->hills_energy = new colvar_grid_scalar(colvars); (replicas.back())->hills_energy_gradients = new colvar_grid_gradient(colvars); } + if (is_enabled(f_cvb_calc_ti_samples)) { + (replicas.back())->enable(f_cvb_calc_ti_samples); + (replicas.back())->colvarbias_ti::init_grids(); + } } } } else { @@ -1374,6 +1392,8 @@ std::istream & colvarbias_meta::read_state_data(std::istream& is) } } + colvarbias_ti::read_state_data(is); + if (cvm::debug()) cvm::log("colvarbias_meta::read_restart() done\n"); @@ -1474,7 +1494,7 @@ std::istream & colvarbias_meta::read_hill(std::istream &is) int colvarbias_meta::setup_output() { output_prefix = cvm::output_prefix(); - if (cvm::num_biases_feature(colvardeps::f_cvb_calc_pmf) > 1) { + if (cvm::main()->num_biases_feature(colvardeps::f_cvb_calc_pmf) > 1) { // if this is not the only free energy integrator, append // this bias's name, to distinguish it from the output of the other // biases producing a .pmf file @@ -1631,6 +1651,7 @@ std::ostream & colvarbias_meta::write_state_data(std::ostream& os) } } + colvarbias_ti::write_state_data(os); return os; } @@ -1651,6 +1672,7 @@ int colvarbias_meta::write_state_to_replicas() int colvarbias_meta::write_output_files() { + colvarbias_ti::write_output_files(); if (dump_fes) { write_pmf(); } diff --git a/lib/colvars/colvarbias_meta.h b/lib/colvars/colvarbias_meta.h index 249f7342bc4b982b3747c56a11f9362b63914a41..78b2d35d41bb581d9b6da2297cf5886285415de5 100644 --- a/lib/colvars/colvarbias_meta.h +++ 
b/lib/colvars/colvarbias_meta.h @@ -19,7 +19,10 @@ #include "colvargrid.h" /// Metadynamics bias (implementation of \link colvarbias \endlink) -class colvarbias_meta : public colvarbias { +class colvarbias_meta + : public virtual colvarbias, + public virtual colvarbias_ti +{ public: @@ -35,10 +38,13 @@ public: Communication comm; colvarbias_meta(char const *key); + virtual ~colvarbias_meta(); + virtual int init(std::string const &conf); virtual int init_well_tempered_params(std::string const &conf); virtual int init_ebmeta_params(std::string const &conf); - virtual ~colvarbias_meta(); + + virtual int clear_state_data(); virtual int update(); virtual int update_grid_params(); diff --git a/lib/colvars/colvarbias_restraint.cpp b/lib/colvars/colvarbias_restraint.cpp index 70beca29fa79eb25b328c0afa1d3c8cefdf8226a..23534f56eb43c1cd0a2e257ed8581449026d0c8f 100644 --- a/lib/colvars/colvarbias_restraint.cpp +++ b/lib/colvars/colvarbias_restraint.cpp @@ -14,7 +14,7 @@ colvarbias_restraint::colvarbias_restraint(char const *key) - : colvarbias(key) + : colvarbias(key), colvarbias_ti(key) { } @@ -24,6 +24,8 @@ int colvarbias_restraint::init(std::string const &conf) colvarbias::init(conf); enable(f_cvb_apply_force); + colvarbias_ti::init(conf); + if (cvm::debug()) cvm::log("Initializing a new restraint bias.\n"); @@ -86,7 +88,7 @@ std::ostream & colvarbias_restraint::write_traj(std::ostream &os) colvarbias_restraint_centers::colvarbias_restraint_centers(char const *key) - : colvarbias(key), colvarbias_restraint(key) + : colvarbias(key), colvarbias_ti(key), colvarbias_restraint(key) { } @@ -145,7 +147,7 @@ int colvarbias_restraint_centers::change_configuration(std::string const &conf) colvarbias_restraint_k::colvarbias_restraint_k(char const *key) - : colvarbias(key), colvarbias_restraint(key) + : colvarbias(key), colvarbias_ti(key), colvarbias_restraint(key) { force_k = -1.0; } @@ -237,6 +239,7 @@ int colvarbias_restraint_moving::set_state_params(std::string const &conf) 
colvarbias_restraint_centers_moving::colvarbias_restraint_centers_moving(char const *key) : colvarbias(key), + colvarbias_ti(key), colvarbias_restraint(key), colvarbias_restraint_centers(key), colvarbias_restraint_moving(key) @@ -284,14 +287,17 @@ int colvarbias_restraint_centers_moving::init(std::string const &conf) target_centers[i], 0.5); } + + get_keyval(conf, "outputAccumulatedWork", b_output_acc_work, + b_output_acc_work); // TODO this conflicts with stages + } else { target_centers.clear(); - return COLVARS_OK; } + // Output restraint centers even when they do not change; some NAMD REUS + // scripts expect this behavior get_keyval(conf, "outputCenters", b_output_centers, b_output_centers); - get_keyval(conf, "outputAccumulatedWork", b_output_acc_work, - b_output_acc_work); // TODO this conflicts with stages return COLVARS_OK; } @@ -475,6 +481,7 @@ std::ostream & colvarbias_restraint_centers_moving::write_traj(std::ostream &os) colvarbias_restraint_k_moving::colvarbias_restraint_k_moving(char const *key) : colvarbias(key), + colvarbias_ti(key), colvarbias_restraint(key), colvarbias_restraint_k(key), colvarbias_restraint_moving(key) @@ -712,6 +719,7 @@ std::ostream & colvarbias_restraint::write_state(std::ostream &os) colvarbias_restraint_harmonic::colvarbias_restraint_harmonic(char const *key) : colvarbias(key), + colvarbias_ti(key), colvarbias_restraint(key), colvarbias_restraint_centers(key), colvarbias_restraint_moving(key), @@ -743,17 +751,22 @@ int colvarbias_restraint_harmonic::init(std::string const &conf) int colvarbias_restraint_harmonic::update() { + int error_code = COLVARS_OK; + + // update the TI estimator (if defined) + error_code |= colvarbias_ti::update(); + // update parameters (centers or force constant) - colvarbias_restraint_centers_moving::update(); - colvarbias_restraint_k_moving::update(); + error_code |= colvarbias_restraint_centers_moving::update(); + error_code |= colvarbias_restraint_k_moving::update(); // update restraint energy 
and forces - colvarbias_restraint::update(); + error_code |= colvarbias_restraint::update(); // update accumulated work using the current forces - colvarbias_restraint_centers_moving::update_acc_work(); + error_code |= colvarbias_restraint_centers_moving::update_acc_work(); - return COLVARS_OK; + return error_code; } @@ -798,6 +811,18 @@ int colvarbias_restraint_harmonic::set_state_params(std::string const &conf) } +std::ostream & colvarbias_restraint_harmonic::write_state_data(std::ostream &os) +{ + return colvarbias_ti::write_state_data(os); +} + + +std::istream & colvarbias_restraint_harmonic::read_state_data(std::istream &is) +{ + return colvarbias_ti::read_state_data(is); +} + + std::ostream & colvarbias_restraint_harmonic::write_traj_label(std::ostream &os) { colvarbias_restraint::write_traj_label(os); @@ -845,6 +870,7 @@ cvm::real colvarbias_restraint_harmonic::energy_difference(std::string const &co colvarbias_restraint_harmonic_walls::colvarbias_restraint_harmonic_walls(char const *key) : colvarbias(key), + colvarbias_ti(key), colvarbias_restraint(key), colvarbias_restraint_k(key), colvarbias_restraint_moving(key), @@ -967,11 +993,15 @@ int colvarbias_restraint_harmonic_walls::init(std::string const &conf) int colvarbias_restraint_harmonic_walls::update() { - colvarbias_restraint_k_moving::update(); + int error_code = COLVARS_OK; - colvarbias_restraint::update(); + error_code |= colvarbias_ti::update(); - return COLVARS_OK; + error_code |= colvarbias_restraint_k_moving::update(); + + error_code |= colvarbias_restraint::update(); + + return error_code; } @@ -1065,6 +1095,18 @@ int colvarbias_restraint_harmonic_walls::set_state_params(std::string const &con } +std::ostream & colvarbias_restraint_harmonic_walls::write_state_data(std::ostream &os) +{ + return colvarbias_ti::write_state_data(os); +} + + +std::istream & colvarbias_restraint_harmonic_walls::read_state_data(std::istream &is) +{ + return colvarbias_ti::read_state_data(is); +} + + std::ostream & 
colvarbias_restraint_harmonic_walls::write_traj_label(std::ostream &os) { colvarbias_restraint::write_traj_label(os); @@ -1084,6 +1126,7 @@ std::ostream & colvarbias_restraint_harmonic_walls::write_traj(std::ostream &os) colvarbias_restraint_linear::colvarbias_restraint_linear(char const *key) : colvarbias(key), + colvarbias_ti(key), colvarbias_restraint(key), colvarbias_restraint_centers(key), colvarbias_restraint_moving(key), @@ -1120,17 +1163,22 @@ int colvarbias_restraint_linear::init(std::string const &conf) int colvarbias_restraint_linear::update() { + int error_code = COLVARS_OK; + + // update the TI estimator (if defined) + error_code |= colvarbias_ti::update(); + // update parameters (centers or force constant) - colvarbias_restraint_centers_moving::update(); - colvarbias_restraint_k_moving::update(); + error_code |= colvarbias_restraint_centers_moving::update(); + error_code |= colvarbias_restraint_k_moving::update(); // update restraint energy and forces - colvarbias_restraint::update(); + error_code |= colvarbias_restraint::update(); // update accumulated work using the current forces - colvarbias_restraint_centers_moving::update_acc_work(); + error_code |= colvarbias_restraint_centers_moving::update_acc_work(); - return COLVARS_OK; + return error_code; } @@ -1196,6 +1244,18 @@ int colvarbias_restraint_linear::set_state_params(std::string const &conf) } +std::ostream & colvarbias_restraint_linear::write_state_data(std::ostream &os) +{ + return colvarbias_ti::write_state_data(os); +} + + +std::istream & colvarbias_restraint_linear::read_state_data(std::istream &is) +{ + return colvarbias_ti::read_state_data(is); +} + + std::ostream & colvarbias_restraint_linear::write_traj_label(std::ostream &os) { colvarbias_restraint::write_traj_label(os); diff --git a/lib/colvars/colvarbias_restraint.h b/lib/colvars/colvarbias_restraint.h index 8c3a1537fc881cbca3dc8ef66fcf3caeb4d49c90..b10649cab112f8a10bfedaff0c10823cc7a43f04 100644 --- 
a/lib/colvars/colvarbias_restraint.h +++ b/lib/colvars/colvarbias_restraint.h @@ -16,7 +16,8 @@ /// see derived classes for specific types /// (implementation of \link colvarbias \endlink) class colvarbias_restraint - : public virtual colvarbias + : public virtual colvarbias, + public virtual colvarbias_ti { public: @@ -95,7 +96,7 @@ protected: /// Options to change the restraint configuration over time (shared between centers and k moving) class colvarbias_restraint_moving - : public virtual colvarparse { + : public virtual colvarparse, public virtual colvardeps { public: colvarbias_restraint_moving(char const *key); @@ -226,6 +227,8 @@ public: virtual int update(); virtual std::string const get_state_params() const; virtual int set_state_params(std::string const &conf); + virtual std::ostream & write_state_data(std::ostream &os); + virtual std::istream & read_state_data(std::istream &os); virtual std::ostream & write_traj_label(std::ostream &os); virtual std::ostream & write_traj(std::ostream &os); virtual int change_configuration(std::string const &conf); @@ -252,6 +255,8 @@ public: virtual void communicate_forces(); virtual std::string const get_state_params() const; virtual int set_state_params(std::string const &conf); + virtual std::ostream & write_state_data(std::ostream &os); + virtual std::istream & read_state_data(std::istream &os); virtual std::ostream & write_traj_label(std::ostream &os); virtual std::ostream & write_traj(std::ostream &os); @@ -292,6 +297,8 @@ public: virtual std::string const get_state_params() const; virtual int set_state_params(std::string const &conf); + virtual std::ostream & write_state_data(std::ostream &os); + virtual std::istream & read_state_data(std::istream &os); virtual std::ostream & write_traj_label(std::ostream &os); virtual std::ostream & write_traj(std::ostream &os); diff --git a/lib/colvars/colvarcomp.h b/lib/colvars/colvarcomp.h index 3c1ec2495c9c5af29c5549b9c9ddaa355d2f1502..b94d798be9f0a3cd49abc27bb8cdaf41f88e8c87 
100644 --- a/lib/colvars/colvarcomp.h +++ b/lib/colvars/colvarcomp.h @@ -140,7 +140,12 @@ public: { return cvc_features; } - + static void delete_features() { + for (size_t i=0; i < cvc_features.size(); i++) { + delete cvc_features[i]; + } + cvc_features.clear(); + } /// \brief Obtain data needed for the calculation for the backend virtual void read_data(); diff --git a/lib/colvars/colvarcomp_coordnums.cpp b/lib/colvars/colvarcomp_coordnums.cpp index 369d489e279c04d5051d8fef83ca0d16e5cf67ed..c34dc772157c64063627759370a8a936a2d46c13 100644 --- a/lib/colvars/colvarcomp_coordnums.cpp +++ b/lib/colvars/colvarcomp_coordnums.cpp @@ -18,6 +18,7 @@ + template<bool calculate_gradients> cvm::real colvar::coordnum::switching_function(cvm::real const &r0, int const &en, @@ -32,8 +33,8 @@ cvm::real colvar::coordnum::switching_function(cvm::real const &r0, int const en2 = en/2; int const ed2 = ed/2; - cvm::real const xn = std::pow(l2, en2); - cvm::real const xd = std::pow(l2, ed2); + cvm::real const xn = cvm::integer_power(l2, en2); + cvm::real const xd = cvm::integer_power(l2, ed2); cvm::real const func = (1.0-xn)/(1.0-xd); if (calculate_gradients) { @@ -62,8 +63,8 @@ cvm::real colvar::coordnum::switching_function(cvm::rvector const &r0_vec, int const en2 = en/2; int const ed2 = ed/2; - cvm::real const xn = std::pow(l2, en2); - cvm::real const xd = std::pow(l2, ed2); + cvm::real const xn = cvm::integer_power(l2, en2); + cvm::real const xd = cvm::integer_power(l2, ed2); cvm::real const func = (1.0-xn)/(1.0-xd); if (calculate_gradients) { @@ -87,6 +88,12 @@ colvar::coordnum::coordnum(std::string const &conf) group1 = parse_group(conf, "group1"); group2 = parse_group(conf, "group2"); + if (int atom_number = cvm::atom_group::overlap(*group1, *group2)) { + cvm::error("Error: group1 and group2 share a common atom (number: " + + cvm::to_str(atom_number) + ")\n"); + return; + } + if (group1->b_dummy) { cvm::error("Error: only group2 is allowed to be a dummy atom\n"); return; @@ -111,11 
+118,17 @@ colvar::coordnum::coordnum(std::string const &conf) if (r0_vec.z < 0.0) r0_vec.z *= -1.0; } - get_keyval(conf, "expNumer", en, int(6) ); - get_keyval(conf, "expDenom", ed, int(12)); + get_keyval(conf, "expNumer", en, 6); + get_keyval(conf, "expDenom", ed, 12); if ( (en%2) || (ed%2) ) { - cvm::error("Error: odd exponents provided, can only use even ones.\n", INPUT_ERROR); + cvm::error("Error: odd exponent(s) provided, can only use even ones.\n", + INPUT_ERROR); + } + + if ( (en <= 0) || (ed <= 0) ) { + cvm::error("Error: negative exponent(s) provided.\n", + INPUT_ERROR); } if (!is_enabled(f_cvc_pbc_minimum_image)) { @@ -250,8 +263,13 @@ colvar::h_bond::h_bond(std::string const &conf) get_keyval(conf, "expDenom", ed, 8); if ( (en%2) || (ed%2) ) { - cvm::error("Error: odd exponents provided, can only use even ones.\n"); - return; + cvm::error("Error: odd exponent(s) provided, can only use even ones.\n", + INPUT_ERROR); + } + + if ( (en <= 0) || (ed <= 0) ) { + cvm::error("Error: negative exponent(s) provided.\n", + INPUT_ERROR); } if (cvm::debug()) @@ -318,12 +336,18 @@ colvar::selfcoordnum::selfcoordnum(std::string const &conf) group1 = parse_group(conf, "group1"); get_keyval(conf, "cutoff", r0, cvm::real(4.0 * cvm::unit_angstrom())); - get_keyval(conf, "expNumer", en, int(6) ); - get_keyval(conf, "expDenom", ed, int(12)); + get_keyval(conf, "expNumer", en, 6); + get_keyval(conf, "expDenom", ed, 12); + if ( (en%2) || (ed%2) ) { - cvm::error("Error: odd exponents provided, can only use even ones.\n"); - return; + cvm::error("Error: odd exponent(s) provided, can only use even ones.\n", + INPUT_ERROR); + } + + if ( (en <= 0) || (ed <= 0) ) { + cvm::error("Error: negative exponent(s) provided.\n", + INPUT_ERROR); } if (!is_enabled(f_cvc_pbc_minimum_image)) { @@ -401,12 +425,17 @@ colvar::groupcoordnum::groupcoordnum(std::string const &conf) if (r0_vec.z < 0.0) r0_vec.z *= -1.0; } - get_keyval(conf, "expNumer", en, int(6) ); - get_keyval(conf, "expDenom", ed, 
int(12)); + get_keyval(conf, "expNumer", en, 6); + get_keyval(conf, "expDenom", ed, 12); if ( (en%2) || (ed%2) ) { - cvm::error("Error: odd exponents provided, can only use even ones.\n"); - return; + cvm::error("Error: odd exponent(s) provided, can only use even ones.\n", + INPUT_ERROR); + } + + if ( (en <= 0) || (ed <= 0) ) { + cvm::error("Error: negative exponent(s) provided.\n", + INPUT_ERROR); } if (!is_enabled(f_cvc_pbc_minimum_image)) { @@ -438,8 +467,8 @@ cvm::real colvar::groupcoordnum::switching_function(cvm::real const &r0, int const en2 = en/2; int const ed2 = ed/2; - cvm::real const xn = std::pow(l2, en2); - cvm::real const xd = std::pow(l2, ed2); + cvm::real const xn = cvm::integer_power(l2, en2); + cvm::real const xd = cvm::integer_power(l2, ed2); cvm::real const func = (1.0-xn)/(1.0-xd); if (calculate_gradients) { @@ -471,8 +500,8 @@ cvm::real colvar::groupcoordnum::switching_function(cvm::rvector const &r0_vec, int const en2 = en/2; int const ed2 = ed/2; - cvm::real const xn = std::pow(l2, en2); - cvm::real const xd = std::pow(l2, ed2); + cvm::real const xn = cvm::integer_power(l2, en2); + cvm::real const xd = cvm::integer_power(l2, ed2); cvm::real const func = (1.0-xn)/(1.0-xd); if (calculate_gradients) { diff --git a/lib/colvars/colvarcomp_distances.cpp b/lib/colvars/colvarcomp_distances.cpp index 18d154515a3cfee7ade375083f70ab932da50067..ce8055843f93d389117f38926e81d97c66bc7954 100644 --- a/lib/colvars/colvarcomp_distances.cpp +++ b/lib/colvars/colvarcomp_distances.cpp @@ -1066,8 +1066,9 @@ void colvar::rmsd::calc_force_invgrads() void colvar::rmsd::calc_Jacobian_derivative() { // divergence of the rotated coordinates (including only derivatives of the rotation matrix) - cvm::real divergence = 0.0; + cvm::real rotation_term = 0.0; + // The rotation term only applies if coordinates are rotated if (atoms->b_rotate) { // gradient of the rotation matrix @@ -1104,7 +1105,7 @@ void colvar::rmsd::calc_Jacobian_derivative() for (size_t alpha = 0; alpha 
< 3; alpha++) { for (size_t beta = 0; beta < 3; beta++) { - divergence += grad_rot_mat[beta][alpha][alpha] * y[beta]; + rotation_term += grad_rot_mat[beta][alpha][alpha] * y[beta]; // Note: equation was derived for inverse rotation (see colvars paper) // so here the matrix is transposed // (eq would give divergence += grad_rot_mat[alpha][beta][alpha] * y[beta];) @@ -1112,7 +1113,13 @@ void colvar::rmsd::calc_Jacobian_derivative() } } } - jd.real_value = x.real_value > 0.0 ? (3.0 * atoms->size() - 4.0 - divergence) / x.real_value : 0.0; + + // The translation term only applies if coordinates are centered + cvm::real translation_term = atoms->b_center ? 3.0 : 0.0; + + jd.real_value = x.real_value > 0.0 ? + (3.0 * atoms->size() - 1.0 - translation_term - rotation_term) / x.real_value : + 0.0; } diff --git a/lib/colvars/colvarcomp_protein.cpp b/lib/colvars/colvarcomp_protein.cpp index b8fc96cfad140177e5adaa2ceccea1332b5702c7..91e47f13d90f22885d318eff8f9dc43a8b7fc21f 100644 --- a/lib/colvars/colvarcomp_protein.cpp +++ b/lib/colvars/colvarcomp_protein.cpp @@ -150,8 +150,8 @@ void colvar::alpha_angles::calc_value() (theta[i])->calc_value(); cvm::real const t = ((theta[i])->value().real_value-theta_ref)/theta_tol; - cvm::real const f = ( (1.0 - std::pow(t, (int) 2)) / - (1.0 - std::pow(t, (int) 4)) ); + cvm::real const f = ( (1.0 - (t*t)) / + (1.0 - (t*t*t*t)) ); x.real_value += theta_norm * f; @@ -202,12 +202,12 @@ void colvar::alpha_angles::apply_force(colvarvalue const &force) for (size_t i = 0; i < theta.size(); i++) { cvm::real const t = ((theta[i])->value().real_value-theta_ref)/theta_tol; - cvm::real const f = ( (1.0 - std::pow(t, (int) 2)) / - (1.0 - std::pow(t, (int) 4)) ); + cvm::real const f = ( (1.0 - (t*t)) / + (1.0 - (t*t*t*t)) ); cvm::real const dfdt = - 1.0/(1.0 - std::pow(t, (int) 4)) * - ( (-2.0 * t) + (-1.0*f)*(-4.0 * std::pow(t, (int) 3)) ); + 1.0/(1.0 - (t*t*t*t)) * + ( (-2.0 * t) + (-1.0*f)*(-4.0 * (t*t*t)) ); (theta[i])->apply_force(theta_norm * dfdt 
* (1.0/theta_tol) * diff --git a/lib/colvars/colvardeps.cpp b/lib/colvars/colvardeps.cpp index 8f241a6255953f2ae0d4815a050daa12c077f866..ac906e7be755fc382d1cb383730522da38860092 100644 --- a/lib/colvars/colvardeps.cpp +++ b/lib/colvars/colvardeps.cpp @@ -413,15 +413,27 @@ void colvardeps::init_cvb_requires() { init_feature(f_cvb_apply_force, "apply force", f_type_user); f_req_children(f_cvb_apply_force, f_cv_gradient); - init_feature(f_cvb_get_total_force, "obtain total force"); + init_feature(f_cvb_get_total_force, "obtain total force", f_type_dynamic); f_req_children(f_cvb_get_total_force, f_cv_total_force); init_feature(f_cvb_history_dependent, "history-dependent", f_type_static); + init_feature(f_cvb_time_dependent, "time-dependent", f_type_static); + init_feature(f_cvb_scalar_variables, "require scalar variables", f_type_static); f_req_children(f_cvb_scalar_variables, f_cv_scalar); init_feature(f_cvb_calc_pmf, "calculate a PMF", f_type_static); + + init_feature(f_cvb_calc_ti_samples, "calculate TI samples", f_type_dynamic); + f_req_self(f_cvb_calc_ti_samples, f_cvb_get_total_force); + f_req_children(f_cvb_calc_ti_samples, f_cv_grid); + + init_feature(f_cvb_write_ti_samples, "write TI samples ", f_type_user); + f_req_self(f_cvb_write_ti_samples, f_cvb_calc_ti_samples); + + init_feature(f_cvb_write_ti_pmf, "write TI PMF", f_type_user); + f_req_self(f_cvb_write_ti_pmf, f_cvb_calc_ti_samples); } // Initialize feature_states for each instance @@ -431,6 +443,9 @@ void colvardeps::init_cvb_requires() { // Most features are available, so we set them so // and list exceptions below } + + // only compute TI samples when deriving from colvarbias_ti + feature_states[f_cvb_calc_ti_samples].available = false; } @@ -504,9 +519,6 @@ void colvardeps::init_cv_requires() { init_feature(f_cv_subtract_applied_force, "subtract applied force from total force", f_type_user); f_req_self(f_cv_subtract_applied_force, f_cv_total_force); - // There is no well-defined way to implement 
f_cv_subtract_applied_force - // in the case of extended-Lagrangian colvars - f_req_exclude(f_cv_subtract_applied_force, f_cv_extended_Lagrangian); init_feature(f_cv_lower_boundary, "lower boundary", f_type_user); f_req_self(f_cv_lower_boundary, f_cv_scalar); @@ -514,7 +526,7 @@ void colvardeps::init_cv_requires() { init_feature(f_cv_upper_boundary, "upper boundary", f_type_user); f_req_self(f_cv_upper_boundary, f_cv_scalar); - init_feature(f_cv_grid, "grid", f_type_user); + init_feature(f_cv_grid, "grid", f_type_dynamic); f_req_self(f_cv_grid, f_cv_lower_boundary); f_req_self(f_cv_grid, f_cv_upper_boundary); @@ -693,7 +705,6 @@ void colvardeps::print_state() { } - void colvardeps::add_child(colvardeps *child) { children.push_back(child); diff --git a/lib/colvars/colvardeps.h b/lib/colvars/colvardeps.h index dfb10d00e421f7635fe4bce5ec88b5f45cfc39eb..bd892fbca8cef746bb4dfad818b1e4577507a735 100644 --- a/lib/colvars/colvardeps.h +++ b/lib/colvars/colvardeps.h @@ -180,8 +180,6 @@ public: protected: - - /// Parse a keyword and enable a feature accordingly bool get_keyval_feature(colvarparse *cvp, std::string const &conf, char const *key, @@ -229,10 +227,18 @@ public: f_cvb_get_total_force, /// \brief depends on simulation history f_cvb_history_dependent, + /// \brief depends on time + f_cvb_time_dependent, /// \brief requires scalar colvars f_cvb_scalar_variables, /// \brief whether this bias will compute a PMF f_cvb_calc_pmf, + /// \brief whether this bias will compute TI samples + f_cvb_calc_ti_samples, + /// \brief whether this bias will write TI samples + f_cvb_write_ti_samples, + /// \brief whether this bias should write the TI PMF + f_cvb_write_ti_pmf, f_cvb_ntot }; diff --git a/lib/colvars/colvargrid.h b/lib/colvars/colvargrid.h index 6f06cb1066fffbc4ad8982a643264f496264657a..a01104dba819c2745a9d63d434b8e6832c9d85b7 100644 --- a/lib/colvars/colvargrid.h +++ b/lib/colvars/colvargrid.h @@ -1403,6 +1403,15 @@ public: /// Constructor from a vector of colvars 
colvar_grid_gradient(std::vector<colvar *> &colvars); + /// \brief Accumulate the value + inline void acc_value(std::vector<int> const &ix, std::vector<colvarvalue> const &values) { + for (size_t imult = 0; imult < mult; imult++) { + data[address(ix) + imult] += values[imult].real_value; + } + if (samples) + samples->incr_count(ix); + } + /// \brief Accumulate the gradient inline void acc_grad(std::vector<int> const &ix, cvm::real const *grads) { for (size_t imult = 0; imult < mult; imult++) { diff --git a/lib/colvars/colvarmodule.cpp b/lib/colvars/colvarmodule.cpp index 780dc28afaebd4c6db1788c0495e73f8b7904d5b..200c2d6848f10cf6a52ab70745c6049c9387dff7 100644 --- a/lib/colvars/colvarmodule.cpp +++ b/lib/colvars/colvarmodule.cpp @@ -22,7 +22,7 @@ #include "colvarbias_restraint.h" #include "colvarscript.h" #include "colvaratoms.h" - +#include "colvarcomp.h" colvarmodule::colvarmodule(colvarproxy *proxy_in) { @@ -274,9 +274,9 @@ int colvarmodule::parse_global_params(std::string const &conf) parse->get_keyval(conf, "colvarsRestartFrequency", restart_out_freq, restart_out_freq); - // if this is true when initializing, it means - // we are continuing after a reset(): default to true - parse->get_keyval(conf, "colvarsTrajAppend", cv_traj_append, cv_traj_append); + // Deprecate append flag + parse->get_keyval(conf, "colvarsTrajAppend", + cv_traj_append, cv_traj_append, colvarparse::parse_silent); parse->get_keyval(conf, "scriptedColvarForces", use_scripted_forces, false); @@ -409,22 +409,12 @@ int colvarmodule::parse_biases(std::string const &conf) cvm::decrease_depth(); } - size_t i; - - size_t n_hist_dep_biases = 0; - std::vector<std::string> hist_dep_biases_names; - for (i = 0; i < biases.size(); i++) { - if (biases[i]->is_enabled(colvardeps::f_cvb_apply_force) && - biases[i]->is_enabled(colvardeps::f_cvb_history_dependent)) { - n_hist_dep_biases++; - hist_dep_biases_names.push_back(biases[i]->name); - } - } - if (n_hist_dep_biases > 1) { - cvm::log("WARNING: there are 
"+cvm::to_str(n_hist_dep_biases)+ - " history-dependent biases with non-zero force parameters:\n"+ - cvm::to_str(hist_dep_biases_names)+"\n"+ - "Please make sure that their forces do not counteract each other.\n"); + std::vector<std::string> const time_biases = time_dependent_biases(); + if (time_biases.size() > 1) { + cvm::log("WARNING: there are "+cvm::to_str(time_biases.size())+ + " time-dependent biases with non-zero force parameters:\n"+ + cvm::to_str(time_biases)+"\n"+ + "Please ensure that their forces do not counteract each other.\n"); } if (biases.size() || use_scripted_forces) { @@ -441,7 +431,7 @@ int colvarmodule::parse_biases(std::string const &conf) } -int colvarmodule::num_biases_feature(int feature_id) +int colvarmodule::num_biases_feature(int feature_id) const { colvarmodule *cv = cvm::main(); size_t n = 0; @@ -456,7 +446,7 @@ int colvarmodule::num_biases_feature(int feature_id) } -int colvarmodule::num_biases_type(std::string const &type) +int colvarmodule::num_biases_type(std::string const &type) const { colvarmodule *cv = cvm::main(); size_t n = 0; @@ -471,6 +461,22 @@ int colvarmodule::num_biases_type(std::string const &type) } +std::vector<std::string> const colvarmodule::time_dependent_biases() const +{ + size_t i; + std::vector<std::string> biases_names; + for (i = 0; i < biases.size(); i++) { + if (biases[i]->is_enabled(colvardeps::f_cvb_apply_force) && + biases[i]->is_enabled(colvardeps::f_cvb_active) && + (biases[i]->is_enabled(colvardeps::f_cvb_history_dependent) || + biases[i]->is_enabled(colvardeps::f_cvb_time_dependent))) { + biases_names.push_back(biases[i]->name); + } + } + return biases_names; +} + + int colvarmodule::catch_input_errors(int result) { if (result != COLVARS_OK || get_error()) { @@ -673,8 +679,15 @@ int colvarmodule::calc() } // write restart files, if needed - if (restart_out_freq && restart_out_name.size()) { - error_code |= write_restart_files(); + if (restart_out_freq && (cvm::step_relative() > 0) && + 
((cvm::step_absolute() % restart_out_freq) == 0) ) { + if (restart_out_name.size()) { + // Write restart file, if different from main output + error_code |= write_restart_file(restart_out_name); + } else { + error_code |= write_restart_file(output_prefix()+".colvars.state"); + } + write_output_files(); } return error_code; @@ -916,21 +929,16 @@ int colvarmodule::calc_scripted_forces() } -int colvarmodule::write_restart_files() +int colvarmodule::write_restart_file(std::string const &out_name) { - if ( (cvm::step_relative() > 0) && - ((cvm::step_absolute() % restart_out_freq) == 0) ) { - cvm::log("Writing the state file \""+ - restart_out_name+"\".\n"); - proxy->backup_file(restart_out_name); - std::ostream *restart_out_os = proxy->output_stream(restart_out_name); - if (!restart_out_os) return cvm::get_error(); - if (!write_restart(*restart_out_os)) { - return cvm::error("Error: in writing restart file.\n", FILE_ERROR); - } - proxy->close_output_stream(restart_out_name); + cvm::log("Saving collective variables state to \""+out_name+"\".\n"); + proxy->backup_file(out_name); + std::ostream *restart_out_os = proxy->output_stream(out_name); + if (!restart_out_os) return cvm::get_error(); + if (!write_restart(*restart_out_os)) { + return cvm::error("Error: in writing restart file.\n", FILE_ERROR); } - + proxy->close_output_stream(out_name); return (cvm::get_error() ? 
COLVARS_ERROR : COLVARS_OK); } @@ -1011,7 +1019,15 @@ colvarmodule::~colvarmodule() { if ((proxy->smp_thread_id() == COLVARS_NOT_IMPLEMENTED) || (proxy->smp_thread_id() == 0)) { + reset(); + + // Delete contents of static arrays + colvarbias::delete_features(); + colvar::delete_features(); + colvar::cvc::delete_features(); + atom_group::delete_features(); + delete parse; parse = NULL; proxy = NULL; @@ -1261,7 +1277,7 @@ continue the previous simulation.\n\n"); to:\n\ \""+ proxy->input_prefix()+".colvars.state\"\n"); output_prefix() = output_prefix()+".tmp"; - write_output_files(); + write_restart_file(output_prefix()+".colvars.state"); cvm::error("Exiting with error until issue is addressed.\n", FATAL_ERROR); } @@ -1277,24 +1293,13 @@ int colvarmodule::backup_file(char const *filename) int colvarmodule::write_output_files() { - // if this is a simulation run (i.e. not a postprocessing), output data - // must be written to be able to restart the simulation - std::string const out_name = - (output_prefix().size() ? 
- std::string(output_prefix()+".colvars.state") : - std::string("colvars.state")); - cvm::log("Saving collective variables state to \""+out_name+"\".\n"); - - std::ostream * os = proxy->output_stream(out_name); - os->setf(std::ios::scientific, std::ios::floatfield); - this->write_restart(*os); - proxy->close_output_stream(out_name); + int error_code = COLVARS_OK; cvm::increase_depth(); for (std::vector<colvar *>::iterator cvi = colvars.begin(); cvi != colvars.end(); cvi++) { - (*cvi)->write_output_files(); + error_code |= (*cvi)->write_output_files(); } cvm::decrease_depth(); @@ -1302,8 +1307,8 @@ int colvarmodule::write_output_files() for (std::vector<colvarbias *>::iterator bi = biases.begin(); bi != biases.end(); bi++) { - (*bi)->write_output_files(); - (*bi)->write_state_to_replicas(); + error_code |= (*bi)->write_output_files(); + error_code |= (*bi)->write_state_to_replicas(); } cvm::decrease_depth(); @@ -1403,15 +1408,12 @@ std::ostream & colvarmodule::write_restart(std::ostream &os) cvi != colvars.end(); cvi++) { (*cvi)->write_restart(os); - error_code |= (*cvi)->write_output_files(); } for (std::vector<colvarbias *>::iterator bi = biases.begin(); bi != biases.end(); bi++) { (*bi)->write_state(os); - error_code |= (*bi)->write_state_to_replicas(); - error_code |= (*bi)->write_output_files(); } cvm::decrease_depth(); diff --git a/lib/colvars/colvarmodule.h b/lib/colvars/colvarmodule.h index 0f6efd14c4ea1b7574304c7bc16f067b735b5076..14e5d56701f75730c7bb1ce07764c89edd0b665b 100644 --- a/lib/colvars/colvarmodule.h +++ b/lib/colvars/colvarmodule.h @@ -83,6 +83,15 @@ public: /// Defining an abstract real number allows to switch precision typedef double real; + + /// Override std::pow with a product for n positive integer + static inline real integer_power(real x, int n) + { + real result = 1.0; + for (int i = 0; i < n; i++) result *= x; + return result; + } + /// Residue identifier typedef int residue_id; @@ -293,10 +302,13 @@ private: public: /// Return how many 
biases have this feature enabled - static int num_biases_feature(int feature_id); + int num_biases_feature(int feature_id) const; /// Return how many biases are defined with this type - static int num_biases_type(std::string const &type); + int num_biases_type(std::string const &type) const; + + /// Return the names of time-dependent biases with forces enabled + std::vector<std::string> const time_dependent_biases() const; private: /// Useful wrapper to interrupt parsing if any error occurs @@ -334,9 +346,9 @@ public: /// Write all trajectory files int write_traj_files(); - /// Write all restart files - int write_restart_files(); - /// Write all FINAL output files + /// Write a state file useful to resume the simulation + int write_restart_file(std::string const &out_name); + /// Write all other output files int write_output_files(); /// Backup a file before writing it static int backup_file(char const *filename); @@ -580,7 +592,7 @@ public: /// from static functions in the colvarmodule class static colvarproxy *proxy; - /// \brief Accessor for the above + /// \brief Access the one instance of the Colvars module static colvarmodule *main(); }; diff --git a/lib/colvars/colvarproxy.cpp b/lib/colvars/colvarproxy.cpp index fa24091d5233da18a1af36f0dffceca3f89a26fb..8160144c6bab1e11d36f4d1b7712e75bce90e6c8 100644 --- a/lib/colvars/colvarproxy.cpp +++ b/lib/colvars/colvarproxy.cpp @@ -10,6 +10,10 @@ #include <sstream> #include <string.h> +#if defined(_OPENMP) +#include <omp.h> +#endif + #include "colvarmodule.h" #include "colvarproxy.h" #include "colvarscript.h" @@ -40,6 +44,12 @@ bool colvarproxy_system::total_forces_enabled() const } +bool colvarproxy_system::total_forces_same_step() const +{ + return false; +} + + cvm::real colvarproxy_system::position_dist2(cvm::atom_pos const &pos1, cvm::atom_pos const &pos2) { @@ -204,7 +214,13 @@ void colvarproxy_atom_groups::clear_atom_group(int index) colvarproxy_smp::colvarproxy_smp() { - b_smp_active = true; + b_smp_active = 
true; // May be disabled by user option + omp_lock_state = NULL; +#if defined(_OPENMP) + if (smp_thread_id() == 0) { + omp_lock_state = new omp_lock_t; omp_init_lock(reinterpret_cast<omp_lock_t *>(omp_lock_state)); // allocate storage first: omp_init_lock() must not be given NULL + } +#endif } @@ -213,60 +229,143 @@ colvarproxy_smp::~colvarproxy_smp() {} int colvarproxy_smp::smp_enabled() { +#if defined(_OPENMP) + if (b_smp_active) { + return COLVARS_OK; + } + return COLVARS_ERROR; +#else return COLVARS_NOT_IMPLEMENTED; +#endif } int colvarproxy_smp::smp_colvars_loop() { +#if defined(_OPENMP) + colvarmodule *cv = cvm::main(); + colvarproxy *proxy = cv->proxy; +#pragma omp parallel for + for (size_t i = 0; i < cv->variables_active_smp()->size(); i++) { + colvar *x = (*(cv->variables_active_smp()))[i]; + int x_item = (*(cv->variables_active_smp_items()))[i]; + if (cvm::debug()) { + cvm::log("["+cvm::to_str(proxy->smp_thread_id())+"/"+ + cvm::to_str(proxy->smp_num_threads())+ + "]: calc_colvars_items_smp(), i = "+cvm::to_str(i)+", cv = "+ + x->name+", cvc = "+cvm::to_str(x_item)+"\n"); + } + x->calc_cvcs(x_item, 1); + } + return cvm::get_error(); +#else return COLVARS_NOT_IMPLEMENTED; +#endif } int colvarproxy_smp::smp_biases_loop() { +#if defined(_OPENMP) + colvarmodule *cv = cvm::main(); +#pragma omp parallel + { +#pragma omp for + for (size_t i = 0; i < cv->biases_active()->size(); i++) { + colvarbias *b = (*(cv->biases_active()))[i]; + if (cvm::debug()) { + cvm::log("Calculating bias \""+b->name+"\" on thread "+ + cvm::to_str(smp_thread_id())+"\n"); + } + b->update(); + } + } + return cvm::get_error(); +#else return COLVARS_NOT_IMPLEMENTED; +#endif } int colvarproxy_smp::smp_biases_script_loop() { +#if defined(_OPENMP) + colvarmodule *cv = cvm::main(); +#pragma omp parallel + { +#pragma omp single nowait + { + cv->calc_scripted_forces(); + } +#pragma omp for + for (size_t i = 0; i < cv->biases_active()->size(); i++) { + colvarbias *b = (*(cv->biases_active()))[i]; + if (cvm::debug()) { + cvm::log("Calculating bias \""+b->name+"\" on thread "+ + 
cvm::to_str(smp_thread_id())+"\n"); + } + b->update(); + } + } + return cvm::get_error(); +#else return COLVARS_NOT_IMPLEMENTED; +#endif } + + int colvarproxy_smp::smp_thread_id() { +#if defined(_OPENMP) + return omp_get_thread_num(); +#else return COLVARS_NOT_IMPLEMENTED; +#endif } int colvarproxy_smp::smp_num_threads() { +#if defined(_OPENMP) + return omp_get_max_threads(); +#else return COLVARS_NOT_IMPLEMENTED; +#endif } int colvarproxy_smp::smp_lock() { +#if defined(_OPENMP) + omp_set_lock(reinterpret_cast<omp_lock_t *>(omp_lock_state)); +#endif return COLVARS_OK; } int colvarproxy_smp::smp_trylock() { +#if defined(_OPENMP) + return omp_test_lock(reinterpret_cast<omp_lock_t *>(omp_lock_state)) ? + COLVARS_OK : COLVARS_ERROR; +#else return COLVARS_OK; +#endif } int colvarproxy_smp::smp_unlock() { +#if defined(_OPENMP) + omp_unset_lock(reinterpret_cast<omp_lock_t *>(omp_lock_state)); +#endif return COLVARS_OK; } - colvarproxy_replicas::colvarproxy_replicas() {} diff --git a/lib/colvars/colvarproxy.h b/lib/colvars/colvarproxy.h index 95d13cd7e05c15ba209333f4a6550e11d29b5977..e51ddfbe3bfc16a9d9a77264b86912a2cd8aa34f 100644 --- a/lib/colvars/colvarproxy.h +++ b/lib/colvars/colvarproxy.h @@ -80,6 +80,9 @@ public: /// Are total forces being used? virtual bool total_forces_enabled() const; + + /// Are total forces from the current step available? + virtual bool total_forces_same_step() const; }; @@ -372,6 +375,11 @@ public: /// Release the lock virtual int smp_unlock(); + +protected: + + /// Lock state for OpenMP + void *omp_lock_state; }; diff --git a/lib/colvars/colvars_version.h b/lib/colvars/colvars_version.h index 312c0fd1a0c86e832ccbac15605867ddd101c824..a92a776f8a3abc88700794228bcf4a1e0ad8da51 100644 --- a/lib/colvars/colvars_version.h +++ b/lib/colvars/colvars_version.h @@ -1,5 +1,5 @@ #ifndef COLVARS_VERSION -#define COLVARS_VERSION "2017-08-06" +#define COLVARS_VERSION "2017-10-20" // This file is part of the Collective Variables module (Colvars). 
// The original version of Colvars and its updates are located at: // https://github.com/colvars/colvars diff --git a/lib/colvars/colvarscript.cpp b/lib/colvars/colvarscript.cpp index 89302a16a2ead1c29bbc501ec6d8e2aa9d043cb8..9570acd8327252a99e6394f9bd29d6f6a0bd2f4e 100644 --- a/lib/colvars/colvarscript.cpp +++ b/lib/colvars/colvarscript.cpp @@ -11,7 +11,10 @@ #include <stdlib.h> #include <string.h> +#define COLVARSCRIPT_CPP #include "colvarscript.h" +#undef COLVARSCRIPT_CPP + #include "colvarproxy.h" #include "colvardeps.h" @@ -21,6 +24,11 @@ colvarscript::colvarscript(colvarproxy *p) colvars(p->colvars), proxy_error(0) { + comm_help.resize(colvarscript::cv_n_commands); + comm_fns.resize(colvarscript::cv_n_commands); +#define COLVARSCRIPT_INIT_FN +#include "colvarscript.h" +#undef COLVARSCRIPT_INIT_FN } @@ -66,8 +74,7 @@ int colvarscript::run(int objc, unsigned char *const objv[]) } if (objc < 2) { - result = help_string(); - return COLVARS_OK; + return exec_command(cv_help, NULL, objc, objv); } std::string const cmd(obj_to_str(objv[1])); @@ -167,17 +174,7 @@ int colvarscript::run(int objc, unsigned char *const objv[]) /// Parse config from string if (cmd == "config") { - if (objc < 3) { - result = "Missing arguments\n" + help_string(); - return COLVARSCRIPT_ERROR; - } - std::string const conf(obj_to_str(objv[2])); - if (colvars->read_config_string(conf) == COLVARS_OK) { - return COLVARS_OK; - } else { - result = "Error parsing configuration string"; - return COLVARSCRIPT_ERROR; - } + return exec_command(cv_config, NULL, objc, objv); } /// Load an input state file @@ -204,6 +201,8 @@ int colvarscript::run(int objc, unsigned char *const objv[]) proxy->output_prefix() = obj_to_str(objv[2]); int error = 0; error |= colvars->setup_output(); + error |= colvars->write_restart_file(colvars->output_prefix()+ + ".colvars.state"); error |= colvars->write_output_files(); return error ? 
COLVARSCRIPT_ERROR : COLVARS_OK; } @@ -255,6 +254,10 @@ int colvarscript::run(int objc, unsigned char *const objv[]) } } + if (cmd == "help") { + return exec_command(cv_help, NULL, objc, objv); + } + result = "Syntax error\n" + help_string(); return COLVARSCRIPT_ERROR; } @@ -295,7 +298,9 @@ int colvarscript::proc_colvar(colvar *cv, int objc, unsigned char *const objv[]) // colvar destructor is tasked with the cleanup delete cv; // TODO this could be done by the destructors - colvars->write_traj_label(*(colvars->cv_traj_os)); + if (colvars->cv_traj_os != NULL) { + colvars->write_traj_label(*(colvars->cv_traj_os)); + } return COLVARS_OK; } @@ -374,7 +379,6 @@ int colvarscript::proc_colvar(colvar *cv, int objc, unsigned char *const objv[]) int colvarscript::proc_bias(colvarbias *b, int objc, unsigned char *const objv[]) { - std::string const key(obj_to_str(objv[0])); std::string const subcmd(obj_to_str(objv[2])); if (subcmd == "energy") { @@ -425,7 +429,9 @@ int colvarscript::proc_bias(colvarbias *b, int objc, unsigned char *const objv[] // the bias destructor takes care of the cleanup at cvm level delete b; // TODO this could be done by the destructors - colvars->write_traj_label(*(colvars->cv_traj_os)); + if (colvars->cv_traj_os != NULL) { + colvars->write_traj_label(*(colvars->cv_traj_os)); + } return COLVARS_OK; } @@ -528,7 +534,7 @@ int colvarscript::proc_features(colvardeps *obj, } -std::string colvarscript::help_string() +std::string colvarscript::help_string() const { std::string buf; buf = "Usage: cv <subcommand> [args...]\n\ @@ -538,7 +544,7 @@ Managing the Colvars module:\n\ config <string> -- read configuration from the given string\n\ reset -- delete all internal configuration\n\ delete -- delete this Colvars module instance\n\ - version -- return version of colvars code\n\ + version -- return version of Colvars code\n\ \n\ Input and output:\n\ list -- return a list of all variables\n\ @@ -564,6 +570,8 @@ Accessing collective variables:\n\ colvar <name> 
type -- return the type of colvar <name>\n\ colvar <name> delete -- delete colvar <name>\n\ colvar <name> addforce <F> -- apply given force on colvar <name>\n\ + colvar <name> getappliedforce -- return applied force of colvar <name>\n\ + colvar <name> gettotalforce -- return total force of colvar <name>\n\ colvar <name> getconfig -- return config string of colvar <name>\n\ colvar <name> cvcflags <fl> -- enable or disable cvcs according to 0/1 flags\n\ colvar <name> get <f> -- get the value of the colvar feature <f>\n\ diff --git a/lib/colvars/colvarscript.h b/lib/colvars/colvarscript.h index 94d451809cdd9feb853dbf5462342a52140d907a..39cd08934059f25e0fe598dc0a915ec79036631a 100644 --- a/lib/colvars/colvarscript.h +++ b/lib/colvars/colvarscript.h @@ -8,21 +8,27 @@ // Colvars repository at GitHub. #ifndef COLVARSCRIPT_H -#define COLVARSCRIPT_H +//#define COLVARSCRIPT_H // Delay definition until later #include <string> +#include <vector> +#include <map> + #include "colvarmodule.h" #include "colvarvalue.h" #include "colvarbias.h" #include "colvarproxy.h" + // Only these error values are part of the scripting interface #define COLVARSCRIPT_ERROR -1 #define COLVARSCRIPT_OK 0 + class colvarscript { private: + colvarproxy *proxy; colvarmodule *colvars; @@ -35,16 +41,93 @@ public: colvarscript(colvarproxy * p); inline ~colvarscript() {} - /// If an error is caught by the proxy through fatal_error(), this is set to COLVARSCRIPT_ERROR + /// If an error is caught by the proxy through fatal_error(), this is set to + /// COLVARSCRIPT_ERROR int proxy_error; - /// If an error is returned by one of the methods, it should set this to the error message + /// If an error is returned by one of the methods, it should set this to the + /// error message std::string result; /// Run script command with given positional arguments (objects) int run(int objc, unsigned char *const objv[]); + /// Set the return value of the script command to the given string + inline void 
set_str_result(std::string const &s) + { + result = s; + } + + /// Build and return a short help + std::string help_string(void) const; + + /// Use scripting language to get the string representation of an object + inline char const *obj_to_str(unsigned char *const obj) + { + return cvm::proxy->script_obj_to_str(obj); + } + + enum command { + cv_help, + cv_version, + cv_config, + cv_configfile, + cv_reset, + cv_delete, + cv_list, + cv_list_biases, + cv_load, + cv_save, + cv_update, + cv_addenergy, + cv_getenergy, + cv_printframe, + cv_printframelabels, + cv_frame, + cv_colvar, + cv_colvar_value, + cv_colvar_update, + cv_colvar_type, + cv_colvar_delete, + cv_colvar_addforce, + cv_colvar_getappliedforce, + cv_colvar_gettotalforce, + cv_colvar_cvcflags, + cv_colvar_getconfig, + cv_colvar_get, + cv_colvar_set, + cv_bias, + cv_bias_energy, + cv_bias_update, + cv_bias_delete, + cv_bias_getconfig, + cv_bias_get, + cv_bias_set, + cv_n_commands + }; + + /// Execute a script command + inline int exec_command(command c, + void *pobj, + int objc, unsigned char * const *objv) + { + return (*(comm_fns[c]))(pobj, objc, objv); + } + + /// Get help for a command (TODO reformat for each language?) 
+ inline std::string command_help(colvarscript::command c) const + { + return comm_help[c]; + } + + /// Clear all object results + inline void clear_results() + { + result.clear(); + } + private: + /// Run subcommands on colvar int proc_colvar(colvar *cv, int argc, unsigned char *const argv[]); @@ -55,17 +138,146 @@ private: int proc_features(colvardeps *obj, int argc, unsigned char *const argv[]); - /// Build and return a short help - std::string help_string(void); + /// Internal identifiers of command strings + std::map<std::string, command> comm_str_map; -public: + /// Help strings for each command + std::vector<std::string> comm_help; - inline char const *obj_to_str(unsigned char *const obj) - { - return cvm::proxy->script_obj_to_str(obj); - } + /// Number of arguments for each command + std::vector<size_t> comm_n_args; + + /// Arguments for each command + std::vector< std::vector<std::string> > comm_args; + + /// Implementations of each command + std::vector<int (*)(void *, int, unsigned char * const *)> comm_fns; }; +/// Get a pointer to the main colvarscript object +inline static colvarscript *colvarscript_obj() +{ + return cvm::main()->proxy->script; +} + +/// Get a pointer to the colvar object pointed to by pobj +inline static colvar *colvar_obj(void *pobj) +{ + return reinterpret_cast<colvar *>(pobj); +} + +/// Get a pointer to the colvarbias object pointed to by pobj +inline static colvarbias *colvarbias_obj(void *pobj) +{ + return reinterpret_cast<colvarbias *>(pobj); +} + + +#define CVSCRIPT_COMM_FNAME(COMM) cvscript_ ## COMM + +#define CVSCRIPT_COMM_PROTO(COMM) \ + int CVSCRIPT_COMM_FNAME(COMM)(void *, int, unsigned char *const *); + +#define CVSCRIPT(COMM,HELP,N_ARGS_MIN,N_ARGS_MAX,ARGS,FN_BODY) \ + CVSCRIPT_COMM_PROTO(COMM) + +#undef COLVARSCRIPT_H +#endif // #ifndef COLVARSCRIPT_H + + +#ifdef COLVARSCRIPT_CPP +#define CVSCRIPT_COMM_FN(COMM,N_ARGS_MIN,N_ARGS_MAX,ARGS,FN_BODY) \ + int CVSCRIPT_COMM_FNAME(COMM)(void *pobj, \ + int objc, unsigned char 
*const objv[]) \ + { \ + colvarscript *script = colvarscript_obj(); \ + script->clear_results(); \ + if (objc < 2+N_ARGS_MIN) /* "cv" and "COMM" are 1st and 2nd */ { \ + script->set_str_result("Missing arguments\n" + \ + script->command_help(colvarscript::COMM)); \ + return COLVARSCRIPT_ERROR; \ + } \ + if (objc > 2+N_ARGS_MAX) { \ + script->set_str_result("Too many arguments\n" + \ + script->command_help(colvarscript::COMM)); \ + return COLVARSCRIPT_ERROR; \ + } \ + FN_BODY; \ + } +#undef CVSCRIPT +#define CVSCRIPT(COMM,HELP,N_ARGS_MIN,N_ARGS_MAX,ARGS,FN_BODY) \ + CVSCRIPT_COMM_FN(COMM,N_ARGS_MIN,N_ARGS_MAX,ARGS,FN_BODY) +#endif // #ifdef COLVARSCRIPT_CPP + + +#ifdef COLVARSCRIPT_INIT_FN +#define CVSCRIPT_COMM_INIT(COMM,HELP,ARGS) { \ + comm_str_map[#COMM] = COMM; \ + comm_help[COMM] = HELP; \ + comm_fns[COMM] = &(CVSCRIPT_COMM_FNAME(COMM)); \ + } +#undef CVSCRIPT +#define CVSCRIPT(COMM,HELP,N_ARGS_MIN,N_ARGS_MAX,ARGS,FN_BODY) \ + CVSCRIPT_COMM_INIT(COMM,HELP,ARGS) #endif + + +#if !defined(COLVARSCRIPT_H) || defined(COLVARSCRIPT_INIT_FN) +#define COLVARSCRIPT_H + +#ifndef COLVARSCRIPT_INIT_FN +#ifdef __cplusplus +extern "C" { +#endif +#endif + + // Add optional arguments for command-specific help? 
+ CVSCRIPT(cv_help, + "Print the help message", + 0, 0, + {}, + script->set_str_result(script->help_string()); + return COLVARS_OK; + ) + + CVSCRIPT(cv_config, + "Read configuration from the given string", + 1, 1, + { "conf (str) - Configuration string" }, + std::string const conf(script->obj_to_str(objv[2])); + if (cvm::main()->read_config_string(conf) == COLVARS_OK) { + return COLVARS_OK; + } + script->set_str_result("Error parsing configuration string"); + return COLVARSCRIPT_ERROR; + ) + + CVSCRIPT(cv_addenergy, + "Add an energy to the MD engine", + 1, 1, + { "E (float) - Amount of energy to add" }, + cvm::main()->total_bias_energy += + strtod(script->obj_to_str(objv[2]), NULL); + return COLVARS_OK; + ) + + CVSCRIPT(cv_getenergy, + "Get the current Colvars energy", + 1, 1, + { "E (float) - Store the energy in this variable" }, + double *energy = reinterpret_cast<double *>(objv[2]); + *energy = cvm::main()->total_bias_energy; + return COLVARS_OK; + ) + +#ifndef COLVARSCRIPT_INIT_FN +#ifdef __cplusplus +} // extern "C" +#endif +#endif + +#undef CVSCRIPT + +#endif // #ifndef COLVARSCRIPT_H diff --git a/lib/colvars/colvartypes.cpp b/lib/colvars/colvartypes.cpp index 428fe1a4b1d864c725cd7d5b09652216a03091a3..b604606d4628911138f2553215db9d7b3c306630 100644 --- a/lib/colvars/colvartypes.cpp +++ b/lib/colvars/colvartypes.cpp @@ -312,7 +312,7 @@ void colvarmodule::rotation::diagonalize_matrix(cvm::matrix2d<cvm::real> &S, cvm::real norm2 = 0.0; size_t i; for (i = 0; i < 4; i++) { - norm2 += std::pow(S_eigvec[ie][i], int(2)); + norm2 += S_eigvec[ie][i] * S_eigvec[ie][i]; } cvm::real const norm = std::sqrt(norm2); for (i = 0; i < 4; i++) { diff --git a/lib/colvars/colvartypes.h b/lib/colvars/colvartypes.h index fe3160eb4b3238ef036c2ea56d6b3954d73cb121..97257d18ad66d1da99b0b74586190e735687850b 100644 --- a/lib/colvars/colvartypes.h +++ b/lib/colvars/colvartypes.h @@ -705,7 +705,7 @@ public: { std::stringstream stream(s); size_t i = 0; - while ((stream >> data[i]) && (i < 
data.size())) { + while ((i < data.size()) && (stream >> data[i])) { i++; } if (i < data.size()) { diff --git a/lib/colvars/lepton/include/Lepton.h b/lib/colvars/lepton/include/Lepton.h new file mode 100644 index 0000000000000000000000000000000000000000..22edcb3ff9d43d2bdd7b1851fdf68099d4c1d195 --- /dev/null +++ b/lib/colvars/lepton/include/Lepton.h @@ -0,0 +1,43 @@ +#ifndef LEPTON_H_ +#define LEPTON_H_ + +/* -------------------------------------------------------------------------- * + * Lepton * + * -------------------------------------------------------------------------- * + * This is part of the Lepton expression parser originating from * + * Simbios, the NIH National Center for Physics-Based Simulation of * + * Biological Structures at Stanford, funded under the NIH Roadmap for * + * Medical Research, grant U54 GM072970. See https://simtk.org. * + * * + * Portions copyright (c) 2009 Stanford University and the Authors. * + * Authors: Peter Eastman * + * Contributors: * + * * + * Permission is hereby granted, free of charge, to any person obtaining a * + * copy of this software and associated documentation files (the "Software"), * + * to deal in the Software without restriction, including without limitation * + * the rights to use, copy, modify, merge, publish, distribute, sublicense, * + * and/or sell copies of the Software, and to permit persons to whom the * + * Software is furnished to do so, subject to the following conditions: * + * * + * The above copyright notice and this permission notice shall be included in * + * all copies or substantial portions of the Software. * + * * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL * + * THE AUTHORS, CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, * + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR * + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE * + * USE OR OTHER DEALINGS IN THE SOFTWARE. * + * -------------------------------------------------------------------------- */ + +#include "lepton/CompiledExpression.h" +#include "lepton/CustomFunction.h" +#include "lepton/ExpressionProgram.h" +#include "lepton/ExpressionTreeNode.h" +#include "lepton/Operation.h" +#include "lepton/ParsedExpression.h" +#include "lepton/Parser.h" + +#endif /*LEPTON_H_*/ diff --git a/lib/colvars/lepton/include/lepton/CompiledExpression.h b/lib/colvars/lepton/include/lepton/CompiledExpression.h new file mode 100644 index 0000000000000000000000000000000000000000..67442e0cf5140a1cbcfd03945304c39ae0fff815 --- /dev/null +++ b/lib/colvars/lepton/include/lepton/CompiledExpression.h @@ -0,0 +1,113 @@ +#ifndef LEPTON_COMPILED_EXPRESSION_H_ +#define LEPTON_COMPILED_EXPRESSION_H_ + +/* -------------------------------------------------------------------------- * + * Lepton * + * -------------------------------------------------------------------------- * + * This is part of the Lepton expression parser originating from * + * Simbios, the NIH National Center for Physics-Based Simulation of * + * Biological Structures at Stanford, funded under the NIH Roadmap for * + * Medical Research, grant U54 GM072970. See https://simtk.org. * + * * + * Portions copyright (c) 2013-2016 Stanford University and the Authors. 
* + * Authors: Peter Eastman * + * Contributors: * + * * + * Permission is hereby granted, free of charge, to any person obtaining a * + * copy of this software and associated documentation files (the "Software"), * + * to deal in the Software without restriction, including without limitation * + * the rights to use, copy, modify, merge, publish, distribute, sublicense, * + * and/or sell copies of the Software, and to permit persons to whom the * + * Software is furnished to do so, subject to the following conditions: * + * * + * The above copyright notice and this permission notice shall be included in * + * all copies or substantial portions of the Software. * + * * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL * + * THE AUTHORS, CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, * + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR * + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE * + * USE OR OTHER DEALINGS IN THE SOFTWARE. * + * -------------------------------------------------------------------------- */ + +#include "ExpressionTreeNode.h" +#include "windowsIncludes.h" +#include <map> +#include <set> +#include <string> +#include <utility> +#include <vector> +#ifdef LEPTON_USE_JIT + #include "asmjit.h" +#endif + +namespace Lepton { + +class Operation; +class ParsedExpression; + +/** + * A CompiledExpression is a highly optimized representation of an expression for cases when you want to evaluate + * it many times as quickly as possible. You should treat it as an opaque object; none of the internal representation + * is visible. + * + * A CompiledExpression is created by calling createCompiledExpression() on a ParsedExpression. + * + * WARNING: CompiledExpression is NOT thread safe. 
You should never access a CompiledExpression from two threads at + * the same time. + */ + +class LEPTON_EXPORT CompiledExpression { +public: + CompiledExpression(); + CompiledExpression(const CompiledExpression& expression); + ~CompiledExpression(); + CompiledExpression& operator=(const CompiledExpression& expression); + /** + * Get the names of all variables used by this expression. + */ + const std::set<std::string>& getVariables() const; + /** + * Get a reference to the memory location where the value of a particular variable is stored. This can be used + * to set the value of the variable before calling evaluate(). + */ + double& getVariableReference(const std::string& name); + /** + * You can optionally specify the memory locations from which the values of variables should be read. + * This is useful, for example, when several expressions all use the same variable. You can then set + * the value of that variable in one place, and it will be seen by all of them. + */ + void setVariableLocations(std::map<std::string, double*>& variableLocations); + /** + * Evaluate the expression. The values of all variables should have been set before calling this. 
+ */ + double evaluate() const; +private: + friend class ParsedExpression; + CompiledExpression(const ParsedExpression& expression); + void compileExpression(const ExpressionTreeNode& node, std::vector<std::pair<ExpressionTreeNode, int> >& temps); + int findTempIndex(const ExpressionTreeNode& node, std::vector<std::pair<ExpressionTreeNode, int> >& temps); + std::map<std::string, double*> variablePointers; + std::vector<std::pair<double*, double*> > variablesToCopy; + std::vector<std::vector<int> > arguments; + std::vector<int> target; + std::vector<Operation*> operation; + std::map<std::string, int> variableIndices; + std::set<std::string> variableNames; + mutable std::vector<double> workspace; + mutable std::vector<double> argValues; + std::map<std::string, double> dummyVariables; + void* jitCode; +#ifdef LEPTON_USE_JIT + void generateJitCode(); + void generateSingleArgCall(asmjit::X86Compiler& c, asmjit::X86XmmVar& dest, asmjit::X86XmmVar& arg, double (*function)(double)); + std::vector<double> constants; + asmjit::JitRuntime runtime; +#endif +}; + +} // namespace Lepton + +#endif /*LEPTON_COMPILED_EXPRESSION_H_*/ diff --git a/lib/colvars/lepton/include/lepton/CustomFunction.h b/lib/colvars/lepton/include/lepton/CustomFunction.h new file mode 100644 index 0000000000000000000000000000000000000000..5c5586105f7f20d5ca218b6d5f12b2ad34d19629 --- /dev/null +++ b/lib/colvars/lepton/include/lepton/CustomFunction.h @@ -0,0 +1,77 @@ +#ifndef LEPTON_CUSTOM_FUNCTION_H_ +#define LEPTON_CUSTOM_FUNCTION_H_ + +/* -------------------------------------------------------------------------- * + * Lepton * + * -------------------------------------------------------------------------- * + * This is part of the Lepton expression parser originating from * + * Simbios, the NIH National Center for Physics-Based Simulation of * + * Biological Structures at Stanford, funded under the NIH Roadmap for * + * Medical Research, grant U54 GM072970. See https://simtk.org. 
* + * * + * Portions copyright (c) 2009 Stanford University and the Authors. * + * Authors: Peter Eastman * + * Contributors: * + * * + * Permission is hereby granted, free of charge, to any person obtaining a * + * copy of this software and associated documentation files (the "Software"), * + * to deal in the Software without restriction, including without limitation * + * the rights to use, copy, modify, merge, publish, distribute, sublicense, * + * and/or sell copies of the Software, and to permit persons to whom the * + * Software is furnished to do so, subject to the following conditions: * + * * + * The above copyright notice and this permission notice shall be included in * + * all copies or substantial portions of the Software. * + * * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL * + * THE AUTHORS, CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, * + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR * + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE * + * USE OR OTHER DEALINGS IN THE SOFTWARE. * + * -------------------------------------------------------------------------- */ + +#include "windowsIncludes.h" + +namespace Lepton { + +/** + * This class is the interface for defining your own function that may be included in expressions. + * To use it, create a concrete subclass that implements all of the virtual methods for each new function + * you want to define. Then when you call Parser::parse() to parse an expression, pass a map of + * function names to CustomFunction objects. + */ + +class LEPTON_EXPORT CustomFunction { +public: + virtual ~CustomFunction() { + } + /** + * Get the number of arguments this function expects. + */ + virtual int getNumArguments() const = 0; + /** + * Evaluate the function. 
+ * + * @param arguments the array of argument values + */ + virtual double evaluate(const double* arguments) const = 0; + /** + * Evaluate a derivative of the function. + * + * @param arguments the array of argument values + * @param derivOrder an array specifying the number of times the function has been differentiated + * with respect to each of its arguments. For example, the array {0, 2} indicates + * a second derivative with respect to the second argument. + */ + virtual double evaluateDerivative(const double* arguments, const int* derivOrder) const = 0; + /** + * Create a new duplicate of this object on the heap using the "new" operator. + */ + virtual CustomFunction* clone() const = 0; +}; + +} // namespace Lepton + +#endif /*LEPTON_CUSTOM_FUNCTION_H_*/ diff --git a/lib/colvars/lepton/include/lepton/Exception.h b/lib/colvars/lepton/include/lepton/Exception.h new file mode 100644 index 0000000000000000000000000000000000000000..5ad55714d183e843176d62315ef54bde7872497e --- /dev/null +++ b/lib/colvars/lepton/include/lepton/Exception.h @@ -0,0 +1,59 @@ +#ifndef LEPTON_EXCEPTION_H_ +#define LEPTON_EXCEPTION_H_ + +/* -------------------------------------------------------------------------- * + * Lepton * + * -------------------------------------------------------------------------- * + * This is part of the Lepton expression parser originating from * + * Simbios, the NIH National Center for Physics-Based Simulation of * + * Biological Structures at Stanford, funded under the NIH Roadmap for * + * Medical Research, grant U54 GM072970. See https://simtk.org. * + * * + * Portions copyright (c) 2009 Stanford University and the Authors. 
* + * Authors: Peter Eastman * + * Contributors: * + * * + * Permission is hereby granted, free of charge, to any person obtaining a * + * copy of this software and associated documentation files (the "Software"), * + * to deal in the Software without restriction, including without limitation * + * the rights to use, copy, modify, merge, publish, distribute, sublicense, * + * and/or sell copies of the Software, and to permit persons to whom the * + * Software is furnished to do so, subject to the following conditions: * + * * + * The above copyright notice and this permission notice shall be included in * + * all copies or substantial portions of the Software. * + * * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL * + * THE AUTHORS, CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, * + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR * + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE * + * USE OR OTHER DEALINGS IN THE SOFTWARE. * + * -------------------------------------------------------------------------- */ + +#include <exception> +#include <string> + +namespace Lepton { + +/** + * This class is used for all exceptions thrown by Lepton. 
+ */ + +class Exception : public std::exception { +public: + Exception(const std::string& message) : message(message) { + } + ~Exception() throw() { + } + const char* what() const throw() { + return message.c_str(); + } +private: + std::string message; +}; + +} // namespace Lepton + +#endif /*LEPTON_EXCEPTION_H_*/ diff --git a/lib/colvars/lepton/include/lepton/ExpressionProgram.h b/lib/colvars/lepton/include/lepton/ExpressionProgram.h new file mode 100644 index 0000000000000000000000000000000000000000..94d37f471d08bb7b2041aa3c42aafc6a3bc5bac0 --- /dev/null +++ b/lib/colvars/lepton/include/lepton/ExpressionProgram.h @@ -0,0 +1,95 @@ +#ifndef LEPTON_EXPRESSION_PROGRAM_H_ +#define LEPTON_EXPRESSION_PROGRAM_H_ + +/* -------------------------------------------------------------------------- * + * Lepton * + * -------------------------------------------------------------------------- * + * This is part of the Lepton expression parser originating from * + * Simbios, the NIH National Center for Physics-Based Simulation of * + * Biological Structures at Stanford, funded under the NIH Roadmap for * + * Medical Research, grant U54 GM072970. See https://simtk.org. * + * * + * Portions copyright (c) 2009 Stanford University and the Authors. * + * Authors: Peter Eastman * + * Contributors: * + * * + * Permission is hereby granted, free of charge, to any person obtaining a * + * copy of this software and associated documentation files (the "Software"), * + * to deal in the Software without restriction, including without limitation * + * the rights to use, copy, modify, merge, publish, distribute, sublicense, * + * and/or sell copies of the Software, and to permit persons to whom the * + * Software is furnished to do so, subject to the following conditions: * + * * + * The above copyright notice and this permission notice shall be included in * + * all copies or substantial portions of the Software. 
* + * * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL * + * THE AUTHORS, CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, * + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR * + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE * + * USE OR OTHER DEALINGS IN THE SOFTWARE. * + * -------------------------------------------------------------------------- */ + +#include "ExpressionTreeNode.h" +#include "windowsIncludes.h" +#include <map> +#include <string> +#include <vector> + +namespace Lepton { + +class ParsedExpression; + +/** + * An ExpressionProgram is a linear sequence of Operations for evaluating an expression. The evaluation + * is done with a stack. The arguments to each Operation are first taken off the stack in order, then it is + * evaluated and the result is pushed back onto the stack. At the end, the stack contains a single value, + * which is the value of the expression. + * + * An ExpressionProgram is created by calling createProgram() on a ParsedExpression. + */ + +class LEPTON_EXPORT ExpressionProgram { +public: + ExpressionProgram(); + ExpressionProgram(const ExpressionProgram& program); + ~ExpressionProgram(); + ExpressionProgram& operator=(const ExpressionProgram& program); + /** + * Get the number of Operations that make up this program. + */ + int getNumOperations() const; + /** + * Get an Operation in this program. + */ + const Operation& getOperation(int index) const; + /** + * Get the size of the stack needed to execute this program. This is the largest number of elements present + * on the stack at any point during evaluation. + */ + int getStackSize() const; + /** + * Evaluate the expression. If the expression involves any variables, this method will throw an exception. 
+ */ + double evaluate() const; + /** + * Evaluate the expression. + * + * @param variables a map specifying the values of all variables that appear in the expression. If any + * variable appears in the expression but is not included in this map, an exception + * will be thrown. + */ + double evaluate(const std::map<std::string, double>& variables) const; +private: + friend class ParsedExpression; + ExpressionProgram(const ParsedExpression& expression); + void buildProgram(const ExpressionTreeNode& node); + std::vector<Operation*> operations; + int maxArgs, stackSize; +}; + +} // namespace Lepton + +#endif /*LEPTON_EXPRESSION_PROGRAM_H_*/ diff --git a/lib/colvars/lepton/include/lepton/ExpressionTreeNode.h b/lib/colvars/lepton/include/lepton/ExpressionTreeNode.h new file mode 100644 index 0000000000000000000000000000000000000000..bf3a9a0902aae5118c0e76b619347e851c117879 --- /dev/null +++ b/lib/colvars/lepton/include/lepton/ExpressionTreeNode.h @@ -0,0 +1,105 @@ +#ifndef LEPTON_EXPRESSION_TREE_NODE_H_ +#define LEPTON_EXPRESSION_TREE_NODE_H_ + +/* -------------------------------------------------------------------------- * + * Lepton * + * -------------------------------------------------------------------------- * + * This is part of the Lepton expression parser originating from * + * Simbios, the NIH National Center for Physics-Based Simulation of * + * Biological Structures at Stanford, funded under the NIH Roadmap for * + * Medical Research, grant U54 GM072970. See https://simtk.org. * + * * + * Portions copyright (c) 2009 Stanford University and the Authors. 
* + * Authors: Peter Eastman * + * Contributors: * + * * + * Permission is hereby granted, free of charge, to any person obtaining a * + * copy of this software and associated documentation files (the "Software"), * + * to deal in the Software without restriction, including without limitation * + * the rights to use, copy, modify, merge, publish, distribute, sublicense, * + * and/or sell copies of the Software, and to permit persons to whom the * + * Software is furnished to do so, subject to the following conditions: * + * * + * The above copyright notice and this permission notice shall be included in * + * all copies or substantial portions of the Software. * + * * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL * + * THE AUTHORS, CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, * + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR * + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE * + * USE OR OTHER DEALINGS IN THE SOFTWARE. * + * -------------------------------------------------------------------------- */ + +#include "windowsIncludes.h" +#include <string> +#include <vector> + +namespace Lepton { + +class Operation; + +/** + * This class represents a node in the abstract syntax tree representation of an expression. + * Each node is defined by an Operation and a set of children. When the expression is + * evaluated, each child is first evaluated in order, then the resulting values are passed + * as the arguments to the Operation's evaluate() method. + */ + +class LEPTON_EXPORT ExpressionTreeNode { +public: + /** + * Create a new ExpressionTreeNode. + * + * @param operation the operation for this node. The ExpressionTreeNode takes over ownership + * of this object, and deletes it when the node is itself deleted. 
+ * @param children the children of this node + */ + ExpressionTreeNode(Operation* operation, const std::vector<ExpressionTreeNode>& children); + /** + * Create a new ExpressionTreeNode with two children. + * + * @param operation the operation for this node. The ExpressionTreeNode takes over ownership + * of this object, and deletes it when the node is itself deleted. + * @param child1 the first child of this node + * @param child2 the second child of this node + */ + ExpressionTreeNode(Operation* operation, const ExpressionTreeNode& child1, const ExpressionTreeNode& child2); + /** + * Create a new ExpressionTreeNode with one child. + * + * @param operation the operation for this node. The ExpressionTreeNode takes over ownership + * of this object, and deletes it when the node is itself deleted. + * @param child the child of this node + */ + ExpressionTreeNode(Operation* operation, const ExpressionTreeNode& child); + /** + * Create a new ExpressionTreeNode with no children. + * + * @param operation the operation for this node. The ExpressionTreeNode takes over ownership + * of this object, and deletes it when the node is itself deleted. + */ + ExpressionTreeNode(Operation* operation); + ExpressionTreeNode(const ExpressionTreeNode& node); + ExpressionTreeNode(); + ~ExpressionTreeNode(); + bool operator==(const ExpressionTreeNode& node) const; + bool operator!=(const ExpressionTreeNode& node) const; + ExpressionTreeNode& operator=(const ExpressionTreeNode& node); + /** + * Get the Operation performed by this node. + */ + const Operation& getOperation() const; + /** + * Get this node's child nodes. 
+ */ + const std::vector<ExpressionTreeNode>& getChildren() const; +private: + Operation* operation; + std::vector<ExpressionTreeNode> children; +}; + +} // namespace Lepton + +#endif /*LEPTON_EXPRESSION_TREE_NODE_H_*/ diff --git a/lib/colvars/lepton/include/lepton/Operation.h b/lib/colvars/lepton/include/lepton/Operation.h new file mode 100644 index 0000000000000000000000000000000000000000..f7a8b78163403eea136b92c48a35d7b69e97a419 --- /dev/null +++ b/lib/colvars/lepton/include/lepton/Operation.h @@ -0,0 +1,1165 @@ +#ifndef LEPTON_OPERATION_H_ +#define LEPTON_OPERATION_H_ + +/* -------------------------------------------------------------------------- * + * Lepton * + * -------------------------------------------------------------------------- * + * This is part of the Lepton expression parser originating from * + * Simbios, the NIH National Center for Physics-Based Simulation of * + * Biological Structures at Stanford, funded under the NIH Roadmap for * + * Medical Research, grant U54 GM072970. See https://simtk.org. * + * * + * Portions copyright (c) 2009-2015 Stanford University and the Authors. * + * Authors: Peter Eastman * + * Contributors: * + * * + * Permission is hereby granted, free of charge, to any person obtaining a * + * copy of this software and associated documentation files (the "Software"), * + * to deal in the Software without restriction, including without limitation * + * the rights to use, copy, modify, merge, publish, distribute, sublicense, * + * and/or sell copies of the Software, and to permit persons to whom the * + * Software is furnished to do so, subject to the following conditions: * + * * + * The above copyright notice and this permission notice shall be included in * + * all copies or substantial portions of the Software. 
* + * * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL * + * THE AUTHORS, CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, * + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR * + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE * + * USE OR OTHER DEALINGS IN THE SOFTWARE. * + * -------------------------------------------------------------------------- */ + +#include "windowsIncludes.h" +#include "CustomFunction.h" +#include "Exception.h" +#include <cmath> +#include <map> +#include <string> +#include <vector> +#include <sstream> +#include <algorithm> + +namespace Lepton { + +class ExpressionTreeNode; + +/** + * An Operation represents a single step in the evaluation of an expression, such as a function, + * an operator, or a constant value. Each Operation takes some number of values as arguments + * and produces a single value. + * + * This is an abstract class with subclasses for specific operations. + */ + +class LEPTON_EXPORT Operation { +public: + virtual ~Operation() { + } + /** + * This enumeration lists all Operation subclasses. This is provided so that switch statements + * can be used when processing or analyzing parsed expressions. + */ + enum Id {CONSTANT, VARIABLE, CUSTOM, ADD, SUBTRACT, MULTIPLY, DIVIDE, POWER, NEGATE, SQRT, EXP, LOG, + SIN, COS, SEC, CSC, TAN, COT, ASIN, ACOS, ATAN, SINH, COSH, TANH, ERF, ERFC, STEP, DELTA, SQUARE, CUBE, RECIPROCAL, + ADD_CONSTANT, MULTIPLY_CONSTANT, POWER_CONSTANT, MIN, MAX, ABS, FLOOR, CEIL, SELECT}; + /** + * Get the name of this Operation. + */ + virtual std::string getName() const = 0; + /** + * Get this Operation's ID. + */ + virtual Id getId() const = 0; + /** + * Get the number of arguments this operation expects. 
+ */ + virtual int getNumArguments() const = 0; + /** + * Create a clone of this Operation. + */ + virtual Operation* clone() const = 0; + /** + * Perform the computation represented by this operation. + * + * @param args the array of arguments + * @param variables a map containing the values of all variables + * @return the result of performing the computation. + */ + virtual double evaluate(double* args, const std::map<std::string, double>& variables) const = 0; + /** + * Return an ExpressionTreeNode which represents the analytic derivative of this Operation with respect to a variable. + * + * @param children the child nodes + * @param childDerivs the derivatives of the child nodes with respect to the variable + * @param variable the variable with respect to which the derivative should be taken + */ + virtual ExpressionTreeNode differentiate(const std::vector<ExpressionTreeNode>& children, const std::vector<ExpressionTreeNode>& childDerivs, const std::string& variable) const = 0; + /** + * Get whether this operation should be displayed with infix notation. + */ + virtual bool isInfixOperator() const { + return false; + } + /** + * Get whether this is a symmetric binary operation, such that exchanging its arguments + * does not affect the result.
+ */ + virtual bool isSymmetric() const { + return false; + } + virtual bool operator!=(const Operation& op) const { + return op.getId() != getId(); + } + virtual bool operator==(const Operation& op) const { + return !(*this != op); + } + class Constant; + class Variable; + class Custom; + class Add; + class Subtract; + class Multiply; + class Divide; + class Power; + class Negate; + class Sqrt; + class Exp; + class Log; + class Sin; + class Cos; + class Sec; + class Csc; + class Tan; + class Cot; + class Asin; + class Acos; + class Atan; + class Sinh; + class Cosh; + class Tanh; + class Erf; + class Erfc; + class Step; + class Delta; + class Square; + class Cube; + class Reciprocal; + class AddConstant; + class MultiplyConstant; + class PowerConstant; + class Min; + class Max; + class Abs; + class Floor; + class Ceil; + class Select; +}; + +class LEPTON_EXPORT Operation::Constant : public Operation { +public: + Constant(double value) : value(value) { + } + std::string getName() const { + std::stringstream name; + name << value; + return name.str(); + } + Id getId() const { + return CONSTANT; + } + int getNumArguments() const { + return 0; + } + Operation* clone() const { + return new Constant(value); + } + double evaluate(double* args, const std::map<std::string, double>& variables) const { + return value; + } + ExpressionTreeNode differentiate(const std::vector<ExpressionTreeNode>& children, const std::vector<ExpressionTreeNode>& childDerivs, const std::string& variable) const; + double getValue() const { + return value; + } + bool operator!=(const Operation& op) const { + const Constant* o = dynamic_cast<const Constant*>(&op); + return (o == NULL || o->value != value); + } +private: + double value; +}; + +class LEPTON_EXPORT Operation::Variable : public Operation { +public: + Variable(const std::string& name) : name(name) { + } + std::string getName() const { + return name; + } + Id getId() const { + return VARIABLE; + } + int getNumArguments() const { + return 
0; + } + Operation* clone() const { + return new Variable(name); + } + double evaluate(double* args, const std::map<std::string, double>& variables) const { + std::map<std::string, double>::const_iterator iter = variables.find(name); + if (iter == variables.end()) + throw Exception("No value specified for variable "+name); + return iter->second; + } + ExpressionTreeNode differentiate(const std::vector<ExpressionTreeNode>& children, const std::vector<ExpressionTreeNode>& childDerivs, const std::string& variable) const; + bool operator!=(const Operation& op) const { + const Variable* o = dynamic_cast<const Variable*>(&op); + return (o == NULL || o->name != name); + } +private: + std::string name; +}; + +class LEPTON_EXPORT Operation::Custom : public Operation { +public: + Custom(const std::string& name, CustomFunction* function) : name(name), function(function), isDerivative(false), derivOrder(function->getNumArguments(), 0) { + } + Custom(const Custom& base, int derivIndex) : name(base.name), function(base.function->clone()), isDerivative(true), derivOrder(base.derivOrder) { + derivOrder[derivIndex]++; + } + ~Custom() { + delete function; + } + std::string getName() const { + return name; + } + Id getId() const { + return CUSTOM; + } + int getNumArguments() const { + return function->getNumArguments(); + } + Operation* clone() const { + Custom* clone = new Custom(name, function->clone()); + clone->isDerivative = isDerivative; + clone->derivOrder = derivOrder; + return clone; + } + double evaluate(double* args, const std::map<std::string, double>& variables) const { + if (isDerivative) + return function->evaluateDerivative(args, &derivOrder[0]); + return function->evaluate(args); + } + ExpressionTreeNode differentiate(const std::vector<ExpressionTreeNode>& children, const std::vector<ExpressionTreeNode>& childDerivs, const std::string& variable) const; + const std::vector<int>& getDerivOrder() const { + return derivOrder; + } + bool operator!=(const Operation& op) 
const { + const Custom* o = dynamic_cast<const Custom*>(&op); + return (o == NULL || o->name != name || o->isDerivative != isDerivative || o->derivOrder != derivOrder); + } +private: + std::string name; + CustomFunction* function; + bool isDerivative; + std::vector<int> derivOrder; +}; + +class LEPTON_EXPORT Operation::Add : public Operation { +public: + Add() { + } + std::string getName() const { + return "+"; + } + Id getId() const { + return ADD; + } + int getNumArguments() const { + return 2; + } + Operation* clone() const { + return new Add(); + } + double evaluate(double* args, const std::map<std::string, double>& variables) const { + return args[0]+args[1]; + } + ExpressionTreeNode differentiate(const std::vector<ExpressionTreeNode>& children, const std::vector<ExpressionTreeNode>& childDerivs, const std::string& variable) const; + bool isInfixOperator() const { + return true; + } + bool isSymmetric() const { + return true; + } +}; + +class LEPTON_EXPORT Operation::Subtract : public Operation { +public: + Subtract() { + } + std::string getName() const { + return "-"; + } + Id getId() const { + return SUBTRACT; + } + int getNumArguments() const { + return 2; + } + Operation* clone() const { + return new Subtract(); + } + double evaluate(double* args, const std::map<std::string, double>& variables) const { + return args[0]-args[1]; + } + ExpressionTreeNode differentiate(const std::vector<ExpressionTreeNode>& children, const std::vector<ExpressionTreeNode>& childDerivs, const std::string& variable) const; + bool isInfixOperator() const { + return true; + } +}; + +class LEPTON_EXPORT Operation::Multiply : public Operation { +public: + Multiply() { + } + std::string getName() const { + return "*"; + } + Id getId() const { + return MULTIPLY; + } + int getNumArguments() const { + return 2; + } + Operation* clone() const { + return new Multiply(); + } + double evaluate(double* args, const std::map<std::string, double>& variables) const { + return args[0]*args[1]; + 
} + ExpressionTreeNode differentiate(const std::vector<ExpressionTreeNode>& children, const std::vector<ExpressionTreeNode>& childDerivs, const std::string& variable) const; + bool isInfixOperator() const { + return true; + } + bool isSymmetric() const { + return true; + } +}; + +class LEPTON_EXPORT Operation::Divide : public Operation { +public: + Divide() { + } + std::string getName() const { + return "/"; + } + Id getId() const { + return DIVIDE; + } + int getNumArguments() const { + return 2; + } + Operation* clone() const { + return new Divide(); + } + double evaluate(double* args, const std::map<std::string, double>& variables) const { + return args[0]/args[1]; + } + ExpressionTreeNode differentiate(const std::vector<ExpressionTreeNode>& children, const std::vector<ExpressionTreeNode>& childDerivs, const std::string& variable) const; + bool isInfixOperator() const { + return true; + } +}; + +class LEPTON_EXPORT Operation::Power : public Operation { +public: + Power() { + } + std::string getName() const { + return "^"; + } + Id getId() const { + return POWER; + } + int getNumArguments() const { + return 2; + } + Operation* clone() const { + return new Power(); + } + double evaluate(double* args, const std::map<std::string, double>& variables) const { + return std::pow(args[0], args[1]); + } + ExpressionTreeNode differentiate(const std::vector<ExpressionTreeNode>& children, const std::vector<ExpressionTreeNode>& childDerivs, const std::string& variable) const; + bool isInfixOperator() const { + return true; + } +}; + +class LEPTON_EXPORT Operation::Negate : public Operation { +public: + Negate() { + } + std::string getName() const { + return "-"; + } + Id getId() const { + return NEGATE; + } + int getNumArguments() const { + return 1; + } + Operation* clone() const { + return new Negate(); + } + double evaluate(double* args, const std::map<std::string, double>& variables) const { + return -args[0]; + } + ExpressionTreeNode differentiate(const 
std::vector<ExpressionTreeNode>& children, const std::vector<ExpressionTreeNode>& childDerivs, const std::string& variable) const; +}; + +class LEPTON_EXPORT Operation::Sqrt : public Operation { +public: + Sqrt() { + } + std::string getName() const { + return "sqrt"; + } + Id getId() const { + return SQRT; + } + int getNumArguments() const { + return 1; + } + Operation* clone() const { + return new Sqrt(); + } + double evaluate(double* args, const std::map<std::string, double>& variables) const { + return std::sqrt(args[0]); + } + ExpressionTreeNode differentiate(const std::vector<ExpressionTreeNode>& children, const std::vector<ExpressionTreeNode>& childDerivs, const std::string& variable) const; +}; + +class LEPTON_EXPORT Operation::Exp : public Operation { +public: + Exp() { + } + std::string getName() const { + return "exp"; + } + Id getId() const { + return EXP; + } + int getNumArguments() const { + return 1; + } + Operation* clone() const { + return new Exp(); + } + double evaluate(double* args, const std::map<std::string, double>& variables) const { + return std::exp(args[0]); + } + ExpressionTreeNode differentiate(const std::vector<ExpressionTreeNode>& children, const std::vector<ExpressionTreeNode>& childDerivs, const std::string& variable) const; +}; + +class LEPTON_EXPORT Operation::Log : public Operation { +public: + Log() { + } + std::string getName() const { + return "log"; + } + Id getId() const { + return LOG; + } + int getNumArguments() const { + return 1; + } + Operation* clone() const { + return new Log(); + } + double evaluate(double* args, const std::map<std::string, double>& variables) const { + return std::log(args[0]); + } + ExpressionTreeNode differentiate(const std::vector<ExpressionTreeNode>& children, const std::vector<ExpressionTreeNode>& childDerivs, const std::string& variable) const; +}; + +class LEPTON_EXPORT Operation::Sin : public Operation { +public: + Sin() { + } + std::string getName() const { + return "sin"; + } + Id getId() 
const { + return SIN; + } + int getNumArguments() const { + return 1; + } + Operation* clone() const { + return new Sin(); + } + double evaluate(double* args, const std::map<std::string, double>& variables) const { + return std::sin(args[0]); + } + ExpressionTreeNode differentiate(const std::vector<ExpressionTreeNode>& children, const std::vector<ExpressionTreeNode>& childDerivs, const std::string& variable) const; +}; + +class LEPTON_EXPORT Operation::Cos : public Operation { +public: + Cos() { + } + std::string getName() const { + return "cos"; + } + Id getId() const { + return COS; + } + int getNumArguments() const { + return 1; + } + Operation* clone() const { + return new Cos(); + } + double evaluate(double* args, const std::map<std::string, double>& variables) const { + return std::cos(args[0]); + } + ExpressionTreeNode differentiate(const std::vector<ExpressionTreeNode>& children, const std::vector<ExpressionTreeNode>& childDerivs, const std::string& variable) const; +}; + +class LEPTON_EXPORT Operation::Sec : public Operation { +public: + Sec() { + } + std::string getName() const { + return "sec"; + } + Id getId() const { + return SEC; + } + int getNumArguments() const { + return 1; + } + Operation* clone() const { + return new Sec(); + } + double evaluate(double* args, const std::map<std::string, double>& variables) const { + return 1.0/std::cos(args[0]); + } + ExpressionTreeNode differentiate(const std::vector<ExpressionTreeNode>& children, const std::vector<ExpressionTreeNode>& childDerivs, const std::string& variable) const; +}; + +class LEPTON_EXPORT Operation::Csc : public Operation { +public: + Csc() { + } + std::string getName() const { + return "csc"; + } + Id getId() const { + return CSC; + } + int getNumArguments() const { + return 1; + } + Operation* clone() const { + return new Csc(); + } + double evaluate(double* args, const std::map<std::string, double>& variables) const { + return 1.0/std::sin(args[0]); + } + ExpressionTreeNode 
differentiate(const std::vector<ExpressionTreeNode>& children, const std::vector<ExpressionTreeNode>& childDerivs, const std::string& variable) const; +}; + +class LEPTON_EXPORT Operation::Tan : public Operation { +public: + Tan() { + } + std::string getName() const { + return "tan"; + } + Id getId() const { + return TAN; + } + int getNumArguments() const { + return 1; + } + Operation* clone() const { + return new Tan(); + } + double evaluate(double* args, const std::map<std::string, double>& variables) const { + return std::tan(args[0]); + } + ExpressionTreeNode differentiate(const std::vector<ExpressionTreeNode>& children, const std::vector<ExpressionTreeNode>& childDerivs, const std::string& variable) const; +}; + +class LEPTON_EXPORT Operation::Cot : public Operation { +public: + Cot() { + } + std::string getName() const { + return "cot"; + } + Id getId() const { + return COT; + } + int getNumArguments() const { + return 1; + } + Operation* clone() const { + return new Cot(); + } + double evaluate(double* args, const std::map<std::string, double>& variables) const { + return 1.0/std::tan(args[0]); + } + ExpressionTreeNode differentiate(const std::vector<ExpressionTreeNode>& children, const std::vector<ExpressionTreeNode>& childDerivs, const std::string& variable) const; +}; + +class LEPTON_EXPORT Operation::Asin : public Operation { +public: + Asin() { + } + std::string getName() const { + return "asin"; + } + Id getId() const { + return ASIN; + } + int getNumArguments() const { + return 1; + } + Operation* clone() const { + return new Asin(); + } + double evaluate(double* args, const std::map<std::string, double>& variables) const { + return std::asin(args[0]); + } + ExpressionTreeNode differentiate(const std::vector<ExpressionTreeNode>& children, const std::vector<ExpressionTreeNode>& childDerivs, const std::string& variable) const; +}; + +class LEPTON_EXPORT Operation::Acos : public Operation { +public: + Acos() { + } + std::string getName() const { + return 
"acos"; + } + Id getId() const { + return ACOS; + } + int getNumArguments() const { + return 1; + } + Operation* clone() const { + return new Acos(); + } + double evaluate(double* args, const std::map<std::string, double>& variables) const { + return std::acos(args[0]); + } + ExpressionTreeNode differentiate(const std::vector<ExpressionTreeNode>& children, const std::vector<ExpressionTreeNode>& childDerivs, const std::string& variable) const; +}; + +class LEPTON_EXPORT Operation::Atan : public Operation { +public: + Atan() { + } + std::string getName() const { + return "atan"; + } + Id getId() const { + return ATAN; + } + int getNumArguments() const { + return 1; + } + Operation* clone() const { + return new Atan(); + } + double evaluate(double* args, const std::map<std::string, double>& variables) const { + return std::atan(args[0]); + } + ExpressionTreeNode differentiate(const std::vector<ExpressionTreeNode>& children, const std::vector<ExpressionTreeNode>& childDerivs, const std::string& variable) const; +}; + +class LEPTON_EXPORT Operation::Sinh : public Operation { +public: + Sinh() { + } + std::string getName() const { + return "sinh"; + } + Id getId() const { + return SINH; + } + int getNumArguments() const { + return 1; + } + Operation* clone() const { + return new Sinh(); + } + double evaluate(double* args, const std::map<std::string, double>& variables) const { + return std::sinh(args[0]); + } + ExpressionTreeNode differentiate(const std::vector<ExpressionTreeNode>& children, const std::vector<ExpressionTreeNode>& childDerivs, const std::string& variable) const; +}; + +class LEPTON_EXPORT Operation::Cosh : public Operation { +public: + Cosh() { + } + std::string getName() const { + return "cosh"; + } + Id getId() const { + return COSH; + } + int getNumArguments() const { + return 1; + } + Operation* clone() const { + return new Cosh(); + } + double evaluate(double* args, const std::map<std::string, double>& variables) const { + return std::cosh(args[0]); 
+ } + ExpressionTreeNode differentiate(const std::vector<ExpressionTreeNode>& children, const std::vector<ExpressionTreeNode>& childDerivs, const std::string& variable) const; +}; + +class LEPTON_EXPORT Operation::Tanh : public Operation { +public: + Tanh() { + } + std::string getName() const { + return "tanh"; + } + Id getId() const { + return TANH; + } + int getNumArguments() const { + return 1; + } + Operation* clone() const { + return new Tanh(); + } + double evaluate(double* args, const std::map<std::string, double>& variables) const { + return std::tanh(args[0]); + } + ExpressionTreeNode differentiate(const std::vector<ExpressionTreeNode>& children, const std::vector<ExpressionTreeNode>& childDerivs, const std::string& variable) const; +}; + +class LEPTON_EXPORT Operation::Erf : public Operation { +public: + Erf() { + } + std::string getName() const { + return "erf"; + } + Id getId() const { + return ERF; + } + int getNumArguments() const { + return 1; + } + Operation* clone() const { + return new Erf(); + } + double evaluate(double* args, const std::map<std::string, double>& variables) const; + ExpressionTreeNode differentiate(const std::vector<ExpressionTreeNode>& children, const std::vector<ExpressionTreeNode>& childDerivs, const std::string& variable) const; +}; + +class LEPTON_EXPORT Operation::Erfc : public Operation { +public: + Erfc() { + } + std::string getName() const { + return "erfc"; + } + Id getId() const { + return ERFC; + } + int getNumArguments() const { + return 1; + } + Operation* clone() const { + return new Erfc(); + } + double evaluate(double* args, const std::map<std::string, double>& variables) const; + ExpressionTreeNode differentiate(const std::vector<ExpressionTreeNode>& children, const std::vector<ExpressionTreeNode>& childDerivs, const std::string& variable) const; +}; + +class LEPTON_EXPORT Operation::Step : public Operation { +public: + Step() { + } + std::string getName() const { + return "step"; + } + Id getId() const { + 
return STEP; + } + int getNumArguments() const { + return 1; + } + Operation* clone() const { + return new Step(); + } + double evaluate(double* args, const std::map<std::string, double>& variables) const { + return (args[0] >= 0.0 ? 1.0 : 0.0); + } + ExpressionTreeNode differentiate(const std::vector<ExpressionTreeNode>& children, const std::vector<ExpressionTreeNode>& childDerivs, const std::string& variable) const; +}; + +class LEPTON_EXPORT Operation::Delta : public Operation { +public: + Delta() { + } + std::string getName() const { + return "delta"; + } + Id getId() const { + return DELTA; + } + int getNumArguments() const { + return 1; + } + Operation* clone() const { + return new Delta(); + } + double evaluate(double* args, const std::map<std::string, double>& variables) const { + return (args[0] == 0.0 ? 1.0 : 0.0); + } + ExpressionTreeNode differentiate(const std::vector<ExpressionTreeNode>& children, const std::vector<ExpressionTreeNode>& childDerivs, const std::string& variable) const; +}; + +class LEPTON_EXPORT Operation::Square : public Operation { +public: + Square() { + } + std::string getName() const { + return "square"; + } + Id getId() const { + return SQUARE; + } + int getNumArguments() const { + return 1; + } + Operation* clone() const { + return new Square(); + } + double evaluate(double* args, const std::map<std::string, double>& variables) const { + return args[0]*args[0]; + } + ExpressionTreeNode differentiate(const std::vector<ExpressionTreeNode>& children, const std::vector<ExpressionTreeNode>& childDerivs, const std::string& variable) const; +}; + +class LEPTON_EXPORT Operation::Cube : public Operation { +public: + Cube() { + } + std::string getName() const { + return "cube"; + } + Id getId() const { + return CUBE; + } + int getNumArguments() const { + return 1; + } + Operation* clone() const { + return new Cube(); + } + double evaluate(double* args, const std::map<std::string, double>& variables) const { + return 
args[0]*args[0]*args[0]; + } + ExpressionTreeNode differentiate(const std::vector<ExpressionTreeNode>& children, const std::vector<ExpressionTreeNode>& childDerivs, const std::string& variable) const; +}; + +class LEPTON_EXPORT Operation::Reciprocal : public Operation { +public: + Reciprocal() { + } + std::string getName() const { + return "recip"; + } + Id getId() const { + return RECIPROCAL; + } + int getNumArguments() const { + return 1; + } + Operation* clone() const { + return new Reciprocal(); + } + double evaluate(double* args, const std::map<std::string, double>& variables) const { + return 1.0/args[0]; + } + ExpressionTreeNode differentiate(const std::vector<ExpressionTreeNode>& children, const std::vector<ExpressionTreeNode>& childDerivs, const std::string& variable) const; +}; + +class LEPTON_EXPORT Operation::AddConstant : public Operation { +public: + AddConstant(double value) : value(value) { + } + std::string getName() const { + std::stringstream name; + name << value << "+"; + return name.str(); + } + Id getId() const { + return ADD_CONSTANT; + } + int getNumArguments() const { + return 1; + } + Operation* clone() const { + return new AddConstant(value); + } + double evaluate(double* args, const std::map<std::string, double>& variables) const { + return args[0]+value; + } + ExpressionTreeNode differentiate(const std::vector<ExpressionTreeNode>& children, const std::vector<ExpressionTreeNode>& childDerivs, const std::string& variable) const; + double getValue() const { + return value; + } + bool operator!=(const Operation& op) const { + const AddConstant* o = dynamic_cast<const AddConstant*>(&op); + return (o == NULL || o->value != value); + } +private: + double value; +}; + +class LEPTON_EXPORT Operation::MultiplyConstant : public Operation { +public: + MultiplyConstant(double value) : value(value) { + } + std::string getName() const { + std::stringstream name; + name << value << "*"; + return name.str(); + } + Id getId() const { + return 
MULTIPLY_CONSTANT; + } + int getNumArguments() const { + return 1; + } + Operation* clone() const { + return new MultiplyConstant(value); + } + double evaluate(double* args, const std::map<std::string, double>& variables) const { + return args[0]*value; + } + ExpressionTreeNode differentiate(const std::vector<ExpressionTreeNode>& children, const std::vector<ExpressionTreeNode>& childDerivs, const std::string& variable) const; + double getValue() const { + return value; + } + bool operator!=(const Operation& op) const { + const MultiplyConstant* o = dynamic_cast<const MultiplyConstant*>(&op); + return (o == NULL || o->value != value); + } +private: + double value; +}; + +class LEPTON_EXPORT Operation::PowerConstant : public Operation { +public: + PowerConstant(double value) : value(value) { + intValue = (int) value; + isIntPower = (intValue == value); + } + std::string getName() const { + std::stringstream name; + name << "^" << value; + return name.str(); + } + Id getId() const { + return POWER_CONSTANT; + } + int getNumArguments() const { + return 1; + } + Operation* clone() const { + return new PowerConstant(value); + } + double evaluate(double* args, const std::map<std::string, double>& variables) const { + if (isIntPower) { + // Integer powers can be computed much more quickly by repeated multiplication. NOTE(review): the negation of a negative exponent below overflows when intValue is INT_MIN, and the (int) cast in the constructor is undefined for values outside the int range; confirm such exponents cannot reach this class.
+ + int exponent = intValue; + double base = args[0]; + if (exponent < 0) { + exponent = -exponent; + base = 1.0/base; + } + double result = 1.0; + while (exponent != 0) { + if ((exponent&1) == 1) + result *= base; + base *= base; + exponent = exponent>>1; + } + return result; + } + else + return std::pow(args[0], value); + } + ExpressionTreeNode differentiate(const std::vector<ExpressionTreeNode>& children, const std::vector<ExpressionTreeNode>& childDerivs, const std::string& variable) const; + double getValue() const { + return value; + } + bool operator!=(const Operation& op) const { + const PowerConstant* o = dynamic_cast<const PowerConstant*>(&op); + return (o == NULL || o->value != value); + } + bool isInfixOperator() const { + return true; + } +private: + double value; + int intValue; + bool isIntPower; +}; + +class LEPTON_EXPORT Operation::Min : public Operation { +public: + Min() { + } + std::string getName() const { + return "min"; + } + Id getId() const { + return MIN; + } + int getNumArguments() const { + return 2; + } + Operation* clone() const { + return new Min(); + } + double evaluate(double* args, const std::map<std::string, double>& variables) const { + // parens around (std::min) are workaround for horrible microsoft max/min macro trouble + return (std::min)(args[0], args[1]); + } + ExpressionTreeNode differentiate(const std::vector<ExpressionTreeNode>& children, const std::vector<ExpressionTreeNode>& childDerivs, const std::string& variable) const; +}; + +class LEPTON_EXPORT Operation::Max : public Operation { +public: + Max() { + } + std::string getName() const { + return "max"; + } + Id getId() const { + return MAX; + } + int getNumArguments() const { + return 2; + } + Operation* clone() const { + return new Max(); + } + double evaluate(double* args, const std::map<std::string, double>& variables) const { + // parens around (std::max) are workaround for horrible microsoft max/min macro trouble + return (std::max)(args[0], args[1]); + } +
ExpressionTreeNode differentiate(const std::vector<ExpressionTreeNode>& children, const std::vector<ExpressionTreeNode>& childDerivs, const std::string& variable) const; +}; + +class LEPTON_EXPORT Operation::Abs : public Operation { +public: + Abs() { + } + std::string getName() const { + return "abs"; + } + Id getId() const { + return ABS; + } + int getNumArguments() const { + return 1; + } + Operation* clone() const { + return new Abs(); + } + double evaluate(double* args, const std::map<std::string, double>& variables) const { + return std::abs(args[0]); + } + ExpressionTreeNode differentiate(const std::vector<ExpressionTreeNode>& children, const std::vector<ExpressionTreeNode>& childDerivs, const std::string& variable) const; +}; + +class LEPTON_EXPORT Operation::Floor : public Operation { +public: + + Floor() { + } + std::string getName() const { + return "floor"; + } + Id getId() const { + return FLOOR; + } + int getNumArguments() const { + return 1; + } + Operation* clone() const { + return new Floor(); + } + double evaluate(double* args, const std::map<std::string, double>& variables) const { + return std::floor(args[0]); + } + ExpressionTreeNode differentiate(const std::vector<ExpressionTreeNode>& children, const std::vector<ExpressionTreeNode>& childDerivs, const std::string& variable) const; +}; + +class LEPTON_EXPORT Operation::Ceil : public Operation { +public: + Ceil() { + } + std::string getName() const { + return "ceil"; + } + Id getId() const { + return CEIL; + } + int getNumArguments() const { + return 1; + } + Operation* clone() const { + return new Ceil(); + } + double evaluate(double* args, const std::map<std::string, double>& variables) const { + return std::ceil(args[0]); + } + ExpressionTreeNode differentiate(const std::vector<ExpressionTreeNode>& children, const std::vector<ExpressionTreeNode>& childDerivs, const std::string& variable) const; +}; + +class LEPTON_EXPORT Operation::Select : public Operation { +public: + Select() { + } + 
std::string getName() const { + return "select"; + } + Id getId() const { + return SELECT; + } + int getNumArguments() const { + return 3; + } + Operation* clone() const { + return new Select(); + } + double evaluate(double* args, const std::map<std::string, double>& variables) const { + return (args[0] != 0.0 ? args[1] : args[2]); + } + ExpressionTreeNode differentiate(const std::vector<ExpressionTreeNode>& children, const std::vector<ExpressionTreeNode>& childDerivs, const std::string& variable) const; +}; + +} // namespace Lepton + +#endif /*LEPTON_OPERATION_H_*/ diff --git a/lib/colvars/lepton/include/lepton/ParsedExpression.h b/lib/colvars/lepton/include/lepton/ParsedExpression.h new file mode 100644 index 0000000000000000000000000000000000000000..d88b3d5829167cd19b2b122f86f974719a620435 --- /dev/null +++ b/lib/colvars/lepton/include/lepton/ParsedExpression.h @@ -0,0 +1,130 @@ +#ifndef LEPTON_PARSED_EXPRESSION_H_ +#define LEPTON_PARSED_EXPRESSION_H_ + +/* -------------------------------------------------------------------------- * + * Lepton * + * -------------------------------------------------------------------------- * + * This is part of the Lepton expression parser originating from * + * Simbios, the NIH National Center for Physics-Based Simulation of * + * Biological Structures at Stanford, funded under the NIH Roadmap for * + * Medical Research, grant U54 GM072970. See https://simtk.org. * + * * + * Portions copyright (c) 2009=2013 Stanford University and the Authors. 
*
 * Authors: Peter Eastman                                                     *
 * Contributors:                                                              *
 *                                                                            *
 * Permission is hereby granted, free of charge, to any person obtaining a    *
 * copy of this software and associated documentation files (the "Software"), *
 * to deal in the Software without restriction, including without limitation  *
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,   *
 * and/or sell copies of the Software, and to permit persons to whom the      *
 * Software is furnished to do so, subject to the following conditions:       *
 *                                                                            *
 * The above copyright notice and this permission notice shall be included in *
 * all copies or substantial portions of the Software.                        *
 *                                                                            *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR *
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,   *
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL    *
 * THE AUTHORS, CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,    *
 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR      *
 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE  *
 * USE OR OTHER DEALINGS IN THE SOFTWARE.                                     *
 * -------------------------------------------------------------------------- */

#include "ExpressionTreeNode.h"
#include "windowsIncludes.h"
#include <map>
#include <string>

namespace Lepton {

class CompiledExpression;
class ExpressionProgram;

/**
 * This class represents the result of parsing an expression.  It provides methods for working with the
 * expression in various ways, such as evaluating it, getting the tree representation of the expression, etc.
 */

class LEPTON_EXPORT ParsedExpression {
public:
    /**
     * Create an uninitialized ParsedExpression.  This exists so that ParsedExpressions can be put in STL containers.
     * Doing anything with it will produce an exception.
     */
    ParsedExpression();
    /**
     * Create a ParsedExpression.  Normally you will not call this directly.  Instead, use the Parser class
     * to parse expressions.
     */
    ParsedExpression(const ExpressionTreeNode& rootNode);
    /**
     * Get the root node of the expression's abstract syntax tree.
     */
    const ExpressionTreeNode& getRootNode() const;
    /**
     * Evaluate the expression.  If the expression involves any variables, this method will throw an exception.
     */
    double evaluate() const;
    /**
     * Evaluate the expression.
     *
     * @param variables    a map specifying the values of all variables that appear in the expression.  If any
     *                     variable appears in the expression but is not included in this map, an exception
     *                     will be thrown.
     */
    double evaluate(const std::map<std::string, double>& variables) const;
    /**
     * Create a new ParsedExpression which produces the same result as this one, but is faster to evaluate.
     */
    ParsedExpression optimize() const;
    /**
     * Create a new ParsedExpression which produces the same result as this one, but is faster to evaluate.
     *
     * @param variables    a map specifying values for a subset of variables that appear in the expression.
     *                     All occurrences of these variables in the expression are replaced with the values
     *                     specified.
     */
    ParsedExpression optimize(const std::map<std::string, double>& variables) const;
    /**
     * Create a new ParsedExpression which is the analytic derivative of this expression with respect to a
     * particular variable.
     *
     * @param variable     the variable with respect to which the derivative should be taken
     */
    ParsedExpression differentiate(const std::string& variable) const;
    /**
     * Create an ExpressionProgram that represents the same calculation as this expression.
     */
    ExpressionProgram createProgram() const;
    /**
     * Create a CompiledExpression that represents the same calculation as this expression.
     */
    CompiledExpression createCompiledExpression() const;
    /**
     * Create a new ParsedExpression which is identical to this one, except that the names of some
     * variables have been changed.
     *
     * @param replacements    a map whose keys are the names of variables, and whose values are the
     *                        new names to replace them with
     */
    ParsedExpression renameVariables(const std::map<std::string, std::string>& replacements) const;
private:
    // Static helpers that operate on individual tree nodes.  Their definitions are not part of
    // this header; presumably they implement the public methods of matching name -- confirm
    // against ParsedExpression.cpp.
    static double evaluate(const ExpressionTreeNode& node, const std::map<std::string, double>& variables);
    static ExpressionTreeNode preevaluateVariables(const ExpressionTreeNode& node, const std::map<std::string, double>& variables);
    static ExpressionTreeNode precalculateConstantSubexpressions(const ExpressionTreeNode& node);
    static ExpressionTreeNode substituteSimplerExpression(const ExpressionTreeNode& node);
    static ExpressionTreeNode differentiate(const ExpressionTreeNode& node, const std::string& variable);
    static double getConstantValue(const ExpressionTreeNode& node);
    static ExpressionTreeNode renameNodeVariables(const ExpressionTreeNode& node, const std::map<std::string, std::string>& replacements);
    // Root of the expression tree, as returned by getRootNode().
    ExpressionTreeNode rootNode;
};

// Stream output for a single expression tree node.
LEPTON_EXPORT std::ostream& operator<<(std::ostream& out, const ExpressionTreeNode& node);

// Stream output for a whole parsed expression.
LEPTON_EXPORT std::ostream& operator<<(std::ostream& out, const ParsedExpression& exp);

} // namespace Lepton

#endif /*LEPTON_PARSED_EXPRESSION_H_*/
// diff --git a/lib/colvars/lepton/include/lepton/Parser.h b/lib/colvars/lepton/include/lepton/Parser.h
// new file mode 100644
// index 0000000000000000000000000000000000000000..63d5988d5fa2af79718bbb2a1df6298b43b6260d
// --- /dev/null
// +++ b/lib/colvars/lepton/include/lepton/Parser.h
// @@ -0,0 +1,77 @@
#ifndef LEPTON_PARSER_H_
#define LEPTON_PARSER_H_

/* -------------------------------------------------------------------------- *
 *                                   Lepton                                   *
 * -------------------------------------------------------------------------- *
 * This is part of the
Lepton expression parser originating from              *
 * Simbios, the NIH National Center for Physics-Based Simulation of           *
 * Biological Structures at Stanford, funded under the NIH Roadmap for        *
 * Medical Research, grant U54 GM072970. See https://simtk.org.               *
 *                                                                            *
 * Portions copyright (c) 2009 Stanford University and the Authors.           *
 * Authors: Peter Eastman                                                     *
 * Contributors:                                                              *
 *                                                                            *
 * Permission is hereby granted, free of charge, to any person obtaining a    *
 * copy of this software and associated documentation files (the "Software"), *
 * to deal in the Software without restriction, including without limitation  *
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,   *
 * and/or sell copies of the Software, and to permit persons to whom the      *
 * Software is furnished to do so, subject to the following conditions:       *
 *                                                                            *
 * The above copyright notice and this permission notice shall be included in *
 * all copies or substantial portions of the Software.                        *
 *                                                                            *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR *
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,   *
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL    *
 * THE AUTHORS, CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,    *
 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR      *
 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE  *
 * USE OR OTHER DEALINGS IN THE SOFTWARE.                                     *
 * -------------------------------------------------------------------------- */

#include "windowsIncludes.h"
#include <map>
#include <string>
#include <vector>

namespace Lepton {

class CustomFunction;
class ExpressionTreeNode;
class Operation;
class ParsedExpression;
class ParseToken;

/**
 * This class provides the main interface for parsing expressions.
 */

class LEPTON_EXPORT Parser {
public:
    /**
     * Parse a mathematical expression and return a representation of it as an abstract syntax tree.
     */
    static ParsedExpression parse(const std::string& expression);
    /**
     * Parse a mathematical expression and return a representation of it as an abstract syntax tree.
     *
     * @param customFunctions   a map specifying user defined functions that may appear in the expression.
     *                          The keys are function names, and the values are corresponding CustomFunction objects.
     */
    static ParsedExpression parse(const std::string& expression, const std::map<std::string, CustomFunction*>& customFunctions);
private:
    // Internal parsing stages.  Only the declarations are visible in this header; see
    // Parser.cpp for what each stage does.
    static std::string trim(const std::string& expression);
    static std::vector<ParseToken> tokenize(const std::string& expression);
    static ParseToken getNextToken(const std::string& expression, int start);
    static ExpressionTreeNode parsePrecedence(const std::vector<ParseToken>& tokens, int& pos, const std::map<std::string, CustomFunction*>& customFunctions,
            const std::map<std::string, ExpressionTreeNode>& subexpressionDefs, int precedence);
    static Operation* getOperatorOperation(const std::string& name);
    static Operation* getFunctionOperation(const std::string& name, const std::map<std::string, CustomFunction*>& customFunctions);
};

} // namespace Lepton

#endif /*LEPTON_PARSER_H_*/
// diff --git a/lib/colvars/lepton/include/lepton/windowsIncludes.h b/lib/colvars/lepton/include/lepton/windowsIncludes.h
// new file mode 100644
// index 0000000000000000000000000000000000000000..798229850e78305231f6216a883361422180d9a7
// --- /dev/null
// +++ b/lib/colvars/lepton/include/lepton/windowsIncludes.h
// @@ -0,0 +1,41 @@
#ifndef LEPTON_WINDOW_INCLUDE_H_
#define LEPTON_WINDOW_INCLUDE_H_

/*
 * Shared libraries are messy in Visual Studio.
We have to distinguish three
 * cases:
 *   (1) this header is being used to build the Lepton shared library
 *       (dllexport)
 *   (2) this header is being used by a *client* of the Lepton shared
 *       library (dllimport)
 *   (3) we are building the Lepton static library, or the client is
 *       being compiled with the expectation of linking with the
 *       Lepton static library (nothing special needed)
 * The build of this library is expected to define one of the symbols
 *     LEPTON_BUILDING_{SHARED|STATIC}_LIBRARY
 * (these are the exact, upper-case names tested below).
 * Client code normally has no special symbol defined, in which case we'll
 * assume it wants to use the shared library. However, if the client defines
 * the symbol LEPTON_USE_STATIC_LIBRARIES we'll suppress the dllimport so
 * that the client code can be linked with static libraries. Note that
 * the client symbol is not library dependent, while the library symbols
 * affect only the Lepton library, meaning that other libraries can
 * be clients of this one. However, we are assuming all-static or all-shared.
 */

#ifdef _MSC_VER
    // We don't want to hear about how sprintf is "unsafe".
    #pragma warning(disable:4996)
    // Keep MS VC++ quiet about lack of dll export of private members.
    #pragma warning(disable:4251)
    #if defined(LEPTON_BUILDING_SHARED_LIBRARY)
        #define LEPTON_EXPORT __declspec(dllexport)
    #elif defined(LEPTON_BUILDING_STATIC_LIBRARY) || defined(LEPTON_USE_STATIC_LIBRARIES)
        #define LEPTON_EXPORT
    #else
        #define LEPTON_EXPORT __declspec(dllimport)   // i.e., a client of a shared library
    #endif
#else
    // Any compiler that does not define _MSC_VER: LEPTON_EXPORT expands to nothing.
    #define LEPTON_EXPORT // Linux, Mac
#endif

#endif // LEPTON_WINDOW_INCLUDE_H_
// diff --git a/lib/colvars/lepton/src/CompiledExpression.cpp b/lib/colvars/lepton/src/CompiledExpression.cpp
// new file mode 100644
// index 0000000000000000000000000000000000000000..302f294ee2380e58326833d526b77b1904755ee6
// --- /dev/null
// +++ b/lib/colvars/lepton/src/CompiledExpression.cpp
// @@ -0,0 +1,400 @@
/* -------------------------------------------------------------------------- *
 *                                   Lepton                                   *
 * -------------------------------------------------------------------------- *
 * This is part of the Lepton expression parser originating from              *
 * Simbios, the NIH National Center for Physics-Based Simulation of           *
 * Biological Structures at Stanford, funded under the NIH Roadmap for        *
 * Medical Research, grant U54 GM072970. See https://simtk.org.               *
 *                                                                            *
 * Portions copyright (c) 2013-2016 Stanford University and the Authors.      *
 * Authors: Peter Eastman                                                     *
 * Contributors:                                                              *
 *                                                                            *
 * Permission is hereby granted, free of charge, to any person obtaining a    *
 * copy of this software and associated documentation files (the "Software"), *
 * to deal in the Software without restriction, including without limitation  *
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,   *
 * and/or sell copies of the Software, and to permit persons to whom the      *
 * Software is furnished to do so, subject to the following conditions:       *
 *                                                                            *
 * The above copyright notice and this permission notice shall be included in *
 * all copies or substantial portions of the Software.
* + * * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL * + * THE AUTHORS, CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, * + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR * + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE * + * USE OR OTHER DEALINGS IN THE SOFTWARE. * + * -------------------------------------------------------------------------- */ + +#include "lepton/CompiledExpression.h" +#include "lepton/Operation.h" +#include "lepton/ParsedExpression.h" +#include <utility> + +using namespace Lepton; +using namespace std; +#ifdef LEPTON_USE_JIT + using namespace asmjit; +#endif + +CompiledExpression::CompiledExpression() : jitCode(NULL) { +} + +CompiledExpression::CompiledExpression(const ParsedExpression& expression) : jitCode(NULL) { + ParsedExpression expr = expression.optimize(); // Just in case it wasn't already optimized. 
+ vector<pair<ExpressionTreeNode, int> > temps; + compileExpression(expr.getRootNode(), temps); + int maxArguments = 1; + for (int i = 0; i < (int) operation.size(); i++) + if (operation[i]->getNumArguments() > maxArguments) + maxArguments = operation[i]->getNumArguments(); + argValues.resize(maxArguments); +#ifdef LEPTON_USE_JIT + generateJitCode(); +#endif +} + +CompiledExpression::~CompiledExpression() { + for (int i = 0; i < (int) operation.size(); i++) + if (operation[i] != NULL) + delete operation[i]; +} + +CompiledExpression::CompiledExpression(const CompiledExpression& expression) : jitCode(NULL) { + *this = expression; +} + +CompiledExpression& CompiledExpression::operator=(const CompiledExpression& expression) { + arguments = expression.arguments; + target = expression.target; + variableIndices = expression.variableIndices; + variableNames = expression.variableNames; + workspace.resize(expression.workspace.size()); + argValues.resize(expression.argValues.size()); + operation.resize(expression.operation.size()); + for (int i = 0; i < (int) operation.size(); i++) + operation[i] = expression.operation[i]->clone(); + setVariableLocations(variablePointers); + return *this; +} + +void CompiledExpression::compileExpression(const ExpressionTreeNode& node, vector<pair<ExpressionTreeNode, int> >& temps) { + if (findTempIndex(node, temps) != -1) + return; // We have already processed a node identical to this one. + + // Process the child nodes. + + vector<int> args; + for (int i = 0; i < node.getChildren().size(); i++) { + compileExpression(node.getChildren()[i], temps); + args.push_back(findTempIndex(node.getChildren()[i], temps)); + } + + // Process this node. 
+ + if (node.getOperation().getId() == Operation::VARIABLE) { + variableIndices[node.getOperation().getName()] = (int) workspace.size(); + variableNames.insert(node.getOperation().getName()); + } + else { + int stepIndex = (int) arguments.size(); + arguments.push_back(vector<int>()); + target.push_back((int) workspace.size()); + operation.push_back(node.getOperation().clone()); + if (args.size() == 0) + arguments[stepIndex].push_back(0); // The value won't actually be used. We just need something there. + else { + // If the arguments are sequential, we can just pass a pointer to the first one. + + bool sequential = true; + for (int i = 1; i < args.size(); i++) + if (args[i] != args[i-1]+1) + sequential = false; + if (sequential) + arguments[stepIndex].push_back(args[0]); + else + arguments[stepIndex] = args; + } + } + temps.push_back(make_pair(node, (int) workspace.size())); + workspace.push_back(0.0); +} + +int CompiledExpression::findTempIndex(const ExpressionTreeNode& node, vector<pair<ExpressionTreeNode, int> >& temps) { + for (int i = 0; i < (int) temps.size(); i++) + if (temps[i].first == node) + return i; + return -1; +} + +const set<string>& CompiledExpression::getVariables() const { + return variableNames; +} + +double& CompiledExpression::getVariableReference(const string& name) { + map<string, double*>::iterator pointer = variablePointers.find(name); + if (pointer != variablePointers.end()) + return *pointer->second; + map<string, int>::iterator index = variableIndices.find(name); + if (index == variableIndices.end()) + throw Exception("getVariableReference: Unknown variable '"+name+"'"); + return workspace[index->second]; +} + +void CompiledExpression::setVariableLocations(map<string, double*>& variableLocations) { + variablePointers = variableLocations; +#ifdef LEPTON_USE_JIT + // Rebuild the JIT code. + + if (workspace.size() > 0) + generateJitCode(); +#else + // Make a list of all variables we will need to copy before evaluating the expression. 
+ + variablesToCopy.clear(); + for (map<string, int>::const_iterator iter = variableIndices.begin(); iter != variableIndices.end(); ++iter) { + map<string, double*>::iterator pointer = variablePointers.find(iter->first); + if (pointer != variablePointers.end()) + variablesToCopy.push_back(make_pair(&workspace[iter->second], pointer->second)); + } +#endif +} + +double CompiledExpression::evaluate() const { +#ifdef LEPTON_USE_JIT + return ((double (*)()) jitCode)(); +#else + for (int i = 0; i < variablesToCopy.size(); i++) + *variablesToCopy[i].first = *variablesToCopy[i].second; + + // Loop over the operations and evaluate each one. + + for (int step = 0; step < operation.size(); step++) { + const vector<int>& args = arguments[step]; + if (args.size() == 1) + workspace[target[step]] = operation[step]->evaluate(&workspace[args[0]], dummyVariables); + else { + for (int i = 0; i < args.size(); i++) + argValues[i] = workspace[args[i]]; + workspace[target[step]] = operation[step]->evaluate(&argValues[0], dummyVariables); + } + } + return workspace[workspace.size()-1]; +#endif +} + +#ifdef LEPTON_USE_JIT +static double evaluateOperation(Operation* op, double* args) { + map<string, double>* dummyVariables = NULL; + return op->evaluate(args, *dummyVariables); +} + +void CompiledExpression::generateJitCode() { + X86Compiler c(&runtime); + c.addFunc(kFuncConvHost, FuncBuilder0<double>()); + vector<X86XmmVar> workspaceVar(workspace.size()); + for (int i = 0; i < (int) workspaceVar.size(); i++) + workspaceVar[i] = c.newXmmVar(kX86VarTypeXmmSd); + X86GpVar argsPointer(c); + c.mov(argsPointer, imm_ptr(&argValues[0])); + + // Load the arguments into variables. 
+ + for (set<string>::const_iterator iter = variableNames.begin(); iter != variableNames.end(); ++iter) { + map<string, int>::iterator index = variableIndices.find(*iter); + X86GpVar variablePointer(c); + c.mov(variablePointer, imm_ptr(&getVariableReference(index->first))); + c.movsd(workspaceVar[index->second], x86::ptr(variablePointer, 0, 0)); + } + + // Make a list of all constants that will be needed for evaluation. + + vector<int> operationConstantIndex(operation.size(), -1); + for (int step = 0; step < (int) operation.size(); step++) { + // Find the constant value (if any) used by this operation. + + Operation& op = *operation[step]; + double value; + if (op.getId() == Operation::CONSTANT) + value = dynamic_cast<Operation::Constant&>(op).getValue(); + else if (op.getId() == Operation::ADD_CONSTANT) + value = dynamic_cast<Operation::AddConstant&>(op).getValue(); + else if (op.getId() == Operation::MULTIPLY_CONSTANT) + value = dynamic_cast<Operation::MultiplyConstant&>(op).getValue(); + else if (op.getId() == Operation::RECIPROCAL) + value = 1.0; + else if (op.getId() == Operation::STEP) + value = 1.0; + else if (op.getId() == Operation::DELTA) + value = 1.0; + else + continue; + + // See if we already have a variable for this constant. + + for (int i = 0; i < (int) constants.size(); i++) + if (value == constants[i]) { + operationConstantIndex[step] = i; + break; + } + if (operationConstantIndex[step] == -1) { + operationConstantIndex[step] = constants.size(); + constants.push_back(value); + } + } + + // Load constants into variables. + + vector<X86XmmVar> constantVar(constants.size()); + if (constants.size() > 0) { + X86GpVar constantsPointer(c); + c.mov(constantsPointer, imm_ptr(&constants[0])); + for (int i = 0; i < (int) constants.size(); i++) { + constantVar[i] = c.newXmmVar(kX86VarTypeXmmSd); + c.movsd(constantVar[i], x86::ptr(constantsPointer, 8*i, 0)); + } + } + + // Evaluate the operations. 
+ + for (int step = 0; step < (int) operation.size(); step++) { + Operation& op = *operation[step]; + vector<int> args = arguments[step]; + if (args.size() == 1) { + // One or more sequential arguments. Fill out the list. + + for (int i = 1; i < op.getNumArguments(); i++) + args.push_back(args[0]+i); + } + + // Generate instructions to execute this operation. + + switch (op.getId()) { + case Operation::CONSTANT: + c.movsd(workspaceVar[target[step]], constantVar[operationConstantIndex[step]]); + break; + case Operation::ADD: + c.movsd(workspaceVar[target[step]], workspaceVar[args[0]]); + c.addsd(workspaceVar[target[step]], workspaceVar[args[1]]); + break; + case Operation::SUBTRACT: + c.movsd(workspaceVar[target[step]], workspaceVar[args[0]]); + c.subsd(workspaceVar[target[step]], workspaceVar[args[1]]); + break; + case Operation::MULTIPLY: + c.movsd(workspaceVar[target[step]], workspaceVar[args[0]]); + c.mulsd(workspaceVar[target[step]], workspaceVar[args[1]]); + break; + case Operation::DIVIDE: + c.movsd(workspaceVar[target[step]], workspaceVar[args[0]]); + c.divsd(workspaceVar[target[step]], workspaceVar[args[1]]); + break; + case Operation::NEGATE: + c.xorps(workspaceVar[target[step]], workspaceVar[target[step]]); + c.subsd(workspaceVar[target[step]], workspaceVar[args[0]]); + break; + case Operation::SQRT: + c.sqrtsd(workspaceVar[target[step]], workspaceVar[args[0]]); + break; + case Operation::EXP: + generateSingleArgCall(c, workspaceVar[target[step]], workspaceVar[args[0]], exp); + break; + case Operation::LOG: + generateSingleArgCall(c, workspaceVar[target[step]], workspaceVar[args[0]], log); + break; + case Operation::SIN: + generateSingleArgCall(c, workspaceVar[target[step]], workspaceVar[args[0]], sin); + break; + case Operation::COS: + generateSingleArgCall(c, workspaceVar[target[step]], workspaceVar[args[0]], cos); + break; + case Operation::TAN: + generateSingleArgCall(c, workspaceVar[target[step]], workspaceVar[args[0]], tan); + break; + case 
Operation::ASIN: + generateSingleArgCall(c, workspaceVar[target[step]], workspaceVar[args[0]], asin); + break; + case Operation::ACOS: + generateSingleArgCall(c, workspaceVar[target[step]], workspaceVar[args[0]], acos); + break; + case Operation::ATAN: + generateSingleArgCall(c, workspaceVar[target[step]], workspaceVar[args[0]], atan); + break; + case Operation::SINH: + generateSingleArgCall(c, workspaceVar[target[step]], workspaceVar[args[0]], sinh); + break; + case Operation::COSH: + generateSingleArgCall(c, workspaceVar[target[step]], workspaceVar[args[0]], cosh); + break; + case Operation::TANH: + generateSingleArgCall(c, workspaceVar[target[step]], workspaceVar[args[0]], tanh); + break; + case Operation::STEP: + c.xorps(workspaceVar[target[step]], workspaceVar[target[step]]); + c.cmpsd(workspaceVar[target[step]], workspaceVar[args[0]], imm(18)); // Comparison mode is _CMP_LE_OQ = 18 + c.andps(workspaceVar[target[step]], constantVar[operationConstantIndex[step]]); + break; + case Operation::DELTA: + c.xorps(workspaceVar[target[step]], workspaceVar[target[step]]); + c.cmpsd(workspaceVar[target[step]], workspaceVar[args[0]], imm(16)); // Comparison mode is _CMP_EQ_OS = 16 + c.andps(workspaceVar[target[step]], constantVar[operationConstantIndex[step]]); + break; + case Operation::SQUARE: + c.movsd(workspaceVar[target[step]], workspaceVar[args[0]]); + c.mulsd(workspaceVar[target[step]], workspaceVar[args[0]]); + break; + case Operation::CUBE: + c.movsd(workspaceVar[target[step]], workspaceVar[args[0]]); + c.mulsd(workspaceVar[target[step]], workspaceVar[args[0]]); + c.mulsd(workspaceVar[target[step]], workspaceVar[args[0]]); + break; + case Operation::RECIPROCAL: + c.movsd(workspaceVar[target[step]], constantVar[operationConstantIndex[step]]); + c.divsd(workspaceVar[target[step]], workspaceVar[args[0]]); + break; + case Operation::ADD_CONSTANT: + c.movsd(workspaceVar[target[step]], workspaceVar[args[0]]); + c.addsd(workspaceVar[target[step]], 
constantVar[operationConstantIndex[step]]); + break; + case Operation::MULTIPLY_CONSTANT: + c.movsd(workspaceVar[target[step]], workspaceVar[args[0]]); + c.mulsd(workspaceVar[target[step]], constantVar[operationConstantIndex[step]]); + break; + case Operation::ABS: + generateSingleArgCall(c, workspaceVar[target[step]], workspaceVar[args[0]], fabs); + break; + case Operation::FLOOR: + generateSingleArgCall(c, workspaceVar[target[step]], workspaceVar[args[0]], floor); + break; + case Operation::CEIL: + generateSingleArgCall(c, workspaceVar[target[step]], workspaceVar[args[0]], ceil); + break; + default: + // Just invoke evaluateOperation(). + + for (int i = 0; i < (int) args.size(); i++) + c.movsd(x86::ptr(argsPointer, 8*i, 0), workspaceVar[args[i]]); + X86GpVar fn(c, kVarTypeIntPtr); + c.mov(fn, imm_ptr((void*) evaluateOperation)); + X86CallNode* call = c.call(fn, kFuncConvHost, FuncBuilder2<double, Operation*, double*>()); + call->setArg(0, imm_ptr(&op)); + call->setArg(1, imm_ptr(&argValues[0])); + call->setRet(0, workspaceVar[target[step]]); + } + } + c.ret(workspaceVar[workspace.size()-1]); + c.endFunc(); + jitCode = c.make(); +} + +void CompiledExpression::generateSingleArgCall(X86Compiler& c, X86XmmVar& dest, X86XmmVar& arg, double (*function)(double)) { + X86GpVar fn(c, kVarTypeIntPtr); + c.mov(fn, imm_ptr((void*) function)); + X86CallNode* call = c.call(fn, kFuncConvHost, FuncBuilder1<double, double>()); + call->setArg(0, arg); + call->setRet(0, dest); +} +#endif diff --git a/lib/colvars/lepton/src/ExpressionProgram.cpp b/lib/colvars/lepton/src/ExpressionProgram.cpp new file mode 100644 index 0000000000000000000000000000000000000000..65d3f0c79a6fac0a33a6fd6be49c8ab3f7678fee --- /dev/null +++ b/lib/colvars/lepton/src/ExpressionProgram.cpp @@ -0,0 +1,105 @@ +/* -------------------------------------------------------------------------- * + * Lepton * + * -------------------------------------------------------------------------- * + * This is part of the Lepton 
expression parser originating from * + * Simbios, the NIH National Center for Physics-Based Simulation of * + * Biological Structures at Stanford, funded under the NIH Roadmap for * + * Medical Research, grant U54 GM072970. See https://simtk.org. * + * * + * Portions copyright (c) 2009-2013 Stanford University and the Authors. * + * Authors: Peter Eastman * + * Contributors: * + * * + * Permission is hereby granted, free of charge, to any person obtaining a * + * copy of this software and associated documentation files (the "Software"), * + * to deal in the Software without restriction, including without limitation * + * the rights to use, copy, modify, merge, publish, distribute, sublicense, * + * and/or sell copies of the Software, and to permit persons to whom the * + * Software is furnished to do so, subject to the following conditions: * + * * + * The above copyright notice and this permission notice shall be included in * + * all copies or substantial portions of the Software. * + * * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL * + * THE AUTHORS, CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, * + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR * + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE * + * USE OR OTHER DEALINGS IN THE SOFTWARE. 
* + * -------------------------------------------------------------------------- */ + +#include "lepton/ExpressionProgram.h" +#include "lepton/Operation.h" +#include "lepton/ParsedExpression.h" + +using namespace Lepton; +using namespace std; + +ExpressionProgram::ExpressionProgram() : maxArgs(0), stackSize(0) { +} + +ExpressionProgram::ExpressionProgram(const ParsedExpression& expression) : maxArgs(0), stackSize(0) { + buildProgram(expression.getRootNode()); + int currentStackSize = 0; + for (int i = 0; i < (int) operations.size(); i++) { + int args = operations[i]->getNumArguments(); + if (args > maxArgs) + maxArgs = args; + currentStackSize += 1-args; + if (currentStackSize > stackSize) + stackSize = currentStackSize; + } +} + +ExpressionProgram::~ExpressionProgram() { + for (int i = 0; i < (int) operations.size(); i++) + delete operations[i]; +} + +ExpressionProgram::ExpressionProgram(const ExpressionProgram& program) { + *this = program; +} + +ExpressionProgram& ExpressionProgram::operator=(const ExpressionProgram& program) { + maxArgs = program.maxArgs; + stackSize = program.stackSize; + operations.resize(program.operations.size()); + for (int i = 0; i < (int) operations.size(); i++) + operations[i] = program.operations[i]->clone(); + return *this; +} + +void ExpressionProgram::buildProgram(const ExpressionTreeNode& node) { + for (int i = (int) node.getChildren().size()-1; i >= 0; i--) + buildProgram(node.getChildren()[i]); + operations.push_back(node.getOperation().clone()); +} + +int ExpressionProgram::getNumOperations() const { + return (int) operations.size(); +} + +const Operation& ExpressionProgram::getOperation(int index) const { + return *operations[index]; +} + +int ExpressionProgram::getStackSize() const { + return stackSize; +} + +double ExpressionProgram::evaluate() const { + return evaluate(map<string, double>()); +} + +double ExpressionProgram::evaluate(const std::map<std::string, double>& variables) const { + vector<double> stack(stackSize+1); 
+ int stackPointer = stackSize; + for (int i = 0; i < (int) operations.size(); i++) { + int numArgs = operations[i]->getNumArguments(); + double result = operations[i]->evaluate(&stack[stackPointer], variables); + stackPointer += numArgs-1; + stack[stackPointer] = result; + } + return stack[stackSize-1]; +} diff --git a/lib/colvars/lepton/src/ExpressionTreeNode.cpp b/lib/colvars/lepton/src/ExpressionTreeNode.cpp new file mode 100644 index 0000000000000000000000000000000000000000..8519878262ab67e9e1d82712697b1d49b821168e --- /dev/null +++ b/lib/colvars/lepton/src/ExpressionTreeNode.cpp @@ -0,0 +1,107 @@ +/* -------------------------------------------------------------------------- * + * Lepton * + * -------------------------------------------------------------------------- * + * This is part of the Lepton expression parser originating from * + * Simbios, the NIH National Center for Physics-Based Simulation of * + * Biological Structures at Stanford, funded under the NIH Roadmap for * + * Medical Research, grant U54 GM072970. See https://simtk.org. * + * * + * Portions copyright (c) 2009-2015 Stanford University and the Authors. * + * Authors: Peter Eastman * + * Contributors: * + * * + * Permission is hereby granted, free of charge, to any person obtaining a * + * copy of this software and associated documentation files (the "Software"), * + * to deal in the Software without restriction, including without limitation * + * the rights to use, copy, modify, merge, publish, distribute, sublicense, * + * and/or sell copies of the Software, and to permit persons to whom the * + * Software is furnished to do so, subject to the following conditions: * + * * + * The above copyright notice and this permission notice shall be included in * + * all copies or substantial portions of the Software. 
* + * * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL * + * THE AUTHORS, CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, * + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR * + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE * + * USE OR OTHER DEALINGS IN THE SOFTWARE. * + * -------------------------------------------------------------------------- */ + +#include "lepton/ExpressionTreeNode.h" +#include "lepton/Exception.h" +#include "lepton/Operation.h" + +using namespace Lepton; +using namespace std; + +ExpressionTreeNode::ExpressionTreeNode(Operation* operation, const vector<ExpressionTreeNode>& children) : operation(operation), children(children) { + if (operation->getNumArguments() != children.size()) + throw Exception("wrong number of arguments to function: "+operation->getName()); +} + +ExpressionTreeNode::ExpressionTreeNode(Operation* operation, const ExpressionTreeNode& child1, const ExpressionTreeNode& child2) : operation(operation) { + children.push_back(child1); + children.push_back(child2); + if (operation->getNumArguments() != children.size()) + throw Exception("wrong number of arguments to function: "+operation->getName()); +} + +ExpressionTreeNode::ExpressionTreeNode(Operation* operation, const ExpressionTreeNode& child) : operation(operation) { + children.push_back(child); + if (operation->getNumArguments() != children.size()) + throw Exception("wrong number of arguments to function: "+operation->getName()); +} + +ExpressionTreeNode::ExpressionTreeNode(Operation* operation) : operation(operation) { + if (operation->getNumArguments() != children.size()) + throw Exception("wrong number of arguments to function: "+operation->getName()); +} + +ExpressionTreeNode::ExpressionTreeNode(const 
ExpressionTreeNode& node) : operation(node.operation == NULL ? NULL : node.operation->clone()), children(node.getChildren()) { +} + +ExpressionTreeNode::ExpressionTreeNode() : operation(NULL) { +} + +ExpressionTreeNode::~ExpressionTreeNode() { + if (operation != NULL) + delete operation; +} + +bool ExpressionTreeNode::operator!=(const ExpressionTreeNode& node) const { + if (node.getOperation() != getOperation()) + return true; + if (getOperation().isSymmetric() && getChildren().size() == 2) { + if (getChildren()[0] == node.getChildren()[0] && getChildren()[1] == node.getChildren()[1]) + return false; + if (getChildren()[0] == node.getChildren()[1] && getChildren()[1] == node.getChildren()[0]) + return false; + return true; + } + for (int i = 0; i < (int) getChildren().size(); i++) + if (getChildren()[i] != node.getChildren()[i]) + return true; + return false; +} + +bool ExpressionTreeNode::operator==(const ExpressionTreeNode& node) const { + return !(*this != node); +} + +ExpressionTreeNode& ExpressionTreeNode::operator=(const ExpressionTreeNode& node) { + if (operation != NULL) + delete operation; + operation = node.getOperation().clone(); + children = node.getChildren(); + return *this; +} + +const Operation& ExpressionTreeNode::getOperation() const { + return *operation; +} + +const vector<ExpressionTreeNode>& ExpressionTreeNode::getChildren() const { + return children; +} diff --git a/lib/colvars/lepton/src/MSVC_erfc.h b/lib/colvars/lepton/src/MSVC_erfc.h new file mode 100644 index 0000000000000000000000000000000000000000..eadb20fdf89322f3836c058be530e10d53a629d6 --- /dev/null +++ b/lib/colvars/lepton/src/MSVC_erfc.h @@ -0,0 +1,87 @@ +#ifndef LEPTON_MSVC_ERFC_H_ +#define LEPTON_MSVC_ERFC_H_ + +/* + * Up to version 11 (VC++ 2012), Microsoft does not support the + * standard C99 erf() and erfc() functions so we have to fake them here. + * These were added in version 12 (VC++ 2013), which sets _MSC_VER=1800 + * (VC11 has _MSC_VER=1700). 
+ */ + +#if defined(_MSC_VER) +#define M_PI 3.14159265358979323846264338327950288 + +#if _MSC_VER <= 1700 // 1700 is VC11, 1800 is VC12 +/*************************** +* erf.cpp +* author: Steve Strand +* written: 29-Jan-04 +***************************/ + +#include <cmath> + +static const double rel_error= 1E-12; //calculate 12 significant figures +//you can adjust rel_error to trade off between accuracy and speed +//but don't ask for > 15 figures (assuming usual 52 bit mantissa in a double) + +static double erfc(double x); + +static double erf(double x) +//erf(x) = 2/sqrt(pi)*integral(exp(-t^2),t,0,x) +// = 2/sqrt(pi)*[x - x^3/3 + x^5/5*2! - x^7/7*3! + ...] +// = 1-erfc(x) +{ + static const double two_sqrtpi= 1.128379167095512574; // 2/sqrt(pi) + if (fabs(x) > 2.2) { + return 1.0 - erfc(x); //use continued fraction when fabs(x) > 2.2 + } + double sum= x, term= x, xsqr= x*x; + int j= 1; + do { + term*= xsqr/j; + sum-= term/(2*j+1); + ++j; + term*= xsqr/j; + sum+= term/(2*j+1); + ++j; + } while (fabs(term)/sum > rel_error); + return two_sqrtpi*sum; +} + + +static double erfc(double x) +//erfc(x) = 2/sqrt(pi)*integral(exp(-t^2),t,x,inf) +// = exp(-x^2)/sqrt(pi) * [1/x+ (1/2)/x+ (2/2)/x+ (3/2)/x+ (4/2)/x+ ...] +// = 1-erf(x) +//expression inside [] is a continued fraction so '+' means add to denominator only +{ + static const double one_sqrtpi= 0.564189583547756287; // 1/sqrt(pi) + if (fabs(x) < 2.2) { + return 1.0 - erf(x); //use series when fabs(x) < 2.2 + } + // Don't look for x==0 here! 
+ if (x < 0) { //continued fraction only valid for x>0 + return 2.0 - erfc(-x); + } + double a=1, b=x; //last two convergent numerators + double c=x, d=x*x+0.5; //last two convergent denominators + double q1, q2= b/d; //last two convergents (a/c and b/d) + double n= 1.0, t; + do { + t= a*n+b*x; + a= b; + b= t; + t= c*n+d*x; + c= d; + d= t; + n+= 0.5; + q1= q2; + q2= b/d; + } while (fabs(q1-q2)/q2 > rel_error); + return one_sqrtpi*exp(-x*x)*q2; +} + +#endif // _MSC_VER <= 1700 +#endif // _MSC_VER + +#endif // LEPTON_MSVC_ERFC_H_ diff --git a/lib/colvars/lepton/src/Operation.cpp b/lib/colvars/lepton/src/Operation.cpp new file mode 100644 index 0000000000000000000000000000000000000000..693dea2edec60db68740b715a8d589d32131bead --- /dev/null +++ b/lib/colvars/lepton/src/Operation.cpp @@ -0,0 +1,335 @@ + +/* -------------------------------------------------------------------------- * + * Lepton * + * -------------------------------------------------------------------------- * + * This is part of the Lepton expression parser originating from * + * Simbios, the NIH National Center for Physics-Based Simulation of * + * Biological Structures at Stanford, funded under the NIH Roadmap for * + * Medical Research, grant U54 GM072970. See https://simtk.org. * + * * + * Portions copyright (c) 2009-2015 Stanford University and the Authors. 
* + * Authors: Peter Eastman * + * Contributors: * + * * + * Permission is hereby granted, free of charge, to any person obtaining a * + * copy of this software and associated documentation files (the "Software"), * + * to deal in the Software without restriction, including without limitation * + * the rights to use, copy, modify, merge, publish, distribute, sublicense, * + * and/or sell copies of the Software, and to permit persons to whom the * + * Software is furnished to do so, subject to the following conditions: * + * * + * The above copyright notice and this permission notice shall be included in * + * all copies or substantial portions of the Software. * + * * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL * + * THE AUTHORS, CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, * + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR * + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE * + * USE OR OTHER DEALINGS IN THE SOFTWARE. 
* + * -------------------------------------------------------------------------- */ + +#include "lepton/Operation.h" +#include "lepton/ExpressionTreeNode.h" +#include "MSVC_erfc.h" + +using namespace Lepton; +using namespace std; + +double Operation::Erf::evaluate(double* args, const map<string, double>& variables) const { + return erf(args[0]); +} + +double Operation::Erfc::evaluate(double* args, const map<string, double>& variables) const { + return erfc(args[0]); +} + +ExpressionTreeNode Operation::Constant::differentiate(const std::vector<ExpressionTreeNode>& children, const std::vector<ExpressionTreeNode>& childDerivs, const std::string& variable) const { + return ExpressionTreeNode(new Operation::Constant(0.0)); +} + +ExpressionTreeNode Operation::Variable::differentiate(const std::vector<ExpressionTreeNode>& children, const std::vector<ExpressionTreeNode>& childDerivs, const std::string& variable) const { + if (variable == name) + return ExpressionTreeNode(new Operation::Constant(1.0)); + return ExpressionTreeNode(new Operation::Constant(0.0)); +} + +ExpressionTreeNode Operation::Custom::differentiate(const std::vector<ExpressionTreeNode>& children, const std::vector<ExpressionTreeNode>& childDerivs, const std::string& variable) const { + if (function->getNumArguments() == 0) + return ExpressionTreeNode(new Operation::Constant(0.0)); + ExpressionTreeNode result = ExpressionTreeNode(new Operation::Multiply(), ExpressionTreeNode(new Operation::Custom(*this, 0), children), childDerivs[0]); + for (int i = 1; i < getNumArguments(); i++) { + result = ExpressionTreeNode(new Operation::Add(), + result, + ExpressionTreeNode(new Operation::Multiply(), ExpressionTreeNode(new Operation::Custom(*this, i), children), childDerivs[i])); + } + return result; +} + +ExpressionTreeNode Operation::Add::differentiate(const std::vector<ExpressionTreeNode>& children, const std::vector<ExpressionTreeNode>& childDerivs, const std::string& variable) const { + return 
ExpressionTreeNode(new Operation::Add(), childDerivs[0], childDerivs[1]); +} + +ExpressionTreeNode Operation::Subtract::differentiate(const std::vector<ExpressionTreeNode>& children, const std::vector<ExpressionTreeNode>& childDerivs, const std::string& variable) const { + return ExpressionTreeNode(new Operation::Subtract(), childDerivs[0], childDerivs[1]); +} + +ExpressionTreeNode Operation::Multiply::differentiate(const std::vector<ExpressionTreeNode>& children, const std::vector<ExpressionTreeNode>& childDerivs, const std::string& variable) const { + return ExpressionTreeNode(new Operation::Add(), + ExpressionTreeNode(new Operation::Multiply(), children[0], childDerivs[1]), + ExpressionTreeNode(new Operation::Multiply(), children[1], childDerivs[0])); +} + +ExpressionTreeNode Operation::Divide::differentiate(const std::vector<ExpressionTreeNode>& children, const std::vector<ExpressionTreeNode>& childDerivs, const std::string& variable) const { + return ExpressionTreeNode(new Operation::Divide(), + ExpressionTreeNode(new Operation::Subtract(), + ExpressionTreeNode(new Operation::Multiply(), children[1], childDerivs[0]), + ExpressionTreeNode(new Operation::Multiply(), children[0], childDerivs[1])), + ExpressionTreeNode(new Operation::Square(), children[1])); +} + +ExpressionTreeNode Operation::Power::differentiate(const std::vector<ExpressionTreeNode>& children, const std::vector<ExpressionTreeNode>& childDerivs, const std::string& variable) const { + return ExpressionTreeNode(new Operation::Add(), + ExpressionTreeNode(new Operation::Multiply(), + ExpressionTreeNode(new Operation::Multiply(), + children[1], + ExpressionTreeNode(new Operation::Power(), + children[0], ExpressionTreeNode(new Operation::AddConstant(-1.0), children[1]))), + childDerivs[0]), + ExpressionTreeNode(new Operation::Multiply(), + ExpressionTreeNode(new Operation::Multiply(), + ExpressionTreeNode(new Operation::Log(), children[0]), + ExpressionTreeNode(new Operation::Power(), children[0], 
children[1])), + childDerivs[1])); +} + +ExpressionTreeNode Operation::Negate::differentiate(const std::vector<ExpressionTreeNode>& children, const std::vector<ExpressionTreeNode>& childDerivs, const std::string& variable) const { + return ExpressionTreeNode(new Operation::Negate(), childDerivs[0]); +} + +ExpressionTreeNode Operation::Sqrt::differentiate(const std::vector<ExpressionTreeNode>& children, const std::vector<ExpressionTreeNode>& childDerivs, const std::string& variable) const { + return ExpressionTreeNode(new Operation::Multiply(), + ExpressionTreeNode(new Operation::MultiplyConstant(0.5), + ExpressionTreeNode(new Operation::Reciprocal(), + ExpressionTreeNode(new Operation::Sqrt(), children[0]))), + childDerivs[0]); +} + +ExpressionTreeNode Operation::Exp::differentiate(const std::vector<ExpressionTreeNode>& children, const std::vector<ExpressionTreeNode>& childDerivs, const std::string& variable) const { + return ExpressionTreeNode(new Operation::Multiply(), + ExpressionTreeNode(new Operation::Exp(), children[0]), + childDerivs[0]); +} + +ExpressionTreeNode Operation::Log::differentiate(const std::vector<ExpressionTreeNode>& children, const std::vector<ExpressionTreeNode>& childDerivs, const std::string& variable) const { + return ExpressionTreeNode(new Operation::Multiply(), + ExpressionTreeNode(new Operation::Reciprocal(), children[0]), + childDerivs[0]); +} + +ExpressionTreeNode Operation::Sin::differentiate(const std::vector<ExpressionTreeNode>& children, const std::vector<ExpressionTreeNode>& childDerivs, const std::string& variable) const { + return ExpressionTreeNode(new Operation::Multiply(), + ExpressionTreeNode(new Operation::Cos(), children[0]), + childDerivs[0]); +} + +ExpressionTreeNode Operation::Cos::differentiate(const std::vector<ExpressionTreeNode>& children, const std::vector<ExpressionTreeNode>& childDerivs, const std::string& variable) const { + return ExpressionTreeNode(new Operation::Multiply(), + ExpressionTreeNode(new 
Operation::Negate(), + ExpressionTreeNode(new Operation::Sin(), children[0])), + childDerivs[0]); +} + +ExpressionTreeNode Operation::Sec::differentiate(const std::vector<ExpressionTreeNode>& children, const std::vector<ExpressionTreeNode>& childDerivs, const std::string& variable) const { + return ExpressionTreeNode(new Operation::Multiply(), + ExpressionTreeNode(new Operation::Multiply(), + ExpressionTreeNode(new Operation::Sec(), children[0]), + ExpressionTreeNode(new Operation::Tan(), children[0])), + childDerivs[0]); +} + +ExpressionTreeNode Operation::Csc::differentiate(const std::vector<ExpressionTreeNode>& children, const std::vector<ExpressionTreeNode>& childDerivs, const std::string& variable) const { + return ExpressionTreeNode(new Operation::Multiply(), + ExpressionTreeNode(new Operation::Negate(), + ExpressionTreeNode(new Operation::Multiply(), + ExpressionTreeNode(new Operation::Csc(), children[0]), + ExpressionTreeNode(new Operation::Cot(), children[0]))), + childDerivs[0]); +} + +ExpressionTreeNode Operation::Tan::differentiate(const std::vector<ExpressionTreeNode>& children, const std::vector<ExpressionTreeNode>& childDerivs, const std::string& variable) const { + return ExpressionTreeNode(new Operation::Multiply(), + ExpressionTreeNode(new Operation::Square(), + ExpressionTreeNode(new Operation::Sec(), children[0])), + childDerivs[0]); +} + +ExpressionTreeNode Operation::Cot::differentiate(const std::vector<ExpressionTreeNode>& children, const std::vector<ExpressionTreeNode>& childDerivs, const std::string& variable) const { + return ExpressionTreeNode(new Operation::Multiply(), + ExpressionTreeNode(new Operation::Negate(), + ExpressionTreeNode(new Operation::Square(), + ExpressionTreeNode(new Operation::Csc(), children[0]))), + childDerivs[0]); +} + +ExpressionTreeNode Operation::Asin::differentiate(const std::vector<ExpressionTreeNode>& children, const std::vector<ExpressionTreeNode>& childDerivs, const std::string& variable) const { + return 
ExpressionTreeNode(new Operation::Multiply(), + ExpressionTreeNode(new Operation::Reciprocal(), + ExpressionTreeNode(new Operation::Sqrt(), + ExpressionTreeNode(new Operation::Subtract(), + ExpressionTreeNode(new Operation::Constant(1.0)), + ExpressionTreeNode(new Operation::Square(), children[0])))), + childDerivs[0]); +} + +ExpressionTreeNode Operation::Acos::differentiate(const std::vector<ExpressionTreeNode>& children, const std::vector<ExpressionTreeNode>& childDerivs, const std::string& variable) const { + return ExpressionTreeNode(new Operation::Multiply(), + ExpressionTreeNode(new Operation::Negate(), + ExpressionTreeNode(new Operation::Reciprocal(), + ExpressionTreeNode(new Operation::Sqrt(), + ExpressionTreeNode(new Operation::Subtract(), + ExpressionTreeNode(new Operation::Constant(1.0)), + ExpressionTreeNode(new Operation::Square(), children[0]))))), + childDerivs[0]); +} + +ExpressionTreeNode Operation::Atan::differentiate(const std::vector<ExpressionTreeNode>& children, const std::vector<ExpressionTreeNode>& childDerivs, const std::string& variable) const { + return ExpressionTreeNode(new Operation::Multiply(), + ExpressionTreeNode(new Operation::Reciprocal(), + ExpressionTreeNode(new Operation::AddConstant(1.0), + ExpressionTreeNode(new Operation::Square(), children[0]))), + childDerivs[0]); +} + +ExpressionTreeNode Operation::Sinh::differentiate(const std::vector<ExpressionTreeNode>& children, const std::vector<ExpressionTreeNode>& childDerivs, const std::string& variable) const { + return ExpressionTreeNode(new Operation::Multiply(), + ExpressionTreeNode(new Operation::Cosh(), + children[0]), + childDerivs[0]); +} + +ExpressionTreeNode Operation::Cosh::differentiate(const std::vector<ExpressionTreeNode>& children, const std::vector<ExpressionTreeNode>& childDerivs, const std::string& variable) const { + return ExpressionTreeNode(new Operation::Multiply(), + ExpressionTreeNode(new Operation::Sinh(), + children[0]), + childDerivs[0]); +} + 
+ExpressionTreeNode Operation::Tanh::differentiate(const std::vector<ExpressionTreeNode>& children, const std::vector<ExpressionTreeNode>& childDerivs, const std::string& variable) const { + return ExpressionTreeNode(new Operation::Multiply(), + ExpressionTreeNode(new Operation::Subtract(), + ExpressionTreeNode(new Operation::Constant(1.0)), + ExpressionTreeNode(new Operation::Square(), + ExpressionTreeNode(new Operation::Tanh(), children[0]))), + childDerivs[0]); +} + +ExpressionTreeNode Operation::Erf::differentiate(const std::vector<ExpressionTreeNode>& children, const std::vector<ExpressionTreeNode>& childDerivs, const std::string& variable) const { + return ExpressionTreeNode(new Operation::Multiply(), + ExpressionTreeNode(new Operation::Multiply(), + ExpressionTreeNode(new Operation::Constant(2.0/sqrt(M_PI))), + ExpressionTreeNode(new Operation::Exp(), + ExpressionTreeNode(new Operation::Negate(), + ExpressionTreeNode(new Operation::Square(), children[0])))), + childDerivs[0]); +} + +ExpressionTreeNode Operation::Erfc::differentiate(const std::vector<ExpressionTreeNode>& children, const std::vector<ExpressionTreeNode>& childDerivs, const std::string& variable) const { + return ExpressionTreeNode(new Operation::Multiply(), + ExpressionTreeNode(new Operation::Multiply(), + ExpressionTreeNode(new Operation::Constant(-2.0/sqrt(M_PI))), + ExpressionTreeNode(new Operation::Exp(), + ExpressionTreeNode(new Operation::Negate(), + ExpressionTreeNode(new Operation::Square(), children[0])))), + childDerivs[0]); +} + +ExpressionTreeNode Operation::Step::differentiate(const std::vector<ExpressionTreeNode>& children, const std::vector<ExpressionTreeNode>& childDerivs, const std::string& variable) const { + return ExpressionTreeNode(new Operation::Constant(0.0)); +} + +ExpressionTreeNode Operation::Delta::differentiate(const std::vector<ExpressionTreeNode>& children, const std::vector<ExpressionTreeNode>& childDerivs, const std::string& variable) const { + return 
ExpressionTreeNode(new Operation::Constant(0.0)); +} + +ExpressionTreeNode Operation::Square::differentiate(const std::vector<ExpressionTreeNode>& children, const std::vector<ExpressionTreeNode>& childDerivs, const std::string& variable) const { + return ExpressionTreeNode(new Operation::Multiply(), + ExpressionTreeNode(new Operation::MultiplyConstant(2.0), + children[0]), + childDerivs[0]); +} + +ExpressionTreeNode Operation::Cube::differentiate(const std::vector<ExpressionTreeNode>& children, const std::vector<ExpressionTreeNode>& childDerivs, const std::string& variable) const { + return ExpressionTreeNode(new Operation::Multiply(), + ExpressionTreeNode(new Operation::MultiplyConstant(3.0), + ExpressionTreeNode(new Operation::Square(), children[0])), + childDerivs[0]); +} + +ExpressionTreeNode Operation::Reciprocal::differentiate(const std::vector<ExpressionTreeNode>& children, const std::vector<ExpressionTreeNode>& childDerivs, const std::string& variable) const { + return ExpressionTreeNode(new Operation::Multiply(), + ExpressionTreeNode(new Operation::Negate(), + ExpressionTreeNode(new Operation::Reciprocal(), + ExpressionTreeNode(new Operation::Square(), children[0]))), + childDerivs[0]); +} + +ExpressionTreeNode Operation::AddConstant::differentiate(const std::vector<ExpressionTreeNode>& children, const std::vector<ExpressionTreeNode>& childDerivs, const std::string& variable) const { + return childDerivs[0]; +} + +ExpressionTreeNode Operation::MultiplyConstant::differentiate(const std::vector<ExpressionTreeNode>& children, const std::vector<ExpressionTreeNode>& childDerivs, const std::string& variable) const { + return ExpressionTreeNode(new Operation::MultiplyConstant(value), + childDerivs[0]); +} + +ExpressionTreeNode Operation::PowerConstant::differentiate(const std::vector<ExpressionTreeNode>& children, const std::vector<ExpressionTreeNode>& childDerivs, const std::string& variable) const { + return ExpressionTreeNode(new Operation::Multiply(), + 
ExpressionTreeNode(new Operation::MultiplyConstant(value), + ExpressionTreeNode(new Operation::PowerConstant(value-1), + children[0])), + childDerivs[0]); +} + +ExpressionTreeNode Operation::Min::differentiate(const std::vector<ExpressionTreeNode>& children, const std::vector<ExpressionTreeNode>& childDerivs, const std::string& variable) const { + ExpressionTreeNode step(new Operation::Step(), + ExpressionTreeNode(new Operation::Subtract(), children[0], children[1])); + return ExpressionTreeNode(new Operation::Subtract(), + ExpressionTreeNode(new Operation::Multiply(), childDerivs[1], step), + ExpressionTreeNode(new Operation::Multiply(), childDerivs[0], + ExpressionTreeNode(new Operation::AddConstant(-1), step))); +} + +ExpressionTreeNode Operation::Max::differentiate(const std::vector<ExpressionTreeNode>& children, const std::vector<ExpressionTreeNode>& childDerivs, const std::string& variable) const { + ExpressionTreeNode step(new Operation::Step(), + ExpressionTreeNode(new Operation::Subtract(), children[0], children[1])); + return ExpressionTreeNode(new Operation::Subtract(), + ExpressionTreeNode(new Operation::Multiply(), childDerivs[0], step), + ExpressionTreeNode(new Operation::Multiply(), childDerivs[1], + ExpressionTreeNode(new Operation::AddConstant(-1), step))); +} + +ExpressionTreeNode Operation::Abs::differentiate(const std::vector<ExpressionTreeNode>& children, const std::vector<ExpressionTreeNode>& childDerivs, const std::string& variable) const { + ExpressionTreeNode step(new Operation::Step(), children[0]); + return ExpressionTreeNode(new Operation::Multiply(), + childDerivs[0], + ExpressionTreeNode(new Operation::AddConstant(-1), + ExpressionTreeNode(new Operation::MultiplyConstant(2), step))); +} + +ExpressionTreeNode Operation::Floor::differentiate(const std::vector<ExpressionTreeNode>& children, const std::vector<ExpressionTreeNode>& childDerivs, const std::string& variable) const { + return ExpressionTreeNode(new Operation::Constant(0.0)); +} 
+ +ExpressionTreeNode Operation::Ceil::differentiate(const std::vector<ExpressionTreeNode>& children, const std::vector<ExpressionTreeNode>& childDerivs, const std::string& variable) const { + return ExpressionTreeNode(new Operation::Constant(0.0)); +} + +ExpressionTreeNode Operation::Select::differentiate(const std::vector<ExpressionTreeNode>& children, const std::vector<ExpressionTreeNode>& childDerivs, const std::string& variable) const { + vector<ExpressionTreeNode> derivChildren; + derivChildren.push_back(children[0]); + derivChildren.push_back(childDerivs[1]); + derivChildren.push_back(childDerivs[2]); + return ExpressionTreeNode(new Operation::Select(), derivChildren); +} diff --git a/lib/colvars/lepton/src/ParsedExpression.cpp b/lib/colvars/lepton/src/ParsedExpression.cpp new file mode 100644 index 0000000000000000000000000000000000000000..6effd060072f7a9e10261c49d8812da15e850f8b --- /dev/null +++ b/lib/colvars/lepton/src/ParsedExpression.cpp @@ -0,0 +1,351 @@ +/* -------------------------------------------------------------------------- * + * Lepton * + * -------------------------------------------------------------------------- * + * This is part of the Lepton expression parser originating from * + * Simbios, the NIH National Center for Physics-Based Simulation of * + * Biological Structures at Stanford, funded under the NIH Roadmap for * + * Medical Research, grant U54 GM072970. See https://simtk.org. * + * * + * Portions copyright (c) 2009 Stanford University and the Authors. 
* + * Authors: Peter Eastman * + * Contributors: * + * * + * Permission is hereby granted, free of charge, to any person obtaining a * + * copy of this software and associated documentation files (the "Software"), * + * to deal in the Software without restriction, including without limitation * + * the rights to use, copy, modify, merge, publish, distribute, sublicense, * + * and/or sell copies of the Software, and to permit persons to whom the * + * Software is furnished to do so, subject to the following conditions: * + * * + * The above copyright notice and this permission notice shall be included in * + * all copies or substantial portions of the Software. * + * * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL * + * THE AUTHORS, CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, * + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR * + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE * + * USE OR OTHER DEALINGS IN THE SOFTWARE. 
* + * -------------------------------------------------------------------------- */ + +#include "lepton/ParsedExpression.h" +#include "lepton/CompiledExpression.h" +#include "lepton/ExpressionProgram.h" +#include "lepton/Operation.h" +#include <limits> +#include <vector> + +using namespace Lepton; +using namespace std; + +ParsedExpression::ParsedExpression() : rootNode(ExpressionTreeNode()) { +} + +ParsedExpression::ParsedExpression(const ExpressionTreeNode& rootNode) : rootNode(rootNode) { +} + +const ExpressionTreeNode& ParsedExpression::getRootNode() const { + if (&rootNode.getOperation() == NULL) + throw Exception("Illegal call to an initialized ParsedExpression"); + return rootNode; +} + +double ParsedExpression::evaluate() const { + return evaluate(getRootNode(), map<string, double>()); +} + +double ParsedExpression::evaluate(const map<string, double>& variables) const { + return evaluate(getRootNode(), variables); +} + +double ParsedExpression::evaluate(const ExpressionTreeNode& node, const map<string, double>& variables) { + int numArgs = (int) node.getChildren().size(); + vector<double> args(max(numArgs, 1)); + for (int i = 0; i < numArgs; i++) + args[i] = evaluate(node.getChildren()[i], variables); + return node.getOperation().evaluate(&args[0], variables); +} + +ParsedExpression ParsedExpression::optimize() const { + ExpressionTreeNode result = precalculateConstantSubexpressions(getRootNode()); + while (true) { + ExpressionTreeNode simplified = substituteSimplerExpression(result); + if (simplified == result) + break; + result = simplified; + } + return ParsedExpression(result); +} + +ParsedExpression ParsedExpression::optimize(const map<string, double>& variables) const { + ExpressionTreeNode result = preevaluateVariables(getRootNode(), variables); + result = precalculateConstantSubexpressions(result); + while (true) { + ExpressionTreeNode simplified = substituteSimplerExpression(result); + if (simplified == result) + break; + result = simplified; + } + 
return ParsedExpression(result); +} + +ExpressionTreeNode ParsedExpression::preevaluateVariables(const ExpressionTreeNode& node, const map<string, double>& variables) { + if (node.getOperation().getId() == Operation::VARIABLE) { + const Operation::Variable& var = dynamic_cast<const Operation::Variable&>(node.getOperation()); + map<string, double>::const_iterator iter = variables.find(var.getName()); + if (iter == variables.end()) + return node; + return ExpressionTreeNode(new Operation::Constant(iter->second)); + } + vector<ExpressionTreeNode> children(node.getChildren().size()); + for (int i = 0; i < (int) children.size(); i++) + children[i] = preevaluateVariables(node.getChildren()[i], variables); + return ExpressionTreeNode(node.getOperation().clone(), children); +} + +ExpressionTreeNode ParsedExpression::precalculateConstantSubexpressions(const ExpressionTreeNode& node) { + vector<ExpressionTreeNode> children(node.getChildren().size()); + for (int i = 0; i < (int) children.size(); i++) + children[i] = precalculateConstantSubexpressions(node.getChildren()[i]); + ExpressionTreeNode result = ExpressionTreeNode(node.getOperation().clone(), children); + if (node.getOperation().getId() == Operation::VARIABLE || node.getOperation().getId() == Operation::CUSTOM) + return result; + for (int i = 0; i < (int) children.size(); i++) + if (children[i].getOperation().getId() != Operation::CONSTANT) + return result; + return ExpressionTreeNode(new Operation::Constant(evaluate(result, map<string, double>()))); +} + +ExpressionTreeNode ParsedExpression::substituteSimplerExpression(const ExpressionTreeNode& node) { + vector<ExpressionTreeNode> children(node.getChildren().size()); + for (int i = 0; i < (int) children.size(); i++) + children[i] = substituteSimplerExpression(node.getChildren()[i]); + switch (node.getOperation().getId()) { + case Operation::ADD: + { + double first = getConstantValue(children[0]); + double second = getConstantValue(children[1]); + if (first == 0.0) // 
Add 0 + return children[1]; + if (second == 0.0) // Add 0 + return children[0]; + if (first == first) // Add a constant + return ExpressionTreeNode(new Operation::AddConstant(first), children[1]); + if (second == second) // Add a constant + return ExpressionTreeNode(new Operation::AddConstant(second), children[0]); + if (children[1].getOperation().getId() == Operation::NEGATE) // a+(-b) = a-b + return ExpressionTreeNode(new Operation::Subtract(), children[0], children[1].getChildren()[0]); + if (children[0].getOperation().getId() == Operation::NEGATE) // (-a)+b = b-a + return ExpressionTreeNode(new Operation::Subtract(), children[1], children[0].getChildren()[0]); + break; + } + case Operation::SUBTRACT: + { + if (children[0] == children[1]) + return ExpressionTreeNode(new Operation::Constant(0.0)); // Subtracting anything from itself is 0 + double first = getConstantValue(children[0]); + if (first == 0.0) // Subtract from 0 + return ExpressionTreeNode(new Operation::Negate(), children[1]); + double second = getConstantValue(children[1]); + if (second == 0.0) // Subtract 0 + return children[0]; + if (second == second) // Subtract a constant + return ExpressionTreeNode(new Operation::AddConstant(-second), children[0]); + if (children[1].getOperation().getId() == Operation::NEGATE) // a-(-b) = a+b + return ExpressionTreeNode(new Operation::Add(), children[0], children[1].getChildren()[0]); + break; + } + case Operation::MULTIPLY: + { + double first = getConstantValue(children[0]); + double second = getConstantValue(children[1]); + if (first == 0.0 || second == 0.0) // Multiply by 0 + return ExpressionTreeNode(new Operation::Constant(0.0)); + if (first == 1.0) // Multiply by 1 + return children[1]; + if (second == 1.0) // Multiply by 1 + return children[0]; + if (children[0].getOperation().getId() == Operation::CONSTANT) { // Multiply by a constant + if (children[1].getOperation().getId() == Operation::MULTIPLY_CONSTANT) // Combine two multiplies into a single one + 
return ExpressionTreeNode(new Operation::MultiplyConstant(first*dynamic_cast<const Operation::MultiplyConstant*>(&children[1].getOperation())->getValue()), children[1].getChildren()[0]); + return ExpressionTreeNode(new Operation::MultiplyConstant(first), children[1]); + } + if (children[1].getOperation().getId() == Operation::CONSTANT) { // Multiply by a constant + if (children[0].getOperation().getId() == Operation::MULTIPLY_CONSTANT) // Combine two multiplies into a single one + return ExpressionTreeNode(new Operation::MultiplyConstant(second*dynamic_cast<const Operation::MultiplyConstant*>(&children[0].getOperation())->getValue()), children[0].getChildren()[0]); + return ExpressionTreeNode(new Operation::MultiplyConstant(second), children[0]); + } + if (children[0].getOperation().getId() == Operation::NEGATE && children[1].getOperation().getId() == Operation::NEGATE) // The two negations cancel + return ExpressionTreeNode(new Operation::Multiply(), children[0].getChildren()[0], children[1].getChildren()[0]); + if (children[0].getOperation().getId() == Operation::NEGATE && children[1].getOperation().getId() == Operation::MULTIPLY_CONSTANT) // Negate the constant + return ExpressionTreeNode(new Operation::Multiply(), children[0].getChildren()[0], ExpressionTreeNode(new Operation::MultiplyConstant(-dynamic_cast<const Operation::MultiplyConstant*>(&children[1].getOperation())->getValue()), children[1].getChildren()[0])); + if (children[1].getOperation().getId() == Operation::NEGATE && children[0].getOperation().getId() == Operation::MULTIPLY_CONSTANT) // Negate the constant + return ExpressionTreeNode(new Operation::Multiply(), ExpressionTreeNode(new Operation::MultiplyConstant(-dynamic_cast<const Operation::MultiplyConstant*>(&children[0].getOperation())->getValue()), children[0].getChildren()[0]), children[1].getChildren()[0]); + if (children[0].getOperation().getId() == Operation::NEGATE) // Pull the negation out so it can possibly be optimized further + return 
ExpressionTreeNode(new Operation::Negate(), ExpressionTreeNode(new Operation::Multiply(), children[0].getChildren()[0], children[1])); + if (children[1].getOperation().getId() == Operation::NEGATE) // Pull the negation out so it can possibly be optimized further + return ExpressionTreeNode(new Operation::Negate(), ExpressionTreeNode(new Operation::Multiply(), children[0], children[1].getChildren()[0])); + if (children[1].getOperation().getId() == Operation::RECIPROCAL) // a*(1/b) = a/b + return ExpressionTreeNode(new Operation::Divide(), children[0], children[1].getChildren()[0]); + if (children[0].getOperation().getId() == Operation::RECIPROCAL) // (1/a)*b = b/a + return ExpressionTreeNode(new Operation::Divide(), children[1], children[0].getChildren()[0]); + if (children[0] == children[1]) + return ExpressionTreeNode(new Operation::Square(), children[0]); // x*x = square(x) + if (children[0].getOperation().getId() == Operation::SQUARE && children[0].getChildren()[0] == children[1]) + return ExpressionTreeNode(new Operation::Cube(), children[1]); // x*x*x = cube(x) + if (children[1].getOperation().getId() == Operation::SQUARE && children[1].getChildren()[0] == children[0]) + return ExpressionTreeNode(new Operation::Cube(), children[0]); // x*x*x = cube(x) + break; + } + case Operation::DIVIDE: + { + if (children[0] == children[1]) + return ExpressionTreeNode(new Operation::Constant(1.0)); // Dividing anything by itself is 1 + double numerator = getConstantValue(children[0]); + if (numerator == 0.0) // 0 divided by something + return ExpressionTreeNode(new Operation::Constant(0.0)); + if (numerator == 1.0) // 1 divided by something + return ExpressionTreeNode(new Operation::Reciprocal(), children[1]); + double denominator = getConstantValue(children[1]); + if (denominator == 1.0) // Divide by 1 + return children[0]; + if (children[1].getOperation().getId() == Operation::CONSTANT) { + if (children[0].getOperation().getId() == Operation::MULTIPLY_CONSTANT) // 
Combine a multiply and a divide into one multiply + return ExpressionTreeNode(new Operation::MultiplyConstant(dynamic_cast<const Operation::MultiplyConstant*>(&children[0].getOperation())->getValue()/denominator), children[0].getChildren()[0]); + return ExpressionTreeNode(new Operation::MultiplyConstant(1.0/denominator), children[0]); // Replace a divide with a multiply + } + if (children[0].getOperation().getId() == Operation::NEGATE && children[1].getOperation().getId() == Operation::NEGATE) // The two negations cancel + return ExpressionTreeNode(new Operation::Divide(), children[0].getChildren()[0], children[1].getChildren()[0]); + if (children[1].getOperation().getId() == Operation::NEGATE && children[0].getOperation().getId() == Operation::MULTIPLY_CONSTANT) // Negate the constant + return ExpressionTreeNode(new Operation::Divide(), ExpressionTreeNode(new Operation::MultiplyConstant(-dynamic_cast<const Operation::MultiplyConstant*>(&children[0].getOperation())->getValue()), children[0].getChildren()[0]), children[1].getChildren()[0]); + if (children[0].getOperation().getId() == Operation::NEGATE) // Pull the negation out so it can possibly be optimized further + return ExpressionTreeNode(new Operation::Negate(), ExpressionTreeNode(new Operation::Divide(), children[0].getChildren()[0], children[1])); + if (children[1].getOperation().getId() == Operation::NEGATE) // Pull the negation out so it can possibly be optimized further + return ExpressionTreeNode(new Operation::Negate(), ExpressionTreeNode(new Operation::Divide(), children[0], children[1].getChildren()[0])); + if (children[1].getOperation().getId() == Operation::RECIPROCAL) // a/(1/b) = a*b + return ExpressionTreeNode(new Operation::Multiply(), children[0], children[1].getChildren()[0]); + break; + } + case Operation::POWER: + { + double base = getConstantValue(children[0]); + if (base == 0.0) // 0 to any power is 0 + return ExpressionTreeNode(new Operation::Constant(0.0)); + if (base == 1.0) // 1 to any 
power is 1 + return ExpressionTreeNode(new Operation::Constant(1.0)); + double exponent = getConstantValue(children[1]); + if (exponent == 0.0) // x^0 = 1 + return ExpressionTreeNode(new Operation::Constant(1.0)); + if (exponent == 1.0) // x^1 = x + return children[0]; + if (exponent == -1.0) // x^-1 = recip(x) + return ExpressionTreeNode(new Operation::Reciprocal(), children[0]); + if (exponent == 2.0) // x^2 = square(x) + return ExpressionTreeNode(new Operation::Square(), children[0]); + if (exponent == 3.0) // x^3 = cube(x) + return ExpressionTreeNode(new Operation::Cube(), children[0]); + if (exponent == 0.5) // x^0.5 = sqrt(x) + return ExpressionTreeNode(new Operation::Sqrt(), children[0]); + if (exponent == exponent) // Constant power + return ExpressionTreeNode(new Operation::PowerConstant(exponent), children[0]); + break; + } + case Operation::NEGATE: + { + if (children[0].getOperation().getId() == Operation::MULTIPLY_CONSTANT) // Combine a multiply and a negate into a single multiply + return ExpressionTreeNode(new Operation::MultiplyConstant(-dynamic_cast<const Operation::MultiplyConstant*>(&children[0].getOperation())->getValue()), children[0].getChildren()[0]); + if (children[0].getOperation().getId() == Operation::CONSTANT) // Negate a constant + return ExpressionTreeNode(new Operation::Constant(-getConstantValue(children[0]))); + if (children[0].getOperation().getId() == Operation::NEGATE) // The two negations cancel + return children[0].getChildren()[0]; + break; + } + case Operation::MULTIPLY_CONSTANT: + { + if (children[0].getOperation().getId() == Operation::MULTIPLY_CONSTANT) // Combine two multiplies into a single one + return ExpressionTreeNode(new Operation::MultiplyConstant(dynamic_cast<const Operation::MultiplyConstant*>(&node.getOperation())->getValue()*dynamic_cast<const Operation::MultiplyConstant*>(&children[0].getOperation())->getValue()), children[0].getChildren()[0]); + if (children[0].getOperation().getId() == Operation::CONSTANT) // 
Multiply two constants + return ExpressionTreeNode(new Operation::Constant(dynamic_cast<const Operation::MultiplyConstant*>(&node.getOperation())->getValue()*getConstantValue(children[0]))); + if (children[0].getOperation().getId() == Operation::NEGATE) // Combine a multiply and a negate into a single multiply + return ExpressionTreeNode(new Operation::MultiplyConstant(-dynamic_cast<const Operation::MultiplyConstant*>(&node.getOperation())->getValue()), children[0].getChildren()[0]); + break; + } + default: + { + // If operation ID is not one of the above, + // we don't substitute a simpler expression. + break; + } + + } + return ExpressionTreeNode(node.getOperation().clone(), children); +} + +ParsedExpression ParsedExpression::differentiate(const string& variable) const { + return differentiate(getRootNode(), variable); +} + +ExpressionTreeNode ParsedExpression::differentiate(const ExpressionTreeNode& node, const string& variable) { + vector<ExpressionTreeNode> childDerivs(node.getChildren().size()); + for (int i = 0; i < (int) childDerivs.size(); i++) + childDerivs[i] = differentiate(node.getChildren()[i], variable); + return node.getOperation().differentiate(node.getChildren(),childDerivs, variable); +} + +double ParsedExpression::getConstantValue(const ExpressionTreeNode& node) { + if (node.getOperation().getId() == Operation::CONSTANT) + return dynamic_cast<const Operation::Constant&>(node.getOperation()).getValue(); + return numeric_limits<double>::quiet_NaN(); +} + +ExpressionProgram ParsedExpression::createProgram() const { + return ExpressionProgram(*this); +} + +CompiledExpression ParsedExpression::createCompiledExpression() const { + return CompiledExpression(*this); +} + +ParsedExpression ParsedExpression::renameVariables(const map<string, string>& replacements) const { + return ParsedExpression(renameNodeVariables(getRootNode(), replacements)); +} + +ExpressionTreeNode ParsedExpression::renameNodeVariables(const ExpressionTreeNode& node, const 
map<string, string>& replacements) { + if (node.getOperation().getId() == Operation::VARIABLE) { + map<string, string>::const_iterator replace = replacements.find(node.getOperation().getName()); + if (replace != replacements.end()) + return ExpressionTreeNode(new Operation::Variable(replace->second)); + } + vector<ExpressionTreeNode> children; + for (int i = 0; i < (int) node.getChildren().size(); i++) + children.push_back(renameNodeVariables(node.getChildren()[i], replacements)); + return ExpressionTreeNode(node.getOperation().clone(), children); +} + +ostream& Lepton::operator<<(ostream& out, const ExpressionTreeNode& node) { + if (node.getOperation().isInfixOperator() && node.getChildren().size() == 2) { + out << "(" << node.getChildren()[0] << ")" << node.getOperation().getName() << "(" << node.getChildren()[1] << ")"; + } + else if (node.getOperation().isInfixOperator() && node.getChildren().size() == 1) { + out << "(" << node.getChildren()[0] << ")" << node.getOperation().getName(); + } + else { + out << node.getOperation().getName(); + if (node.getChildren().size() > 0) { + out << "("; + for (int i = 0; i < (int) node.getChildren().size(); i++) { + if (i > 0) + out << ", "; + out << node.getChildren()[i]; + } + out << ")"; + } + } + return out; +} + +ostream& Lepton::operator<<(ostream& out, const ParsedExpression& exp) { + out << exp.getRootNode(); + return out; +} diff --git a/lib/colvars/lepton/src/Parser.cpp b/lib/colvars/lepton/src/Parser.cpp new file mode 100644 index 0000000000000000000000000000000000000000..6b19d7370de6baa44217e04cc48a7ad1ede72df7 --- /dev/null +++ b/lib/colvars/lepton/src/Parser.cpp @@ -0,0 +1,406 @@ +/* -------------------------------------------------------------------------- * + * Lepton * + * -------------------------------------------------------------------------- * + * This is part of the Lepton expression parser originating from * + * Simbios, the NIH National Center for Physics-Based Simulation of * + * Biological 
Structures at Stanford, funded under the NIH Roadmap for * + * Medical Research, grant U54 GM072970. See https://simtk.org. * + * * + * Portions copyright (c) 2009-2015 Stanford University and the Authors. * + * Authors: Peter Eastman * + * Contributors: * + * * + * Permission is hereby granted, free of charge, to any person obtaining a * + * copy of this software and associated documentation files (the "Software"), * + * to deal in the Software without restriction, including without limitation * + * the rights to use, copy, modify, merge, publish, distribute, sublicense, * + * and/or sell copies of the Software, and to permit persons to whom the * + * Software is furnished to do so, subject to the following conditions: * + * * + * The above copyright notice and this permission notice shall be included in * + * all copies or substantial portions of the Software. * + * * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL * + * THE AUTHORS, CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, * + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR * + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE * + * USE OR OTHER DEALINGS IN THE SOFTWARE. 
* + * -------------------------------------------------------------------------- */ + +#include "lepton/Parser.h" +#include "lepton/CustomFunction.h" +#include "lepton/Exception.h" +#include "lepton/ExpressionTreeNode.h" +#include "lepton/Operation.h" +#include "lepton/ParsedExpression.h" +#include <cctype> +#include <iostream> + +using namespace Lepton; +using namespace std; + +static const string Digits = "0123456789"; +static const string Operators = "+-*/^"; +static const bool LeftAssociative[] = {true, true, true, true, false}; +static const int Precedence[] = {0, 0, 1, 1, 3}; +static const Operation::Id OperationId[] = {Operation::ADD, Operation::SUBTRACT, Operation::MULTIPLY, Operation::DIVIDE, Operation::POWER}; + +class Lepton::ParseToken { +public: + enum Type {Number, Operator, Variable, Function, LeftParen, RightParen, Comma, Whitespace}; + + ParseToken(string text, Type type) : text(text), type(type) { + } + const string& getText() const { + return text; + } + Type getType() const { + return type; + } +private: + string text; + Type type; +}; + +string Parser::trim(const string& expression) { + // Remove leading and trailing spaces. 
+ + int start, end; + for (start = 0; start < (int) expression.size() && isspace(expression[start]); start++) + ; + for (end = (int) expression.size()-1; end > start && isspace(expression[end]); end--) + ; + if (start == end && isspace(expression[end])) + return ""; + return expression.substr(start, end-start+1); +} + +ParseToken Parser::getNextToken(const string& expression, int start) { + char c = expression[start]; + if (c == '(') + return ParseToken("(", ParseToken::LeftParen); + if (c == ')') + return ParseToken(")", ParseToken::RightParen); + if (c == ',') + return ParseToken(",", ParseToken::Comma); + if (Operators.find(c) != string::npos) + return ParseToken(string(1, c), ParseToken::Operator); + if (isspace(c)) { + // White space + + for (int pos = start+1; pos < (int) expression.size(); pos++) { + if (!isspace(expression[pos])) + return ParseToken(expression.substr(start, pos-start), ParseToken::Whitespace); + } + return ParseToken(expression.substr(start, string::npos), ParseToken::Whitespace); + } + if (c == '.' || Digits.find(c) != string::npos) { + // A number + + bool foundDecimal = (c == '.'); + bool foundExp = false; + int pos; + for (pos = start+1; pos < (int) expression.size(); pos++) { + c = expression[pos]; + if (Digits.find(c) != string::npos) + continue; + if (c == '.' 
&& !foundDecimal) { + foundDecimal = true; + continue; + } + if ((c == 'e' || c == 'E') && !foundExp) { + foundExp = true; + if (pos < (int) expression.size()-1 && (expression[pos+1] == '-' || expression[pos+1] == '+')) + pos++; + continue; + } + break; + } + return ParseToken(expression.substr(start, pos-start), ParseToken::Number); + } + + // A variable, function, or left parenthesis + + for (int pos = start; pos < (int) expression.size(); pos++) { + c = expression[pos]; + if (c == '(') + return ParseToken(expression.substr(start, pos-start+1), ParseToken::Function); + if (Operators.find(c) != string::npos || c == ',' || c == ')' || isspace(c)) + return ParseToken(expression.substr(start, pos-start), ParseToken::Variable); + } + return ParseToken(expression.substr(start, string::npos), ParseToken::Variable); +} + +vector<ParseToken> Parser::tokenize(const string& expression) { + vector<ParseToken> tokens; + int pos = 0; + while (pos < (int) expression.size()) { + ParseToken token = getNextToken(expression, pos); + if (token.getType() != ParseToken::Whitespace) + tokens.push_back(token); + pos += (int) token.getText().size(); + } + return tokens; +} + +ParsedExpression Parser::parse(const string& expression) { + return parse(expression, map<string, CustomFunction*>()); +} + +ParsedExpression Parser::parse(const string& expression, const map<string, CustomFunction*>& customFunctions) { + try { + // First split the expression into subexpressions. + + string primaryExpression = expression; + vector<string> subexpressions; + while (true) { + string::size_type pos = primaryExpression.find_last_of(';'); + if (pos == string::npos) + break; + string sub = trim(primaryExpression.substr(pos+1)); + if (sub.size() > 0) + subexpressions.push_back(sub); + primaryExpression = primaryExpression.substr(0, pos); + } + + // Parse the subexpressions. 
+ + map<string, ExpressionTreeNode> subexpDefs; + for (int i = 0; i < (int) subexpressions.size(); i++) { + string::size_type equalsPos = subexpressions[i].find('='); + if (equalsPos == string::npos) + throw Exception("subexpression does not specify a name"); + string name = trim(subexpressions[i].substr(0, equalsPos)); + if (name.size() == 0) + throw Exception("subexpression does not specify a name"); + vector<ParseToken> tokens = tokenize(subexpressions[i].substr(equalsPos+1)); + int pos = 0; + subexpDefs[name] = parsePrecedence(tokens, pos, customFunctions, subexpDefs, 0); + if (pos != tokens.size()) + throw Exception("unexpected text at end of subexpression: "+tokens[pos].getText()); + } + + // Now parse the primary expression. + + vector<ParseToken> tokens = tokenize(primaryExpression); + int pos = 0; + ExpressionTreeNode result = parsePrecedence(tokens, pos, customFunctions, subexpDefs, 0); + if (pos != tokens.size()) + throw Exception("unexpected text at end of expression: "+tokens[pos].getText()); + return ParsedExpression(result); + } + catch (Exception& ex) { + throw Exception("Parse error in expression \""+expression+"\": "+ex.what()); + } +} + +ExpressionTreeNode Parser::parsePrecedence(const vector<ParseToken>& tokens, int& pos, const map<string, CustomFunction*>& customFunctions, + const map<string, ExpressionTreeNode>& subexpressionDefs, int precedence) { + if (pos == tokens.size()) + throw Exception("unexpected end of expression"); + + // Parse the next value (number, variable, function, parenthesized expression) + + ParseToken token = tokens[pos]; + ExpressionTreeNode result; + if (token.getType() == ParseToken::Number) { + double value; + stringstream(token.getText()) >> value; + result = ExpressionTreeNode(new Operation::Constant(value)); + pos++; + } + else if (token.getType() == ParseToken::Variable) { + map<string, ExpressionTreeNode>::const_iterator subexp = subexpressionDefs.find(token.getText()); + if (subexp == subexpressionDefs.end()) { + 
Operation* op = new Operation::Variable(token.getText()); + result = ExpressionTreeNode(op); + } + else + result = subexp->second; + pos++; + } + else if (token.getType() == ParseToken::LeftParen) { + pos++; + result = parsePrecedence(tokens, pos, customFunctions, subexpressionDefs, 0); + if (pos == tokens.size() || tokens[pos].getType() != ParseToken::RightParen) + throw Exception("unbalanced parentheses"); + pos++; + } + else if (token.getType() == ParseToken::Function) { + pos++; + vector<ExpressionTreeNode> args; + bool moreArgs; + do { + args.push_back(parsePrecedence(tokens, pos, customFunctions, subexpressionDefs, 0)); + moreArgs = (pos < (int) tokens.size() && tokens[pos].getType() == ParseToken::Comma); + if (moreArgs) + pos++; + } while (moreArgs); + if (pos == tokens.size() || tokens[pos].getType() != ParseToken::RightParen) + throw Exception("unbalanced parentheses"); + pos++; + Operation* op = getFunctionOperation(token.getText(), customFunctions); + try { + result = ExpressionTreeNode(op, args); + } + catch (...) { + delete op; + throw; + } + } + else if (token.getType() == ParseToken::Operator && token.getText() == "-") { + pos++; + ExpressionTreeNode toNegate = parsePrecedence(tokens, pos, customFunctions, subexpressionDefs, 2); + result = ExpressionTreeNode(new Operation::Negate(), toNegate); + } + else + throw Exception("unexpected token: "+token.getText()); + + // Now deal with the next binary operator. + + while (pos < (int) tokens.size() && tokens[pos].getType() == ParseToken::Operator) { + token = tokens[pos]; + int opIndex = (int) Operators.find(token.getText()); + int opPrecedence = Precedence[opIndex]; + if (opPrecedence < precedence) + return result; + pos++; + ExpressionTreeNode arg = parsePrecedence(tokens, pos, customFunctions, subexpressionDefs, LeftAssociative[opIndex] ? opPrecedence+1 : opPrecedence); + Operation* op = getOperatorOperation(token.getText()); + try { + result = ExpressionTreeNode(op, result, arg); + } + catch (...) 
{ + delete op; + throw; + } + } + return result; +} + +Operation* Parser::getOperatorOperation(const std::string& name) { + switch (OperationId[Operators.find(name)]) { + case Operation::ADD: + return new Operation::Add(); + case Operation::SUBTRACT: + return new Operation::Subtract(); + case Operation::MULTIPLY: + return new Operation::Multiply(); + case Operation::DIVIDE: + return new Operation::Divide(); + case Operation::POWER: + return new Operation::Power(); + default: + throw Exception("unknown operator"); + } +} + +Operation* Parser::getFunctionOperation(const std::string& name, const map<string, CustomFunction*>& customFunctions) { + + static map<string, Operation::Id> opMap; + if (opMap.size() == 0) { + opMap["sqrt"] = Operation::SQRT; + opMap["exp"] = Operation::EXP; + opMap["log"] = Operation::LOG; + opMap["sin"] = Operation::SIN; + opMap["cos"] = Operation::COS; + opMap["sec"] = Operation::SEC; + opMap["csc"] = Operation::CSC; + opMap["tan"] = Operation::TAN; + opMap["cot"] = Operation::COT; + opMap["asin"] = Operation::ASIN; + opMap["acos"] = Operation::ACOS; + opMap["atan"] = Operation::ATAN; + opMap["sinh"] = Operation::SINH; + opMap["cosh"] = Operation::COSH; + opMap["tanh"] = Operation::TANH; + opMap["erf"] = Operation::ERF; + opMap["erfc"] = Operation::ERFC; + opMap["step"] = Operation::STEP; + opMap["delta"] = Operation::DELTA; + opMap["square"] = Operation::SQUARE; + opMap["cube"] = Operation::CUBE; + opMap["recip"] = Operation::RECIPROCAL; + opMap["min"] = Operation::MIN; + opMap["max"] = Operation::MAX; + opMap["abs"] = Operation::ABS; + opMap["floor"] = Operation::FLOOR; + opMap["ceil"] = Operation::CEIL; + opMap["select"] = Operation::SELECT; + } + string trimmed = name.substr(0, name.size()-1); + + // First check custom functions. 
+ + map<string, CustomFunction*>::const_iterator custom = customFunctions.find(trimmed); + if (custom != customFunctions.end()) + return new Operation::Custom(trimmed, custom->second->clone()); + + // Now try standard functions. + + map<string, Operation::Id>::const_iterator iter = opMap.find(trimmed); + if (iter == opMap.end()) + throw Exception("unknown function: "+trimmed); + switch (iter->second) { + case Operation::SQRT: + return new Operation::Sqrt(); + case Operation::EXP: + return new Operation::Exp(); + case Operation::LOG: + return new Operation::Log(); + case Operation::SIN: + return new Operation::Sin(); + case Operation::COS: + return new Operation::Cos(); + case Operation::SEC: + return new Operation::Sec(); + case Operation::CSC: + return new Operation::Csc(); + case Operation::TAN: + return new Operation::Tan(); + case Operation::COT: + return new Operation::Cot(); + case Operation::ASIN: + return new Operation::Asin(); + case Operation::ACOS: + return new Operation::Acos(); + case Operation::ATAN: + return new Operation::Atan(); + case Operation::SINH: + return new Operation::Sinh(); + case Operation::COSH: + return new Operation::Cosh(); + case Operation::TANH: + return new Operation::Tanh(); + case Operation::ERF: + return new Operation::Erf(); + case Operation::ERFC: + return new Operation::Erfc(); + case Operation::STEP: + return new Operation::Step(); + case Operation::DELTA: + return new Operation::Delta(); + case Operation::SQUARE: + return new Operation::Square(); + case Operation::CUBE: + return new Operation::Cube(); + case Operation::RECIPROCAL: + return new Operation::Reciprocal(); + case Operation::MIN: + return new Operation::Min(); + case Operation::MAX: + return new Operation::Max(); + case Operation::ABS: + return new Operation::Abs(); + case Operation::FLOOR: + return new Operation::Floor(); + case Operation::CEIL: + return new Operation::Ceil(); + case Operation::SELECT: + return new Operation::Select(); + default: + throw 
Exception("unknown function"); + } +} diff --git a/lib/gpu/Makefile.linux b/lib/gpu/Makefile.linux deleted file mode 100644 index 2991b9643d8951f6bddfa42d99f28cd039ec29bb..0000000000000000000000000000000000000000 --- a/lib/gpu/Makefile.linux +++ /dev/null @@ -1,52 +0,0 @@ -# /* ---------------------------------------------------------------------- -# Generic Linux Makefile for CUDA -# - Change CUDA_ARCH for your GPU -# ------------------------------------------------------------------------- */ - -# which file will be copied to Makefile.lammps - -EXTRAMAKE = Makefile.lammps.standard - -ifeq ($(CUDA_HOME),) -CUDA_HOME = /opt/cuda -endif - -NVCC = nvcc - -# Tesla CUDA -#CUDA_ARCH = -arch=sm_21 -# newer CUDA -#CUDA_ARCH = -arch=sm_13 -# older CUDA -#CUDA_ARCH = -arch=sm_10 -DCUDA_PRE_THREE -CUDA_ARCH = -arch=sm_61 - -# this setting should match LAMMPS Makefile -# one of LAMMPS_SMALLBIG (default), LAMMPS_BIGBIG and LAMMPS_SMALLSMALL - -LMP_INC = -DLAMMPS_SMALLBIG - -# precision for GPU calculations -# -D_SINGLE_SINGLE # Single precision for all calculations -# -D_DOUBLE_DOUBLE # Double precision for all calculations -# -D_SINGLE_DOUBLE # Accumulation of forces, etc. 
in double - -CUDA_PRECISION = -D_SINGLE_DOUBLE - -CUDA_INCLUDE = -I$(CUDA_HOME)/include -CUDA_LIB = -L$(CUDA_HOME)/lib64 -CUDA_OPTS = -DUNIX -O3 -Xptxas -v --use_fast_math $(LMP_INC) -Xcompiler -fPIC - -CUDR_CPP = mpicxx -DMPI_GERYON -DUCL_NO_EXIT -DMPICH_IGNORE_CXX_SEEK -DOMPI_SKIP_MPICXX=1 -fPIC -CUDR_OPTS = -O2 $(LMP_INC) # -xHost -no-prec-div -ansi-alias - -BIN_DIR = ./ -OBJ_DIR = ./ -LIB_DIR = ./ -AR = ar -BSH = /bin/sh - -CUDPP_OPT = -DUSE_CUDPP -Icudpp_mini - -include Nvidia.makefile - diff --git a/lib/kim/Install.py b/lib/kim/Install.py index aa244ee6eabd0f1296c1f39fd74639aab9f021dc..0e873889546d198398f22d3fc2df37d0bc3fec06 100644 --- a/lib/kim/Install.py +++ b/lib/kim/Install.py @@ -21,7 +21,7 @@ Syntax from lib dir: python Install.py -b -v version -a kim-name specify one or more options, order does not matter -v = version of KIM API library to use - default = kim-api-v1.8.2 (current as of June 2017) + default = kim-api-v1.9.2 (current as of Oct 2017) -b = download and build base KIM API library with example Models this will delete any previous installation in the current folder -n = do NOT download and build base KIM API library. 
@@ -109,7 +109,7 @@ nargs = len(args) if nargs == 0: error() thisdir = os.environ['PWD'] -version = "kim-api-v1.8.2" +version = "kim-api-v1.9.2" buildflag = False everythingflag = False @@ -150,7 +150,7 @@ while iarg < len(args): else: error() thisdir = os.path.abspath(thisdir) -url = "https://s3.openkim.org/kim-api/%s.tgz" % version +url = "https://s3.openkim.org/kim-api/%s.txz" % version # set KIM API directory @@ -199,9 +199,9 @@ if buildflag: # download entire kim-api tarball print("Downloading kim-api tarball ...") - geturl(url,"%s/%s.tgz" % (thisdir,version)) + geturl(url,"%s/%s.txz" % (thisdir,version)) print("Unpacking kim-api tarball ...") - cmd = 'cd "%s"; rm -rf "%s"; tar -xzvf %s.tgz' % (thisdir,version,version) + cmd = 'cd "%s"; rm -rf "%s"; tar -xJvf %s.txz' % (thisdir,version,version) subprocess.check_output(cmd,stderr=subprocess.STDOUT,shell=True) # configure kim-api @@ -211,18 +211,6 @@ if buildflag: subprocess.check_output(cmd,stderr=subprocess.STDOUT,shell=True) # build kim-api - - print("Configuring example Models") - cmd = 'cd "%s/%s"; make add-examples' % (thisdir,version) - txt = subprocess.check_output(cmd,stderr=subprocess.STDOUT,shell=True) - if verboseflag: print (txt.decode("UTF-8")) - - if everythingflag: - print("Configuring all OpenKIM models, this will take a while ...") - cmd = 'cd "%s/%s"; make add-OpenKIM' % (thisdir,version) - txt = subprocess.check_output(cmd,stderr=subprocess.STDOUT,shell=True) - if verboseflag: print(txt.decode("UTF-8")) - print("Building kim-api ...") cmd = 'cd "%s/%s"; make' % (thisdir,version) txt = subprocess.check_output(cmd,stderr=subprocess.STDOUT,shell=True) @@ -235,18 +223,28 @@ if buildflag: txt = subprocess.check_output(cmd,stderr=subprocess.STDOUT,shell=True) if verboseflag: print(txt.decode("UTF-8")) - cmd = 'cd "%s/%s"; make install-set-default-to-v1' %(thisdir,version) - txt = subprocess.check_output(cmd,stderr=subprocess.STDOUT,shell=True) - if verboseflag: print(txt.decode("UTF-8")) - # remove 
source files + print("Building and installing example Models") + cmd = 'cd "%s/%s/examples"; make model-drivers-all-system' % (thisdir,version) + txt = subprocess.check_output(cmd,stderr=subprocess.STDOUT,shell=True) + if verboseflag: print (txt.decode("UTF-8")) + cmd = 'cd "%s/%s/examples"; make models-all-system' % (thisdir,version) + txt = subprocess.check_output(cmd,stderr=subprocess.STDOUT,shell=True) + if verboseflag: print (txt.decode("UTF-8")) + print("Removing kim-api source and build files ...") - cmd = 'cd "%s"; rm -rf %s; rm -rf %s.tgz' % (thisdir,version,version) + cmd = 'cd "%s"; rm -rf %s; rm -rf %s.txz' % (thisdir,version,version) subprocess.check_output(cmd,stderr=subprocess.STDOUT,shell=True) -# add a single model (and possibly its driver) to existing KIM installation + # add all OpenKIM models, if desired + if everythingflag: + print("Adding all OpenKIM models, this will take a while ...") + cmd = '%s/bin/kim-api-v1-collections-management install system OpenKIM' % (kimdir) + txt = subprocess.check_output(cmd,stderr=subprocess.STDOUT,shell=True) + if verboseflag: print(txt.decode("UTF-8")) +# add single OpenKIM model if addflag: if not os.path.isdir(kimdir): @@ -254,64 +252,6 @@ if addflag: error() # download single model - - print("Downloading tarball for %s..." 
% addmodelname) - url = "https://openkim.org/download/%s.tgz" % addmodelname - geturl(url,"%s/%s.tgz" % (thisdir,addmodelname)) - - print("Unpacking item tarball ...") - cmd = 'cd "%s"; tar -xzvf %s.tgz' % (thisdir,addmodelname) - subprocess.check_output(cmd,stderr=subprocess.STDOUT,shell=True) - - print("Building item ...") - cmd = 'cd "%s/%s"; make; make install' %(thisdir,addmodelname) - try: - txt = subprocess.check_output(cmd,stderr=subprocess.STDOUT,shell=True) - except subprocess.CalledProcessError as e: - - # Error: but first, check to see if it needs a driver - firstRunOutput = e.output.decode("UTF-8") - - cmd = 'cd "%s/%s"; make kim-item-type' % (thisdir,addmodelname) - txt = subprocess.check_output(cmd,stderr=subprocess.STDOUT,shell=True) - txt = txt.decode("UTF-8") - if txt == "ParameterizedModel": - - # Get and install driver - - cmd = 'cd "%s/%s"; make model-driver-name' % (thisdir,addmodelname) - txt = subprocess.check_output(cmd,stderr=subprocess.STDOUT,shell=True) - adddrivername = txt.decode("UTF-8").strip() - print("First installing model driver: %s..." 
% adddrivername) - cmd = 'cd "%s"; python Install.py -n -a %s' % (thisdir,adddrivername) - try: - txt = subprocess.check_output(cmd,stderr=subprocess.STDOUT,shell=True) - except subprocess.CalledProcessError as e: - print(e.output) - sys.exit() - - if verboseflag: print(txt.decode("UTF-8")) - - # now install the model that needed the driver - - print("Now installing model : %s" % addmodelname) - cmd = 'cd "%s"; python Install.py -n -a %s' % (thisdir,addmodelname) - try: - txt = subprocess.check_output(cmd,stderr=subprocess.STDOUT,shell=True) - except subprocess.CalledProcessError as e: - print(e.output) - sys.exit() - print(txt.decode("UTF-8")) - sys.exit() - else: - print(firstRunOutput) - print("Error, unable to build and install OpenKIM item: %s" \ - % addmodelname) - sys.exit() - - # success the first time - - if verboseflag: print(txt.decode("UTF-8")) - print("Removing kim item source and build files ...") - cmd = 'cd "%s"; rm -rf %s; rm -rf %s.tgz' %(thisdir,addmodelname,addmodelname) - subprocess.check_output(cmd,stderr=subprocess.STDOUT,shell=True) + cmd = '%s/bin/kim-api-v1-collections-management install system %s' % (kimdir, addmodelname) + txt = subprocess.check_output(cmd,stderr=subprocess.STDOUT,shell=True) + if verboseflag: print (txt.decode("UTF-8")) diff --git a/lib/kim/Makefile.lammps b/lib/kim/Makefile.lammps index b66d7005a498f119341ba24cef5e24cfdda99630..d73891d1e20578b890946c69f091f45c78131b4f 100644 --- a/lib/kim/Makefile.lammps +++ b/lib/kim/Makefile.lammps @@ -18,10 +18,10 @@ include ../../lib/kim/Makefile.KIM_DIR -ifeq ($(wildcard $(KIM_INSTALL_DIR)/bin/kim-api-build-config),) - KIM_CONFIG_HELPER = kim-api-build-config +ifeq ($(wildcard $(KIM_INSTALL_DIR)/bin/kim-api-v1-build-config),) + KIM_CONFIG_HELPER = kim-api-v1-build-config else - KIM_CONFIG_HELPER = $(KIM_INSTALL_DIR)/bin/kim-api-build-config + KIM_CONFIG_HELPER = $(KIM_INSTALL_DIR)/bin/kim-api-v1-build-config endif ifeq ($(shell $(KIM_CONFIG_HELPER) --version 2> /dev/null),) 
$(error $(KIM_CONFIG_HELPER) utility is not available. Something is wrong with your KIM API package setup) diff --git a/lib/kokkos/CHANGELOG.md b/lib/kokkos/CHANGELOG.md index 43d3f17d634eacb38cf1f017afb9fea5706ac7cb..d414056187771f977114194c8e3e0091de47de75 100644 --- a/lib/kokkos/CHANGELOG.md +++ b/lib/kokkos/CHANGELOG.md @@ -1,5 +1,24 @@ # Change Log +## [2.04.04](https://github.com/kokkos/kokkos/tree/2.04.04) (2017-09-11) +[Full Changelog](https://github.com/kokkos/kokkos/compare/2.04.00...2.04.04) + +**Implemented enhancements:** + +- OpenMP partition: set number of threads on nested level [\#1082](https://github.com/kokkos/kokkos/issues/1082) +- Add StaticCrsGraph row\(\) method [\#1071](https://github.com/kokkos/kokkos/issues/1071) +- Enhance Kokkos complex operator overloading [\#1052](https://github.com/kokkos/kokkos/issues/1052) +- Tell Trilinos packages about host+device lambda [\#1019](https://github.com/kokkos/kokkos/issues/1019) +- Function markup for defaulted class members [\#952](https://github.com/kokkos/kokkos/issues/952) +- Add deterministic random number generator [\#857](https://github.com/kokkos/kokkos/issues/857) + +**Fixed bugs:** + +- Fix reduction\_identity\<T\>::max for floating point numbers [\#1048](https://github.com/kokkos/kokkos/issues/1048) +- Fix MD iteration policy ignores lower bound on GPUs [\#1041](https://github.com/kokkos/kokkos/issues/1041) +- (Experimental) HBWSpace Linking issues in KokkosKernels [\#1094](https://github.com/kokkos/kokkos/issues/1094) +- (Experimental) ROCm: algorithms/unit\_tests test\_sort failing with segfault [\#1070](https://github.com/kokkos/kokkos/issues/1070) + ## [2.04.00](https://github.com/kokkos/kokkos/tree/2.04.00) (2017-08-16) [Full Changelog](https://github.com/kokkos/kokkos/compare/2.03.13...2.04.00) diff --git a/lib/kokkos/Makefile.kokkos b/lib/kokkos/Makefile.kokkos deleted file mode 100644 index 0b0036da374b7bf1d0cccb5f515124012b689888..0000000000000000000000000000000000000000 --- 
a/lib/kokkos/Makefile.kokkos +++ /dev/null @@ -1,954 +0,0 @@ -# Default settings common options. - -#LAMMPS specific settings: -KOKKOS_PATH=../../lib/kokkos -CXXFLAGS=$(CCFLAGS) - -# Options: Cuda,ROCm,OpenMP,Pthreads,Qthreads,Serial -KOKKOS_DEVICES ?= "Cuda, OpenMP" -#KOKKOS_DEVICES ?= "Pthreads" -# Options: -KOKKOS_ARCH ?= "Pascal61" -# NVIDIA: Kepler,Kepler30,Kepler32,Kepler35,Kepler37,Maxwell,Maxwell50,Maxwell52,Maxwell53,Pascal60,Pascal61 -# ARM: ARMv80,ARMv81,ARMv8-ThunderX -# IBM: BGQ,Power7,Power8,Power9 -# AMD-GPUS: Kaveri,Carrizo,Fiji,Vega -# AMD-CPUS: AMDAVX,Ryzen,Epyc -# Options: yes,no -KOKKOS_DEBUG ?= "no" -# Options: hwloc,librt,experimental_memkind -KOKKOS_USE_TPLS ?= "" -# Options: c++11,c++1z -KOKKOS_CXX_STANDARD ?= "c++11" -# Options: aggressive_vectorization,disable_profiling -KOKKOS_OPTIONS ?= "" - -# Default settings specific options. -# Options: force_uvm,use_ldg,rdc,enable_lambda -KOKKOS_CUDA_OPTIONS ?= "enable_lambda" - -# Check for general settings. -KOKKOS_INTERNAL_ENABLE_DEBUG := $(strip $(shell echo $(KOKKOS_DEBUG) | grep "yes" | wc -l)) -KOKKOS_INTERNAL_ENABLE_CXX11 := $(strip $(shell echo $(KOKKOS_CXX_STANDARD) | grep "c++11" | wc -l)) -KOKKOS_INTERNAL_ENABLE_CXX1Z := $(strip $(shell echo $(KOKKOS_CXX_STANDARD) | grep "c++1z" | wc -l)) - -# Check for external libraries. -KOKKOS_INTERNAL_USE_HWLOC := $(strip $(shell echo $(KOKKOS_USE_TPLS) | grep "hwloc" | wc -l)) -KOKKOS_INTERNAL_USE_LIBRT := $(strip $(shell echo $(KOKKOS_USE_TPLS) | grep "librt" | wc -l)) -KOKKOS_INTERNAL_USE_MEMKIND := $(strip $(shell echo $(KOKKOS_USE_TPLS) | grep "experimental_memkind" | wc -l)) - -# Check for advanced settings. 
-KOKKOS_INTERNAL_ENABLE_COMPILER_WARNINGS := $(strip $(shell echo $(KOKKOS_OPTIONS) | grep "compiler_warnings" | wc -l)) -KOKKOS_INTERNAL_OPT_RANGE_AGGRESSIVE_VECTORIZATION := $(strip $(shell echo $(KOKKOS_OPTIONS) | grep "aggressive_vectorization" | wc -l)) -KOKKOS_INTERNAL_DISABLE_PROFILING := $(strip $(shell echo $(KOKKOS_OPTIONS) | grep "disable_profiling" | wc -l)) -KOKKOS_INTERNAL_DISABLE_DUALVIEW_MODIFY_CHECK := $(strip $(shell echo $(KOKKOS_OPTIONS) | grep "disable_dualview_modify_check" | wc -l)) -KOKKOS_INTERNAL_ENABLE_PROFILING_LOAD_PRINT := $(strip $(shell echo $(KOKKOS_OPTIONS) | grep "enable_profile_load_print" | wc -l)) -KOKKOS_INTERNAL_CUDA_USE_LDG := $(strip $(shell echo $(KOKKOS_CUDA_OPTIONS) | grep "use_ldg" | wc -l)) -KOKKOS_INTERNAL_CUDA_USE_UVM := $(strip $(shell echo $(KOKKOS_CUDA_OPTIONS) | grep "force_uvm" | wc -l)) -KOKKOS_INTERNAL_CUDA_USE_RELOC := $(strip $(shell echo $(KOKKOS_CUDA_OPTIONS) | grep "rdc" | wc -l)) -KOKKOS_INTERNAL_CUDA_USE_LAMBDA := $(strip $(shell echo $(KOKKOS_CUDA_OPTIONS) | grep "enable_lambda" | wc -l)) - - -# Check for Kokkos Host Execution Spaces one of which must be on. -KOKKOS_INTERNAL_USE_OPENMP := $(strip $(shell echo $(subst OpenMPTarget,,$(KOKKOS_DEVICES)) | grep OpenMP | wc -l)) -KOKKOS_INTERNAL_USE_PTHREADS := $(strip $(shell echo $(KOKKOS_DEVICES) | grep Pthread | wc -l)) -KOKKOS_INTERNAL_USE_QTHREADS := $(strip $(shell echo $(KOKKOS_DEVICES) | grep Qthreads | wc -l)) -KOKKOS_INTERNAL_USE_SERIAL := $(strip $(shell echo $(KOKKOS_DEVICES) | grep Serial | wc -l)) - -ifeq ($(KOKKOS_INTERNAL_USE_OPENMP), 0) - ifeq ($(KOKKOS_INTERNAL_USE_PTHREADS), 0) - ifeq ($(KOKKOS_INTERNAL_USE_QTHREADS), 0) - KOKKOS_INTERNAL_USE_SERIAL := 1 - endif - endif -endif - -# Check for other Execution Spaces. 
-KOKKOS_INTERNAL_USE_CUDA := $(strip $(shell echo $(KOKKOS_DEVICES) | grep Cuda | wc -l)) -KOKKOS_INTERNAL_USE_ROCM := $(strip $(shell echo $(KOKKOS_DEVICES) | grep ROCm | wc -l)) -KOKKOS_INTERNAL_USE_OPENMPTARGET := $(strip $(shell echo $(KOKKOS_DEVICES) | grep OpenMPTarget | wc -l)) - -ifeq ($(KOKKOS_INTERNAL_USE_CUDA), 1) - KOKKOS_INTERNAL_NVCC_PATH := $(shell which nvcc) - CUDA_PATH ?= $(KOKKOS_INTERNAL_NVCC_PATH:/bin/nvcc=) - KOKKOS_INTERNAL_COMPILER_NVCC_VERSION := $(shell nvcc --version 2>&1 | grep release | cut -d' ' -f5 | cut -d',' -f1 | tr -d .) -endif - -# Check OS. -KOKKOS_OS := $(strip $(shell uname -s)) -KOKKOS_INTERNAL_OS_CYGWIN := $(strip $(shell uname -s | grep CYGWIN | wc -l)) -KOKKOS_INTERNAL_OS_LINUX := $(strip $(shell uname -s | grep Linux | wc -l)) -KOKKOS_INTERNAL_OS_DARWIN := $(strip $(shell uname -s | grep Darwin | wc -l)) - -# Check compiler. -KOKKOS_INTERNAL_COMPILER_INTEL := $(strip $(shell $(CXX) --version 2>&1 | grep "Intel Corporation" | wc -l)) -KOKKOS_INTERNAL_COMPILER_PGI := $(strip $(shell $(CXX) --version 2>&1 | grep PGI | wc -l)) -KOKKOS_INTERNAL_COMPILER_XL := $(strip $(shell $(CXX) -qversion 2>&1 | grep XL | wc -l)) -KOKKOS_INTERNAL_COMPILER_CRAY := $(strip $(shell $(CXX) -craype-verbose 2>&1 | grep "CC-" | wc -l)) -KOKKOS_INTERNAL_COMPILER_NVCC := $(strip $(shell $(CXX) --version 2>&1 | grep nvcc | wc -l)) -ifneq ($(OMPI_CXX),) - KOKKOS_INTERNAL_COMPILER_NVCC := $(strip $(shell $(OMPI_CXX) --version 2>&1 | grep nvcc | wc -l)) -endif -ifneq ($(MPICH_CXX),) - KOKKOS_INTERNAL_COMPILER_NVCC := $(strip $(shell $(MPICH_CXX) --version 2>&1 | grep nvcc | wc -l)) -endif -KOKKOS_INTERNAL_COMPILER_CLANG := $(strip $(shell $(CXX) --version 2>&1 | grep clang | wc -l)) -KOKKOS_INTERNAL_COMPILER_APPLE_CLANG := $(strip $(shell $(CXX) --version 2>&1 | grep "apple-darwin" | wc -l)) -KOKKOS_INTERNAL_COMPILER_HCC := $(strip $(shell $(CXX) --version 2>&1 | grep HCC | wc -l)) - -ifeq ($(KOKKOS_INTERNAL_COMPILER_CLANG), 2) - 
KOKKOS_INTERNAL_COMPILER_CLANG = 1 -endif -ifeq ($(KOKKOS_INTERNAL_COMPILER_XL), 2) - KOKKOS_INTERNAL_COMPILER_XL = 1 -endif - -# Apple Clang passes both clang and apple clang tests, so turn off clang. -ifeq ($(KOKKOS_INTERNAL_COMPILER_APPLE_CLANG), 1) - KOKKOS_INTERNAL_COMPILER_CLANG = 0 -endif -# AMD HCC passes both clang and hcc test so turn off clang -ifeq ($(KOKKOS_INTERNAL_COMPILER_HCC), 1) - KOKKOS_INTENAL_COMPILER_CLANG = 0 -endif - -ifeq ($(KOKKOS_INTERNAL_COMPILER_CLANG), 1) - KOKKOS_INTERNAL_COMPILER_CLANG_VERSION := $(shell clang --version | grep version | cut -d ' ' -f3 | tr -d '.') - - ifeq ($(KOKKOS_INTERNAL_USE_CUDA), 1) - ifeq ($(shell test $(KOKKOS_INTERNAL_COMPILER_CLANG_VERSION) -lt 400; echo $$?),0) - $(error Compiling Cuda code directly with Clang requires version 4.0.0 or higher) - endif - - KOKKOS_INTERNAL_CUDA_USE_LAMBDA := 1 - endif -endif - -# Set compiler warnings flags. -ifeq ($(KOKKOS_INTERNAL_ENABLE_COMPILER_WARNINGS), 1) - ifeq ($(KOKKOS_INTERNAL_COMPILER_PGI), 1) - # TODO check if PGI accepts GNU style warnings - KOKKOS_INTERNAL_COMPILER_WARNINGS = - else - ifeq ($(KOKKOS_INTERNAL_COMPILER_CLANG), 1) - KOKKOS_INTERNAL_COMPILER_WARNINGS = -Wall -Wshadow -pedantic -Wsign-compare -Wtype-limits -Wuninitialized - else - ifeq ($(KOKKOS_INTERNAL_COMPILER_APPLE_CLANG), 1) - KOKKOS_INTERNAL_COMPILER_WARNINGS = -Wall -Wshadow -pedantic -Wsign-compare -Wtype-limits -Wuninitialized - else - ifeq ($(KOKKOS_INTERNAL_COMPILER_XL), 1) - KOKKOS_INTERNAL_COMPILER_WARNINGS = -Wall -Wshadow -pedantic -Wsign-compare -Wtype-limits -Wuninitialized - else - ifeq ($(KOKKOS_INTERNAL_COMPILER_CRAY), 1) - # TODO check if cray accepts GNU style warnings - KOKKOS_INTERNAL_COMPILER_WARNINGS = - else - #gcc - KOKKOS_INTERNAL_COMPILER_WARNINGS = -Wall -Wshadow -pedantic -Wsign-compare -Wtype-limits -Wignored-qualifiers -Wempty-body -Wclobbered -Wuninitialized - endif - endif - endif - endif - endif -else - KOKKOS_INTERNAL_COMPILER_WARNINGS = -endif - -# Set OpenMP 
flags. -ifeq ($(KOKKOS_INTERNAL_COMPILER_PGI), 1) - KOKKOS_INTERNAL_OPENMP_FLAG := -mp -else - ifeq ($(KOKKOS_INTERNAL_COMPILER_CLANG), 1) - KOKKOS_INTERNAL_OPENMP_FLAG := -fopenmp=libomp - else - ifeq ($(KOKKOS_INTERNAL_COMPILER_APPLE_CLANG), 1) - KOKKOS_INTERNAL_OPENMP_FLAG := -fopenmp=libomp - else - ifeq ($(KOKKOS_INTERNAL_COMPILER_XL), 1) - KOKKOS_INTERNAL_OPENMP_FLAG := -qsmp=omp - else - ifeq ($(KOKKOS_INTERNAL_COMPILER_CRAY), 1) - # OpenMP is turned on by default in Cray compiler environment. - KOKKOS_INTERNAL_OPENMP_FLAG := - else - KOKKOS_INTERNAL_OPENMP_FLAG := -fopenmp - endif - endif - endif - endif -endif -ifeq ($(KOKKOS_INTERNAL_COMPILER_XL), 1) - KOKKOS_INTERNAL_OPENMPTARGET_FLAG := -DKOKKOS_IBM_XL_OMP45_WORKAROUND -qsmp=omp -qoffload -qnoeh -else - ifeq ($(KOKKOS_INTERNAL_COMPILER_CLANG), 1) - KOKKOS_INTERNAL_OPENMPTARGET_FLAG := -DKOKKOS_BUG_WORKAROUND_IBM_CLANG_OMP45_VIEW_INIT -fopenmp-implicit-declare-target -fopenmp-targets=nvptx64-nvidia-cuda -fopenmp -fopenmp=libomp - endif -endif - -# Set C++11 flags. -ifeq ($(KOKKOS_INTERNAL_COMPILER_PGI), 1) - KOKKOS_INTERNAL_CXX11_FLAG := --c++11 -else - ifeq ($(KOKKOS_INTERNAL_COMPILER_XL), 1) - KOKKOS_INTERNAL_CXX11_FLAG := -std=c++11 - else - ifeq ($(KOKKOS_INTERNAL_COMPILER_CRAY), 1) - KOKKOS_INTERNAL_CXX11_FLAG := -hstd=c++11 - else - ifeq ($(KOKKOS_INTERNAL_COMPILER_HCC), 1) - KOKKOS_INTERNAL_CXX11_FLAG := - else - KOKKOS_INTERNAL_CXX11_FLAG := --std=c++11 - KOKKOS_INTERNAL_CXX1Z_FLAG := --std=c++1z - endif - endif - endif -endif - -# Check for Kokkos Architecture settings. - -# Intel based. 
-KOKKOS_INTERNAL_USE_ARCH_KNC := $(strip $(shell echo $(KOKKOS_ARCH) | grep KNC | wc -l)) -KOKKOS_INTERNAL_USE_ARCH_WSM := $(strip $(shell echo $(KOKKOS_ARCH) | grep WSM | wc -l)) -KOKKOS_INTERNAL_USE_ARCH_SNB := $(strip $(shell echo $(KOKKOS_ARCH) | grep SNB | wc -l)) -KOKKOS_INTERNAL_USE_ARCH_HSW := $(strip $(shell echo $(KOKKOS_ARCH) | grep HSW | wc -l)) -KOKKOS_INTERNAL_USE_ARCH_BDW := $(strip $(shell echo $(KOKKOS_ARCH) | grep BDW | wc -l)) -KOKKOS_INTERNAL_USE_ARCH_SKX := $(strip $(shell echo $(KOKKOS_ARCH) | grep SKX | wc -l)) -KOKKOS_INTERNAL_USE_ARCH_KNL := $(strip $(shell echo $(KOKKOS_ARCH) | grep KNL | wc -l)) - -# NVIDIA based. -NVCC_WRAPPER := $(KOKKOS_PATH)/bin/nvcc_wrapper -KOKKOS_INTERNAL_USE_ARCH_KEPLER30 := $(strip $(shell echo $(KOKKOS_ARCH) | grep Kepler30 | wc -l)) -KOKKOS_INTERNAL_USE_ARCH_KEPLER32 := $(strip $(shell echo $(KOKKOS_ARCH) | grep Kepler32 | wc -l)) -KOKKOS_INTERNAL_USE_ARCH_KEPLER35 := $(strip $(shell echo $(KOKKOS_ARCH) | grep Kepler35 | wc -l)) -KOKKOS_INTERNAL_USE_ARCH_KEPLER37 := $(strip $(shell echo $(KOKKOS_ARCH) | grep Kepler37 | wc -l)) -KOKKOS_INTERNAL_USE_ARCH_MAXWELL50 := $(strip $(shell echo $(KOKKOS_ARCH) | grep Maxwell50 | wc -l)) -KOKKOS_INTERNAL_USE_ARCH_MAXWELL52 := $(strip $(shell echo $(KOKKOS_ARCH) | grep Maxwell52 | wc -l)) -KOKKOS_INTERNAL_USE_ARCH_MAXWELL53 := $(strip $(shell echo $(KOKKOS_ARCH) | grep Maxwell53 | wc -l)) -KOKKOS_INTERNAL_USE_ARCH_PASCAL61 := $(strip $(shell echo $(KOKKOS_ARCH) | grep Pascal61 | wc -l)) -KOKKOS_INTERNAL_USE_ARCH_PASCAL60 := $(strip $(shell echo $(KOKKOS_ARCH) | grep Pascal60 | wc -l)) -KOKKOS_INTERNAL_USE_ARCH_NVIDIA := $(strip $(shell echo $(KOKKOS_INTERNAL_USE_ARCH_KEPLER30) \ - + $(KOKKOS_INTERNAL_USE_ARCH_KEPLER32) \ - + $(KOKKOS_INTERNAL_USE_ARCH_KEPLER35) \ - + $(KOKKOS_INTERNAL_USE_ARCH_KEPLER37) \ - + $(KOKKOS_INTERNAL_USE_ARCH_PASCAL61) \ - + $(KOKKOS_INTERNAL_USE_ARCH_PASCAL60) \ - + $(KOKKOS_INTERNAL_USE_ARCH_MAXWELL50) \ - + 
$(KOKKOS_INTERNAL_USE_ARCH_MAXWELL52) \ - + $(KOKKOS_INTERNAL_USE_ARCH_MAXWELL53) | bc)) - -ifeq ($(KOKKOS_INTERNAL_USE_ARCH_NVIDIA), 0) - KOKKOS_INTERNAL_USE_ARCH_MAXWELL50 := $(strip $(shell echo $(KOKKOS_ARCH) | grep Maxwell | wc -l)) - KOKKOS_INTERNAL_USE_ARCH_KEPLER35 := $(strip $(shell echo $(KOKKOS_ARCH) | grep Kepler | wc -l)) - KOKKOS_INTERNAL_USE_ARCH_NVIDIA := $(strip $(shell echo $(KOKKOS_INTERNAL_USE_ARCH_KEPLER30) \ - + $(KOKKOS_INTERNAL_USE_ARCH_KEPLER32) \ - + $(KOKKOS_INTERNAL_USE_ARCH_KEPLER35) \ - + $(KOKKOS_INTERNAL_USE_ARCH_KEPLER37) \ - + $(KOKKOS_INTERNAL_USE_ARCH_PASCAL61) \ - + $(KOKKOS_INTERNAL_USE_ARCH_PASCAL60) \ - + $(KOKKOS_INTERNAL_USE_ARCH_MAXWELL50) \ - + $(KOKKOS_INTERNAL_USE_ARCH_MAXWELL52) \ - + $(KOKKOS_INTERNAL_USE_ARCH_MAXWELL53) | bc)) -endif - -ifeq ($(KOKKOS_INTERNAL_USE_ARCH_NVIDIA), 1) - ifeq ($(KOKKOS_INTERNAL_USE_OPENMPTARGET), 1) - ifeq ($(KOKKOS_INTERNAL_COMPILER_CLANG), 1) - KOKKOS_INTERNAL_NVCC_PATH := $(shell which nvcc) - CUDA_PATH ?= $(KOKKOS_INTERNAL_NVCC_PATH:/bin/nvcc=) - KOKKOS_INTERNAL_OPENMPTARGET_FLAG := $(KOKKOS_INTERNAL_OPENMPTARGET_FLAG) --cuda-path=$(CUDA_PATH) - endif - endif -endif -# ARM based. -KOKKOS_INTERNAL_USE_ARCH_ARMV80 := $(strip $(shell echo $(KOKKOS_ARCH) | grep ARMv80 | wc -l)) -KOKKOS_INTERNAL_USE_ARCH_ARMV81 := $(strip $(shell echo $(KOKKOS_ARCH) | grep ARMv81 | wc -l)) -KOKKOS_INTERNAL_USE_ARCH_ARMV8_THUNDERX := $(strip $(shell echo $(KOKKOS_ARCH) | grep ARMv8-ThunderX | wc -l)) -KOKKOS_INTERNAL_USE_ARCH_ARM := $(strip $(shell echo $(KOKKOS_INTERNAL_USE_ARCH_ARMV80)+$(KOKKOS_INTERNAL_USE_ARCH_ARMV81)+$(KOKKOS_INTERNAL_USE_ARCH_ARMV8_THUNDERX) | bc)) - -# IBM based. 
-KOKKOS_INTERNAL_USE_ARCH_BGQ := $(strip $(shell echo $(KOKKOS_ARCH) | grep BGQ | wc -l)) -KOKKOS_INTERNAL_USE_ARCH_POWER7 := $(strip $(shell echo $(KOKKOS_ARCH) | grep Power7 | wc -l)) -KOKKOS_INTERNAL_USE_ARCH_POWER8 := $(strip $(shell echo $(KOKKOS_ARCH) | grep Power8 | wc -l)) -KOKKOS_INTERNAL_USE_ARCH_POWER9 := $(strip $(shell echo $(KOKKOS_ARCH) | grep Power9 | wc -l)) -KOKKOS_INTERNAL_USE_ARCH_IBM := $(strip $(shell echo $(KOKKOS_INTERNAL_USE_ARCH_BGQ)+$(KOKKOS_INTERNAL_USE_ARCH_POWER7)+$(KOKKOS_INTERNAL_USE_ARCH_POWER8)+$(KOKKOS_INTERNAL_USE_ARCH_POWER9) | bc)) - -# AMD based. -KOKKOS_INTERNAL_USE_ARCH_AMDAVX := $(strip $(shell echo $(KOKKOS_ARCH) | grep AMDAVX | wc -l)) -KOKKOS_INTERNAL_USE_ARCH_RYZEN := $(strip $(shell echo $(KOKKOS_ARCH) | grep Ryzen | wc -l)) -KOKKOS_INTERNAL_USE_ARCH_EPYC := $(strip $(shell echo $(KOKKOS_ARCH) | grep Epyc | wc -l)) -KOKKOS_INTERNAL_USE_ARCH_KAVERI := $(strip $(shell echo $(KOKKOS_ARCH) | grep Kaveri | wc -l)) -KOKKOS_INTERNAL_USE_ARCH_CARRIZO := $(strip $(shell echo $(KOKKOS_ARCH) | grep Carrizo | wc -l)) -KOKKOS_INTERNAL_USE_ARCH_FIJI := $(strip $(shell echo $(KOKKOS_ARCH) | grep Fiji | wc -l)) -KOKKOS_INTERNAL_USE_ARCH_VEGA := $(strip $(shell echo $(KOKKOS_ARCH) | grep Vega | wc -l)) -KOKKOS_INTERNAL_USE_ARCH_GFX901 := $(strip $(shell echo $(KOKKOS_ARCH) | grep gfx901 | wc -l)) - -# Any AVX? -KOKKOS_INTERNAL_USE_ARCH_SSE42 := $(strip $(shell echo $(KOKKOS_INTERNAL_USE_ARCH_WSM) | bc )) -KOKKOS_INTERNAL_USE_ARCH_AVX := $(strip $(shell echo $(KOKKOS_INTERNAL_USE_ARCH_SNB)+$(KOKKOS_INTERNAL_USE_ARCH_AMDAVX) | bc )) -KOKKOS_INTERNAL_USE_ARCH_AVX2 := $(strip $(shell echo $(KOKKOS_INTERNAL_USE_ARCH_HSW)+$(KOKKOS_INTERNAL_USE_ARCH_BDW) | bc )) -KOKKOS_INTERNAL_USE_ARCH_AVX512MIC := $(strip $(shell echo $(KOKKOS_INTERNAL_USE_ARCH_KNL) | bc )) -KOKKOS_INTERNAL_USE_ARCH_AVX512XEON := $(strip $(shell echo $(KOKKOS_INTERNAL_USE_ARCH_SKX) | bc )) - -# Decide what ISA level we are able to support. 
-KOKKOS_INTERNAL_USE_ISA_X86_64 := $(strip $(shell echo $(KOKKOS_INTERNAL_USE_ARCH_WSM)+$(KOKKOS_INTERNAL_USE_ARCH_SNB)+$(KOKKOS_INTERNAL_USE_ARCH_HSW)+$(KOKKOS_INTERNAL_USE_ARCH_BDW)+$(KOKKOS_INTERNAL_USE_ARCH_KNL)+$(KOKKOS_INTERNAL_USE_ARCH_SKX) | bc )) -KOKKOS_INTERNAL_USE_ISA_KNC := $(strip $(shell echo $(KOKKOS_INTERNAL_USE_ARCH_KNC) | bc )) -KOKKOS_INTERNAL_USE_ISA_POWERPCLE := $(strip $(shell echo $(KOKKOS_INTERNAL_USE_ARCH_POWER8)+$(KOKKOS_INTERNAL_USE_ARCH_POWER9) | bc )) -KOKKOS_INTERNAL_USE_ISA_POWERPCBE := $(strip $(shell echo $(KOKKOS_INTERNAL_USE_ARCH_POWER7) | bc )) - -# Decide whether we can support transactional memory -KOKKOS_INTERNAL_USE_TM := $(strip $(shell echo $(KOKKOS_INTERNAL_USE_ARCH_BDW)+$(KOKKOS_INTERNAL_USE_ARCH_SKX) | bc )) - -# Incompatible flags? -KOKKOS_INTERNAL_USE_ARCH_MULTIHOST := $(strip $(shell echo "$(KOKKOS_INTERNAL_USE_ARCH_SSE42)+$(KOKKOS_INTERNAL_USE_ARCH_AVX)+$(KOKKOS_INTERNAL_USE_ARCH_AVX2)+$(KOKKOS_INTERNAL_USE_ARCH_AVX512MIC)+$(KOKKOS_INTERNAL_USE_ARCH_AVX512XEON)+$(KOKKOS_INTERNAL_USE_ARCH_KNC)+$(KOKKOS_INTERNAL_USE_ARCH_IBM)+$(KOKKOS_INTERNAL_USE_ARCH_ARM)>1" | bc )) -KOKKOS_INTERNAL_USE_ARCH_MULTIGPU := $(strip $(shell echo "$(KOKKOS_INTERNAL_USE_ARCH_NVIDIA)>1" | bc)) - -ifeq ($(KOKKOS_INTERNAL_USE_ARCH_MULTIHOST), 1) - $(error Defined Multiple Host architectures: KOKKOS_ARCH=$(KOKKOS_ARCH) ) -endif -ifeq ($(KOKKOS_INTERNAL_USE_ARCH_MULTIGPU), 1) - $(error Defined Multiple GPU architectures: KOKKOS_ARCH=$(KOKKOS_ARCH) ) -endif - -# Generating the list of Flags. - -KOKKOS_CPPFLAGS = -I./ -I$(KOKKOS_PATH)/core/src -I$(KOKKOS_PATH)/containers/src -I$(KOKKOS_PATH)/algorithms/src - -KOKKOS_CXXFLAGS = -ifeq ($(KOKKOS_INTERNAL_ENABLE_COMPILER_WARNINGS), 1) - KOKKOS_CXXFLAGS += $(KOKKOS_INTERNAL_COMPILER_WARNINGS) -endif - -KOKKOS_LIBS = -lkokkos -ldl -KOKKOS_LDFLAGS = -L$(shell pwd) -KOKKOS_SRC = -KOKKOS_HEADERS = - -# Generating the KokkosCore_config.h file. 
- -tmp := $(shell echo "/* ---------------------------------------------" > KokkosCore_config.tmp) -tmp := $(shell echo "Makefile constructed configuration:" >> KokkosCore_config.tmp) -tmp := $(shell date >> KokkosCore_config.tmp) -tmp := $(shell echo "----------------------------------------------*/" >> KokkosCore_config.tmp) - -tmp := $(shell echo '\#if !defined(KOKKOS_MACROS_HPP) || defined(KOKKOS_CORE_CONFIG_H)' >> KokkosCore_config.tmp) -tmp := $(shell echo '\#error "Do not include KokkosCore_config.h directly; include Kokkos_Macros.hpp instead."' >> KokkosCore_config.tmp) -tmp := $(shell echo '\#else' >> KokkosCore_config.tmp) -tmp := $(shell echo '\#define KOKKOS_CORE_CONFIG_H' >> KokkosCore_config.tmp) -tmp := $(shell echo '\#endif' >> KokkosCore_config.tmp) - -tmp := $(shell echo "/* Execution Spaces */" >> KokkosCore_config.tmp) - -ifeq ($(KOKKOS_INTERNAL_USE_CUDA), 1) - tmp := $(shell echo "\#define KOKKOS_HAVE_CUDA 1" >> KokkosCore_config.tmp ) -endif - -ifeq ($(KOKKOS_INTERNAL_USE_ROCM), 1) - tmp := $(shell echo '\#define KOKKOS_ENABLE_ROCM 1' >> KokkosCore_config.tmp) -endif - -ifeq ($(KOKKOS_INTERNAL_USE_OPENMPTARGET), 1) - tmp := $(shell echo '\#define KOKKOS_ENABLE_OPENMPTARGET 1' >> KokkosCore_config.tmp) -endif - -ifeq ($(KOKKOS_INTERNAL_USE_OPENMP), 1) - tmp := $(shell echo '\#define KOKKOS_HAVE_OPENMP 1' >> KokkosCore_config.tmp) -endif - -ifeq ($(KOKKOS_INTERNAL_USE_PTHREADS), 1) - tmp := $(shell echo "\#define KOKKOS_HAVE_PTHREAD 1" >> KokkosCore_config.tmp ) -endif - -ifeq ($(KOKKOS_INTERNAL_USE_QTHREADS), 1) - tmp := $(shell echo "\#define KOKKOS_HAVE_QTHREADS 1" >> KokkosCore_config.tmp ) -endif - -ifeq ($(KOKKOS_INTERNAL_USE_SERIAL), 1) - tmp := $(shell echo "\#define KOKKOS_HAVE_SERIAL 1" >> KokkosCore_config.tmp ) -endif - -ifeq ($(KOKKOS_INTERNAL_USE_TM), 1) - tmp := $(shell echo "\#ifndef __CUDA_ARCH__" >> KokkosCore_config.tmp ) - tmp := $(shell echo "\#define KOKKOS_ENABLE_TM" >> KokkosCore_config.tmp ) - tmp := $(shell echo 
"\#endif" >> KokkosCore_config.tmp ) -endif - -ifeq ($(KOKKOS_INTERNAL_USE_ISA_X86_64), 1) - tmp := $(shell echo "\#ifndef __CUDA_ARCH__" >> KokkosCore_config.tmp ) - tmp := $(shell echo "\#define KOKKOS_USE_ISA_X86_64" >> KokkosCore_config.tmp ) - tmp := $(shell echo "\#endif" >> KokkosCore_config.tmp ) -endif - -ifeq ($(KOKKOS_INTERNAL_USE_ISA_KNC), 1) - tmp := $(shell echo "\#ifndef __CUDA_ARCH__" >> KokkosCore_config.tmp ) - tmp := $(shell echo "\#define KOKKOS_USE_ISA_KNC" >> KokkosCore_config.tmp ) - tmp := $(shell echo "\#endif" >> KokkosCore_config.tmp ) -endif - -ifeq ($(KOKKOS_INTERNAL_USE_ISA_POWERPCLE), 1) - tmp := $(shell echo "\#ifndef __CUDA_ARCH__" >> KokkosCore_config.tmp ) - tmp := $(shell echo "\#define KOKKOS_USE_ISA_POWERPCLE" >> KokkosCore_config.tmp ) - tmp := $(shell echo "\#endif" >> KokkosCore_config.tmp ) -endif - -ifeq ($(KOKKOS_INTERNAL_USE_ISA_POWERPCBE), 1) - tmp := $(shell echo "\#ifndef __CUDA_ARCH__" >> KokkosCore_config.tmp ) - tmp := $(shell echo "\#define KOKKOS_USE_ISA_POWERPCBE" >> KokkosCore_config.tmp ) - tmp := $(shell echo "\#endif" >> KokkosCore_config.tmp ) -endif - -tmp := $(shell echo "/* General Settings */" >> KokkosCore_config.tmp) -ifeq ($(KOKKOS_INTERNAL_ENABLE_CXX11), 1) - KOKKOS_CXXFLAGS += $(KOKKOS_INTERNAL_CXX11_FLAG) - tmp := $(shell echo "\#define KOKKOS_HAVE_CXX11 1" >> KokkosCore_config.tmp ) -endif - -ifeq ($(KOKKOS_INTERNAL_ENABLE_CXX1Z), 1) - KOKKOS_CXXFLAGS += $(KOKKOS_INTERNAL_CXX1Z_FLAG) - tmp := $(shell echo "\#define KOKKOS_HAVE_CXX11 1" >> KokkosCore_config.tmp ) - tmp := $(shell echo "\#define KOKKOS_HAVE_CXX1Z 1" >> KokkosCore_config.tmp ) -endif - -ifeq ($(KOKKOS_INTERNAL_ENABLE_DEBUG), 1) - ifeq ($(KOKKOS_INTERNAL_COMPILER_NVCC), 1) - KOKKOS_CXXFLAGS += -lineinfo - endif - - KOKKOS_CXXFLAGS += -g - KOKKOS_LDFLAGS += -g -ldl - tmp := $(shell echo "\#define KOKKOS_ENABLE_DEBUG_BOUNDS_CHECK 1" >> KokkosCore_config.tmp ) - tmp := $(shell echo "\#define KOKKOS_HAVE_DEBUG 1" >> KokkosCore_config.tmp 
) - ifeq ($(KOKKOS_INTERNAL_DISABLE_DUALVIEW_MODIFY_CHECK), 0) - tmp := $(shell echo "\#define KOKKOS_ENABLE_DEBUG_DUALVIEW_MODIFY_CHECK 1" >> KokkosCore_config.tmp ) - endif -endif - -ifeq ($(KOKKOS_INTERNAL_ENABLE_PROFILING_LOAD_PRINT), 1) - tmp := $(shell echo "\#define KOKKOS_ENABLE_PROFILING_LOAD_PRINT 1" >> KokkosCore_config.tmp ) -endif - -ifeq ($(KOKKOS_INTERNAL_USE_HWLOC), 1) - KOKKOS_CPPFLAGS += -I$(HWLOC_PATH)/include - KOKKOS_LDFLAGS += -L$(HWLOC_PATH)/lib - KOKKOS_LIBS += -lhwloc - tmp := $(shell echo "\#define KOKKOS_HAVE_HWLOC 1" >> KokkosCore_config.tmp ) -endif - -ifeq ($(KOKKOS_INTERNAL_USE_LIBRT), 1) - tmp := $(shell echo "\#define KOKKOS_USE_LIBRT 1" >> KokkosCore_config.tmp ) - KOKKOS_LIBS += -lrt -endif - -ifeq ($(KOKKOS_INTERNAL_USE_MEMKIND), 1) - KOKKOS_CPPFLAGS += -I$(MEMKIND_PATH)/include - KOKKOS_LDFLAGS += -L$(MEMKIND_PATH)/lib - KOKKOS_LIBS += -lmemkind - tmp := $(shell echo "\#define KOKKOS_HAVE_HBWSPACE 1" >> KokkosCore_config.tmp ) -endif - -ifeq ($(KOKKOS_INTERNAL_DISABLE_PROFILING), 0) - tmp := $(shell echo "\#define KOKKOS_ENABLE_PROFILING" >> KokkosCore_config.tmp ) -endif - -tmp := $(shell echo "/* Optimization Settings */" >> KokkosCore_config.tmp) - -ifeq ($(KOKKOS_INTERNAL_OPT_RANGE_AGGRESSIVE_VECTORIZATION), 1) - tmp := $(shell echo "\#define KOKKOS_OPT_RANGE_AGGRESSIVE_VECTORIZATION 1" >> KokkosCore_config.tmp ) -endif - -tmp := $(shell echo "/* Cuda Settings */" >> KokkosCore_config.tmp) - -ifeq ($(KOKKOS_INTERNAL_USE_CUDA), 1) - ifeq ($(KOKKOS_INTERNAL_CUDA_USE_LDG), 1) - tmp := $(shell echo "\#define KOKKOS_CUDA_USE_LDG_INTRINSIC 1" >> KokkosCore_config.tmp ) - else - ifeq ($(KOKKOS_INTERNAL_COMPILER_CLANG), 1) - tmp := $(shell echo "\#define KOKKOS_CUDA_USE_LDG_INTRINSIC 1" >> KokkosCore_config.tmp ) - endif - endif - - ifeq ($(KOKKOS_INTERNAL_CUDA_USE_UVM), 1) - tmp := $(shell echo "\#define KOKKOS_CUDA_USE_UVM 1" >> KokkosCore_config.tmp ) - endif - - ifeq ($(KOKKOS_INTERNAL_CUDA_USE_RELOC), 1) - tmp := $(shell echo 
"\#define KOKKOS_CUDA_USE_RELOCATABLE_DEVICE_CODE 1" >> KokkosCore_config.tmp ) - KOKKOS_CXXFLAGS += --relocatable-device-code=true - KOKKOS_LDFLAGS += --relocatable-device-code=true - endif - - ifeq ($(KOKKOS_INTERNAL_CUDA_USE_LAMBDA), 1) - ifeq ($(KOKKOS_INTERNAL_COMPILER_NVCC), 1) - ifeq ($(shell test $(KOKKOS_INTERNAL_COMPILER_NVCC_VERSION) -gt 70; echo $$?),0) - tmp := $(shell echo "\#define KOKKOS_CUDA_USE_LAMBDA 1" >> KokkosCore_config.tmp ) - KOKKOS_CXXFLAGS += -expt-extended-lambda - else - $(warning Warning: Cuda Lambda support was requested but NVCC version is too low. This requires NVCC for Cuda version 7.5 or higher. Disabling Lambda support now.) - endif - endif - - ifeq ($(KOKKOS_INTERNAL_COMPILER_CLANG), 1) - tmp := $(shell echo "\#define KOKKOS_CUDA_USE_LAMBDA 1" >> KokkosCore_config.tmp ) - endif - endif - - ifeq ($(KOKKOS_INTERNAL_COMPILER_CLANG), 1) - tmp := $(shell echo "\#define KOKKOS_CUDA_CLANG_WORKAROUND" >> KokkosCore_config.tmp ) - endif -endif - -# Add Architecture flags. 
- -ifeq ($(KOKKOS_INTERNAL_USE_ARCH_ARMV80), 1) - tmp := $(shell echo "\#define KOKKOS_ARCH_ARMV80 1" >> KokkosCore_config.tmp ) - - ifeq ($(KOKKOS_INTERNAL_COMPILER_CRAY), 1) - KOKKOS_CXXFLAGS += - KOKKOS_LDFLAGS += - else - ifeq ($(KOKKOS_INTERNAL_COMPILER_PGI), 1) - KOKKOS_CXXFLAGS += - KOKKOS_LDFLAGS += - else - KOKKOS_CXXFLAGS += -march=armv8-a - KOKKOS_LDFLAGS += -march=armv8-a - endif - endif -endif - -ifeq ($(KOKKOS_INTERNAL_USE_ARCH_ARMV81), 1) - tmp := $(shell echo "\#define KOKKOS_ARCH_ARMV81 1" >> KokkosCore_config.tmp ) - - ifeq ($(KOKKOS_INTERNAL_COMPILER_CRAY), 1) - KOKKOS_CXXFLAGS += - KOKKOS_LDFLAGS += - else - ifeq ($(KOKKOS_INTERNAL_COMPILER_PGI), 1) - KOKKOS_CXXFLAGS += - KOKKOS_LDFLAGS += - else - KOKKOS_CXXFLAGS += -march=armv8.1-a - KOKKOS_LDFLAGS += -march=armv8.1-a - endif - endif -endif - -ifeq ($(KOKKOS_INTERNAL_USE_ARCH_ARMV8_THUNDERX), 1) - tmp := $(shell echo "\#define KOKKOS_ARCH_ARMV80 1" >> KokkosCore_config.tmp ) - tmp := $(shell echo "\#define KOKKOS_ARCH_ARMV8_THUNDERX 1" >> KokkosCore_config.tmp ) - - ifeq ($(KOKKOS_INTERNAL_COMPILER_CRAY), 1) - KOKKOS_CXXFLAGS += - KOKKOS_LDFLAGS += - else - ifeq ($(KOKKOS_INTERNAL_COMPILER_PGI), 1) - KOKKOS_CXXFLAGS += - KOKKOS_LDFLAGS += - else - KOKKOS_CXXFLAGS += -march=armv8-a -mtune=thunderx - KOKKOS_LDFLAGS += -march=armv8-a -mtune=thunderx - endif - endif -endif - -ifeq ($(KOKKOS_INTERNAL_USE_ARCH_SSE42), 1) - tmp := $(shell echo "\#define KOKKOS_ARCH_SSE42 1" >> KokkosCore_config.tmp ) - - ifeq ($(KOKKOS_INTERNAL_COMPILER_INTEL), 1) - KOKKOS_CXXFLAGS += -xSSE4.2 - KOKKOS_LDFLAGS += -xSSE4.2 - else - ifeq ($(KOKKOS_INTERNAL_COMPILER_CRAY), 1) - - else - ifeq ($(KOKKOS_INTERNAL_COMPILER_PGI), 1) - KOKKOS_CXXFLAGS += -tp=nehalem - KOKKOS_LDFLAGS += -tp=nehalem - else - # Assume that this is a really a GNU compiler. 
- KOKKOS_CXXFLAGS += -msse4.2 - KOKKOS_LDFLAGS += -msse4.2 - endif - endif - endif -endif - -ifeq ($(KOKKOS_INTERNAL_USE_ARCH_AVX), 1) - tmp := $(shell echo "\#define KOKKOS_ARCH_AVX 1" >> KokkosCore_config.tmp ) - - ifeq ($(KOKKOS_INTERNAL_COMPILER_INTEL), 1) - KOKKOS_CXXFLAGS += -mavx - KOKKOS_LDFLAGS += -mavx - else - ifeq ($(KOKKOS_INTERNAL_COMPILER_CRAY), 1) - - else - ifeq ($(KOKKOS_INTERNAL_COMPILER_PGI), 1) - KOKKOS_CXXFLAGS += -tp=sandybridge - KOKKOS_LDFLAGS += -tp=sandybridge - else - # Assume that this is a really a GNU compiler. - KOKKOS_CXXFLAGS += -mavx - KOKKOS_LDFLAGS += -mavx - endif - endif - endif -endif - -ifeq ($(KOKKOS_INTERNAL_USE_ARCH_POWER7), 1) - tmp := $(shell echo "\#define KOKKOS_ARCH_POWER7 1" >> KokkosCore_config.tmp ) - - ifeq ($(KOKKOS_INTERNAL_COMPILER_PGI), 1) - - else - # Assume that this is a really a GNU compiler or it could be XL on P8. - KOKKOS_CXXFLAGS += -mcpu=power7 -mtune=power7 - KOKKOS_LDFLAGS += -mcpu=power7 -mtune=power7 - endif -endif - -ifeq ($(KOKKOS_INTERNAL_USE_ARCH_POWER8), 1) - tmp := $(shell echo "\#define KOKKOS_ARCH_POWER8 1" >> KokkosCore_config.tmp ) - - ifeq ($(KOKKOS_INTERNAL_COMPILER_PGI), 1) - - else - # Assume that this is a really a GNU compiler or it could be XL on P8. - KOKKOS_CXXFLAGS += -mcpu=power8 -mtune=power8 - KOKKOS_LDFLAGS += -mcpu=power8 -mtune=power8 - endif -endif - -ifeq ($(KOKKOS_INTERNAL_USE_ARCH_POWER9), 1) - tmp := $(shell echo "\#define KOKKOS_ARCH_POWER9 1" >> KokkosCore_config.tmp ) - - ifeq ($(KOKKOS_INTERNAL_COMPILER_PGI), 1) - - else - # Assume that this is a really a GNU compiler or it could be XL on P9. 
- KOKKOS_CXXFLAGS += -mcpu=power9 -mtune=power9 - KOKKOS_LDFLAGS += -mcpu=power9 -mtune=power9 - endif -endif - -ifeq ($(KOKKOS_INTERNAL_USE_ARCH_HSW), 1) - tmp := $(shell echo "\#define KOKKOS_ARCH_AVX2 1" >> KokkosCore_config.tmp ) - - ifeq ($(KOKKOS_INTERNAL_COMPILER_INTEL), 1) - KOKKOS_CXXFLAGS += -xCORE-AVX2 - KOKKOS_LDFLAGS += -xCORE-AVX2 - else - ifeq ($(KOKKOS_INTERNAL_COMPILER_CRAY), 1) - - else - ifeq ($(KOKKOS_INTERNAL_COMPILER_PGI), 1) - KOKKOS_CXXFLAGS += -tp=haswell - KOKKOS_LDFLAGS += -tp=haswell - else - # Assume that this is a really a GNU compiler. - KOKKOS_CXXFLAGS += -march=core-avx2 -mtune=core-avx2 - KOKKOS_LDFLAGS += -march=core-avx2 -mtune=core-avx2 - endif - endif - endif -endif - -ifeq ($(KOKKOS_INTERNAL_USE_ARCH_BDW), 1) - tmp := $(shell echo "\#define KOKKOS_ARCH_AVX2 1" >> KokkosCore_config.tmp ) - - ifeq ($(KOKKOS_INTERNAL_COMPILER_INTEL), 1) - KOKKOS_CXXFLAGS += -xCORE-AVX2 - KOKKOS_LDFLAGS += -xCORE-AVX2 - else - ifeq ($(KOKKOS_INTERNAL_COMPILER_CRAY), 1) - - else - ifeq ($(KOKKOS_INTERNAL_COMPILER_PGI), 1) - KOKKOS_CXXFLAGS += -tp=haswell - KOKKOS_LDFLAGS += -tp=haswell - else - # Assume that this is a really a GNU compiler. - KOKKOS_CXXFLAGS += -march=core-avx2 -mtune=core-avx2 -mrtm - KOKKOS_LDFLAGS += -march=core-avx2 -mtune=core-avx2 -mrtm - endif - endif - endif -endif - -ifeq ($(KOKKOS_INTERNAL_USE_ARCH_AVX512MIC), 1) - tmp := $(shell echo "\#define KOKKOS_ARCH_AVX512MIC 1" >> KokkosCore_config.tmp ) - - ifeq ($(KOKKOS_INTERNAL_COMPILER_INTEL), 1) - KOKKOS_CXXFLAGS += -xMIC-AVX512 - KOKKOS_LDFLAGS += -xMIC-AVX512 - else - ifeq ($(KOKKOS_INTERNAL_COMPILER_CRAY), 1) - - else - ifeq ($(KOKKOS_INTERNAL_COMPILER_PGI), 1) - - else - # Asssume that this is really a GNU compiler. 
- KOKKOS_CXXFLAGS += -march=knl -mtune=knl - KOKKOS_LDFLAGS += -march=knl -mtune=knl - endif - endif - endif -endif - -ifeq ($(KOKKOS_INTERNAL_USE_ARCH_AVX512XEON), 1) - tmp := $(shell echo "\#define KOKKOS_ARCH_AVX512XEON 1" >> KokkosCore_config.tmp ) - - ifeq ($(KOKKOS_INTERNAL_COMPILER_INTEL), 1) - KOKKOS_CXXFLAGS += -xCORE-AVX512 - KOKKOS_LDFLAGS += -xCORE-AVX512 - else - ifeq ($(KOKKOS_INTERNAL_COMPILER_CRAY), 1) - - else - ifeq ($(KOKKOS_INTERNAL_COMPILER_PGI), 1) - - else - # Nothing here yet. - KOKKOS_CXXFLAGS += -march=skylake-avx512 -mtune=skylake-avx512 -mrtm - KOKKOS_LDFLAGS += -march=skylake-avx512 -mtune=skylake-avx512 -mrtm - endif - endif - endif -endif - -ifeq ($(KOKKOS_INTERNAL_USE_ARCH_KNC), 1) - tmp := $(shell echo "\#define KOKKOS_ARCH_KNC 1" >> KokkosCore_config.tmp ) - KOKKOS_CXXFLAGS += -mmic - KOKKOS_LDFLAGS += -mmic -endif - -# Figure out the architecture flag for Cuda. -ifeq ($(KOKKOS_INTERNAL_USE_CUDA), 1) - ifeq ($(KOKKOS_INTERNAL_COMPILER_NVCC), 1) - KOKKOS_INTERNAL_CUDA_ARCH_FLAG=-arch - endif - ifeq ($(KOKKOS_INTERNAL_COMPILER_CLANG), 1) - KOKKOS_INTERNAL_CUDA_ARCH_FLAG=--cuda-gpu-arch - KOKKOS_CXXFLAGS += -x cuda - endif - - ifeq ($(KOKKOS_INTERNAL_USE_ARCH_KEPLER30), 1) - tmp := $(shell echo "\#define KOKKOS_ARCH_KEPLER 1" >> KokkosCore_config.tmp ) - tmp := $(shell echo "\#define KOKKOS_ARCH_KEPLER30 1" >> KokkosCore_config.tmp ) - KOKKOS_INTERNAL_CUDA_ARCH_FLAG := $(KOKKOS_INTERNAL_CUDA_ARCH_FLAG)=sm_30 - endif - ifeq ($(KOKKOS_INTERNAL_USE_ARCH_KEPLER32), 1) - tmp := $(shell echo "\#define KOKKOS_ARCH_KEPLER 1" >> KokkosCore_config.tmp ) - tmp := $(shell echo "\#define KOKKOS_ARCH_KEPLER32 1" >> KokkosCore_config.tmp ) - KOKKOS_INTERNAL_CUDA_ARCH_FLAG := $(KOKKOS_INTERNAL_CUDA_ARCH_FLAG)=sm_32 - endif - ifeq ($(KOKKOS_INTERNAL_USE_ARCH_KEPLER35), 1) - tmp := $(shell echo "\#define KOKKOS_ARCH_KEPLER 1" >> KokkosCore_config.tmp ) - tmp := $(shell echo "\#define KOKKOS_ARCH_KEPLER35 1" >> KokkosCore_config.tmp ) - 
KOKKOS_INTERNAL_CUDA_ARCH_FLAG := $(KOKKOS_INTERNAL_CUDA_ARCH_FLAG)=sm_35 - endif - ifeq ($(KOKKOS_INTERNAL_USE_ARCH_KEPLER37), 1) - tmp := $(shell echo "\#define KOKKOS_ARCH_KEPLER 1" >> KokkosCore_config.tmp ) - tmp := $(shell echo "\#define KOKKOS_ARCH_KEPLER37 1" >> KokkosCore_config.tmp ) - KOKKOS_INTERNAL_CUDA_ARCH_FLAG := $(KOKKOS_INTERNAL_CUDA_ARCH_FLAG)=sm_37 - endif - ifeq ($(KOKKOS_INTERNAL_USE_ARCH_MAXWELL50), 1) - tmp := $(shell echo "\#define KOKKOS_ARCH_MAXWELL 1" >> KokkosCore_config.tmp ) - tmp := $(shell echo "\#define KOKKOS_ARCH_MAXWELL50 1" >> KokkosCore_config.tmp ) - KOKKOS_INTERNAL_CUDA_ARCH_FLAG := $(KOKKOS_INTERNAL_CUDA_ARCH_FLAG)=sm_50 - endif - ifeq ($(KOKKOS_INTERNAL_USE_ARCH_MAXWELL52), 1) - tmp := $(shell echo "\#define KOKKOS_ARCH_MAXWELL 1" >> KokkosCore_config.tmp ) - tmp := $(shell echo "\#define KOKKOS_ARCH_MAXWELL52 1" >> KokkosCore_config.tmp ) - KOKKOS_INTERNAL_CUDA_ARCH_FLAG := $(KOKKOS_INTERNAL_CUDA_ARCH_FLAG)=sm_52 - endif - ifeq ($(KOKKOS_INTERNAL_USE_ARCH_MAXWELL53), 1) - tmp := $(shell echo "\#define KOKKOS_ARCH_MAXWELL 1" >> KokkosCore_config.tmp ) - tmp := $(shell echo "\#define KOKKOS_ARCH_MAXWELL53 1" >> KokkosCore_config.tmp ) - KOKKOS_INTERNAL_CUDA_ARCH_FLAG := $(KOKKOS_INTERNAL_CUDA_ARCH_FLAG)=sm_53 - endif - ifeq ($(KOKKOS_INTERNAL_USE_ARCH_PASCAL60), 1) - tmp := $(shell echo "\#define KOKKOS_ARCH_PASCAL 1" >> KokkosCore_config.tmp ) - tmp := $(shell echo "\#define KOKKOS_ARCH_PASCAL60 1" >> KokkosCore_config.tmp ) - KOKKOS_INTERNAL_CUDA_ARCH_FLAG := $(KOKKOS_INTERNAL_CUDA_ARCH_FLAG)=sm_60 - endif - ifeq ($(KOKKOS_INTERNAL_USE_ARCH_PASCAL61), 1) - tmp := $(shell echo "\#define KOKKOS_ARCH_PASCAL 1" >> KokkosCore_config.tmp ) - tmp := $(shell echo "\#define KOKKOS_ARCH_PASCAL61 1" >> KokkosCore_config.tmp ) - KOKKOS_INTERNAL_CUDA_ARCH_FLAG := $(KOKKOS_INTERNAL_CUDA_ARCH_FLAG)=sm_61 - endif - - ifneq ($(KOKKOS_INTERNAL_USE_ARCH_NVIDIA), 0) - KOKKOS_CXXFLAGS += $(KOKKOS_INTERNAL_CUDA_ARCH_FLAG) - - ifeq 
($(KOKKOS_INTERNAL_COMPILER_NVCC), 1) - KOKKOS_LDFLAGS += $(KOKKOS_INTERNAL_CUDA_ARCH_FLAG) - endif - endif -endif - -# Figure out the architecture flag for ROCm. -ifeq ($(KOKKOS_INTERNAL_USE_ROCM), 1) - # Lets start with adding architecture defines - ifeq ($(KOKKOS_INTERNAL_USE_ARCH_KAVERI), 1) - tmp := $(shell echo "\#define KOKKOS_ARCH_ROCM 701" >> KokkosCore_config.tmp ) - tmp := $(shell echo "\#define KOKKOS_ARCH_KAVERI 1" >> KokkosCore_config.tmp ) - KOKKOS_INTERNAL_ROCM_ARCH_FLAG := --amdgpu-target=gfx701 - endif - ifeq ($(KOKKOS_INTERNAL_USE_ARCH_CARRIZO), 1) - tmp := $(shell echo "\#define KOKKOS_ARCH_ROCM 801" >> KokkosCore_config.tmp ) - tmp := $(shell echo "\#define KOKKOS_ARCH_CARRIZO 1" >> KokkosCore_config.tmp ) - KOKKOS_INTERNAL_ROCM_ARCH_FLAG := --amdgpu-target=gfx801 - endif - ifeq ($(KOKKOS_INTERNAL_USE_ARCH_FIJI), 1) - tmp := $(shell echo "\#define KOKKOS_ARCH_ROCM 803" >> KokkosCore_config.tmp ) - tmp := $(shell echo "\#define KOKKOS_ARCH_FIJI 1" >> KokkosCore_config.tmp ) - KOKKOS_INTERNAL_ROCM_ARCH_FLAG := --amdgpu-target=gfx803 - endif - ifeq ($(KOKKOS_INTERNAL_USE_ARCH_VEGA), 1) - tmp := $(shell echo "\#define KOKKOS_ARCH_ROCM 900" >> KokkosCore_config.tmp ) - tmp := $(shell echo "\#define KOKKOS_ARCH_VEGA 1" >> KokkosCore_config.tmp ) - KOKKOS_INTERNAL_ROCM_ARCH_FLAG := --amdgpu-target=gfx900 - endif - ifeq ($(KOKKOS_INTERNAL_USE_ARCH_GFX901), 1) - tmp := $(shell echo "\#define KOKKOS_ARCH_ROCM 901" >> KokkosCore_config.tmp ) - tmp := $(shell echo "\#define KOKKOS_ARCH_GFX901 1" >> KokkosCore_config.tmp ) - KOKKOS_INTERNAL_ROCM_ARCH_FLAG := --amdgpu-target=gfx901 - endif - - - KOKKOS_INTERNAL_HCC_PATH := $(shell which $(CXX)) - ROCM_HCC_PATH ?= $(KOKKOS_INTERNAL_HCC_PATH:/bin/clang++=) - - KOKKOS_CXXFLAGS += $(shell $(ROCM_HCC_PATH)/bin/hcc-config --cxxflags) - KOKKOS_LDFLAGS += $(shell $(ROCM_HCC_PATH)/bin/hcc-config --ldflags) -lhc_am -lm - KOKKOS_LDFLAGS += $(KOKKOS_INTERNAL_ROCM_ARCH_FLAG) - - KOKKOS_SRC += $(wildcard 
$(KOKKOS_PATH)/core/src/ROCm/*.cpp) - KOKKOS_HEADERS += $(wildcard $(KOKKOS_PATH)/core/src/ROCm/*.hpp) -endif - -KOKKOS_INTERNAL_LS_CONFIG := $(shell ls KokkosCore_config.h 2>&1) - -ifeq ($(KOKKOS_INTERNAL_LS_CONFIG), KokkosCore_config.h) - KOKKOS_INTERNAL_NEW_CONFIG := $(strip $(shell diff KokkosCore_config.h KokkosCore_config.tmp | grep define | wc -l)) -else - KOKKOS_INTERNAL_NEW_CONFIG := 1 -endif - -ifneq ($(KOKKOS_INTERNAL_NEW_CONFIG), 0) - tmp := $(shell cp KokkosCore_config.tmp KokkosCore_config.h) -endif - -KOKKOS_HEADERS += $(wildcard $(KOKKOS_PATH)/core/src/*.hpp) -KOKKOS_HEADERS += $(wildcard $(KOKKOS_PATH)/core/src/impl/*.hpp) -KOKKOS_HEADERS += $(wildcard $(KOKKOS_PATH)/containers/src/*.hpp) -KOKKOS_HEADERS += $(wildcard $(KOKKOS_PATH)/containers/src/impl/*.hpp) -KOKKOS_HEADERS += $(wildcard $(KOKKOS_PATH)/algorithms/src/*.hpp) - -KOKKOS_SRC += $(wildcard $(KOKKOS_PATH)/core/src/impl/*.cpp) -KOKKOS_SRC += $(wildcard $(KOKKOS_PATH)/containers/src/impl/*.cpp) - -ifeq ($(KOKKOS_INTERNAL_USE_CUDA), 1) - KOKKOS_SRC += $(wildcard $(KOKKOS_PATH)/core/src/Cuda/*.cpp) - KOKKOS_HEADERS += $(wildcard $(KOKKOS_PATH)/core/src/Cuda/*.hpp) - KOKKOS_CPPFLAGS += -I$(CUDA_PATH)/include - KOKKOS_LDFLAGS += -L$(CUDA_PATH)/lib64 - KOKKOS_LIBS += -lcudart -lcuda - - ifeq ($(KOKKOS_INTERNAL_COMPILER_CLANG), 1) - KOKKOS_CXXFLAGS += --cuda-path=$(CUDA_PATH) - endif -endif - -ifeq ($(KOKKOS_INTERNAL_USE_OPENMPTARGET), 1) - KOKKOS_SRC += $(KOKKOS_PATH)/core/src/OpenMPTarget/Kokkos_OpenMPTarget_Exec.cpp $(KOKKOS_PATH)/core/src/OpenMPTarget/Kokkos_OpenMPTargetSpace.cpp - KOKKOS_HEADERS += $(wildcard $(KOKKOS_PATH)/core/src/OpenMPTarget/*.hpp) - ifeq ($(KOKKOS_INTERNAL_USE_CUDA), 1) - KOKKOS_CXXFLAGS += -Xcompiler $(KOKKOS_INTERNAL_OPENMPTARGET_FLAG) - else - KOKKOS_CXXFLAGS += $(KOKKOS_INTERNAL_OPENMPTARGET_FLAG) - endif - KOKKOS_LDFLAGS += $(KOKKOS_INTERNAL_OPENMPTARGET_FLAG) -endif - -ifeq ($(KOKKOS_INTERNAL_USE_OPENMP), 1) - KOKKOS_SRC += $(wildcard 
$(KOKKOS_PATH)/core/src/OpenMP/*.cpp) - KOKKOS_HEADERS += $(wildcard $(KOKKOS_PATH)/core/src/OpenMP/*.hpp) - - ifeq ($(KOKKOS_INTERNAL_COMPILER_NVCC), 1) - KOKKOS_CXXFLAGS += -Xcompiler $(KOKKOS_INTERNAL_OPENMP_FLAG) - else - KOKKOS_CXXFLAGS += $(KOKKOS_INTERNAL_OPENMP_FLAG) - endif - - KOKKOS_LDFLAGS += $(KOKKOS_INTERNAL_OPENMP_FLAG) -endif - -ifeq ($(KOKKOS_INTERNAL_USE_PTHREADS), 1) - KOKKOS_SRC += $(wildcard $(KOKKOS_PATH)/core/src/Threads/*.cpp) - KOKKOS_HEADERS += $(wildcard $(KOKKOS_PATH)/core/src/Threads/*.hpp) - KOKKOS_LIBS += -lpthread -endif - -ifeq ($(KOKKOS_INTERNAL_USE_QTHREADS), 1) - KOKKOS_SRC += $(wildcard $(KOKKOS_PATH)/core/src/Qthreads/*.cpp) - KOKKOS_HEADERS += $(wildcard $(KOKKOS_PATH)/core/src/Qthreads/*.hpp) - KOKKOS_CPPFLAGS += -I$(QTHREADS_PATH)/include - KOKKOS_LDFLAGS += -L$(QTHREADS_PATH)/lib - KOKKOS_LIBS += -lqthread -endif - -# Explicitly set the GCC Toolchain for Clang. -ifeq ($(KOKKOS_INTERNAL_COMPILER_CLANG), 1) - KOKKOS_INTERNAL_GCC_PATH = $(shell which g++) - KOKKOS_INTERNAL_GCC_TOOLCHAIN = $(KOKKOS_INTERNAL_GCC_PATH:/bin/g++=) - KOKKOS_CXXFLAGS += --gcc-toolchain=$(KOKKOS_INTERNAL_GCC_TOOLCHAIN) - KOKKOS_LDFLAGS += --gcc-toolchain=$(KOKKOS_INTERNAL_GCC_TOOLCHAIN) -endif - -# Don't include Kokkos_HBWSpace.cpp if not using MEMKIND to avoid a link warning. -ifneq ($(KOKKOS_INTERNAL_USE_MEMKIND), 1) - KOKKOS_SRC := $(filter-out $(KOKKOS_PATH)/core/src/impl/Kokkos_HBWSpace.cpp,$(KOKKOS_SRC)) -endif - -# Don't include Kokkos_Profiling_Interface.cpp if not using profiling to avoid a link warning. -ifeq ($(KOKKOS_INTERNAL_DISABLE_PROFILING), 1) - KOKKOS_SRC := $(filter-out $(KOKKOS_PATH)/core/src/impl/Kokkos_Profiling_Interface.cpp,$(KOKKOS_SRC)) -endif - -# Don't include Kokkos_Serial.cpp or Kokkos_Serial_Task.cpp if not using Serial -# device to avoid a link warning. 
-ifneq ($(KOKKOS_INTERNAL_USE_SERIAL), 1) - KOKKOS_SRC := $(filter-out $(KOKKOS_PATH)/core/src/impl/Kokkos_Serial.cpp,$(KOKKOS_SRC)) - KOKKOS_SRC := $(filter-out $(KOKKOS_PATH)/core/src/impl/Kokkos_Serial_Task.cpp,$(KOKKOS_SRC)) -endif - -# With Cygwin functions such as fdopen and fileno are not defined -# when strict ansi is enabled. strict ansi gets enabled with --std=c++11 -# though. So we hard undefine it here. Not sure if that has any bad side effects -# This is needed for gtest actually, not for Kokkos itself! -ifeq ($(KOKKOS_INTERNAL_OS_CYGWIN), 1) - KOKKOS_CXXFLAGS += -U__STRICT_ANSI__ -endif - -# Setting up dependencies. - -KokkosCore_config.h: - -KOKKOS_CPP_DEPENDS := KokkosCore_config.h $(KOKKOS_HEADERS) - -KOKKOS_OBJ = $(KOKKOS_SRC:.cpp=.o) -KOKKOS_OBJ_LINK = $(notdir $(KOKKOS_OBJ)) - -include $(KOKKOS_PATH)/Makefile.targets - -kokkos-clean: - rm -f $(KOKKOS_OBJ_LINK) KokkosCore_config.h KokkosCore_config.tmp libkokkos.a - -libkokkos.a: $(KOKKOS_OBJ_LINK) $(KOKKOS_SRC) $(KOKKOS_HEADERS) - ar cr libkokkos.a $(KOKKOS_OBJ_LINK) - ranlib libkokkos.a - -KOKKOS_LINK_DEPENDS=libkokkos.a diff --git a/lib/kokkos/algorithms/src/Kokkos_Random.hpp b/lib/kokkos/algorithms/src/Kokkos_Random.hpp index 9082e47052937dd63a483099ae43a7c65e86bff3..3db9a145d7c9dc7ab1da3a1e6f034a2a0e7d929b 100644 --- a/lib/kokkos/algorithms/src/Kokkos_Random.hpp +++ b/lib/kokkos/algorithms/src/Kokkos_Random.hpp @@ -1265,6 +1265,243 @@ void Random_XorShift1024_Pool<Kokkos::Cuda>::free_state(const Random_XorShift102 } +#endif + +#if defined(KOKKOS_ENABLE_ROCM) + + template<> + class Random_XorShift1024<Kokkos::Experimental::ROCm> { + private: + int p_; + const int state_idx_; + uint64_t* state_; + const int stride_; + friend class Random_XorShift1024_Pool<Kokkos::Experimental::ROCm>; + public: + + typedef Kokkos::Experimental::ROCm device_type; + typedef Random_XorShift1024_Pool<device_type> pool_type; + + enum {MAX_URAND = 0xffffffffU}; + enum {MAX_URAND64 = 0xffffffffffffffffULL-1}; + enum 
{MAX_RAND = static_cast<int>(0xffffffffU/2)};
+    enum {MAX_RAND64 = static_cast<int64_t>(0xffffffffffffffffULL/2-1)};
+    // Bind to row state_idx of the pool's state view; word j of that row is state_[j*stride_].
+    KOKKOS_INLINE_FUNCTION
+    Random_XorShift1024 (const typename pool_type::state_data_type& state, int p, int state_idx = 0):
+      p_(p),state_idx_(state_idx),state_(&state(state_idx,0)),stride_(state.stride_1()){
+    }
+    // Advance the 16-word state (p_ wraps via & 15) and return a 32-bit draw.
+    KOKKOS_INLINE_FUNCTION
+    uint32_t urand() {
+      uint64_t state_0 = state_[ p_ * stride_ ];
+      uint64_t state_1 = state_[ (p_ = ( p_ + 1 ) & 15) * stride_ ];
+      state_1 ^= state_1 << 31;
+      state_1 ^= state_1 >> 11;
+      state_0 ^= state_0 >> 30;
+      uint64_t tmp = ( state_[ p_ * stride_ ] = state_0 ^ state_1 ) * 1181783497276652981ULL;
+      tmp = tmp>>16;
+      return static_cast<uint32_t>(tmp&MAX_URAND);
+    }
+    // Same state update as urand(); returns the full 64-bit product minus one.
+    KOKKOS_INLINE_FUNCTION
+    uint64_t urand64() {
+      uint64_t state_0 = state_[ p_ * stride_ ];
+      uint64_t state_1 = state_[ (p_ = ( p_ + 1 ) & 15) * stride_ ];
+      state_1 ^= state_1 << 31;
+      state_1 ^= state_1 >> 11;
+      state_0 ^= state_0 >> 30;
+      return (( state_[ p_ * stride_ ] = state_0 ^ state_1 ) * 1181783497276652981LL) - 1;
+    }
+    // Draw in [0,range) by rejection: redraw while the value lies at or above the largest multiple of range.
+    KOKKOS_INLINE_FUNCTION
+    uint32_t urand(const uint32_t& range) {
+      const uint32_t max_val = (MAX_URAND/range)*range;
+      uint32_t tmp = urand();
+      while(tmp>=max_val)
+        tmp = urand();  // the redraw must replace tmp, otherwise the loop can never exit
+      return tmp%range;
+    }
+    // Draw in [start,end): urand(end-start) shifted by start.
+    KOKKOS_INLINE_FUNCTION
+    uint32_t urand(const uint32_t& start, const uint32_t& end ) {
+      return urand(end-start)+start;
+    }
+    // 64-bit counterpart of urand(range), using the same rejection scheme.
+    KOKKOS_INLINE_FUNCTION
+    uint64_t urand64(const uint64_t& range) {
+      const uint64_t max_val = (MAX_URAND64/range)*range;
+      uint64_t tmp = urand64();
+      while(tmp>=max_val)
+        tmp = urand64();  // the redraw must replace tmp, otherwise the loop can never exit
+      return tmp%range;
+    }
+    // Draw in [start,end): urand64(end-start) shifted by start.
+    KOKKOS_INLINE_FUNCTION
+    uint64_t urand64(const uint64_t& start, const uint64_t& end ) {
+      return urand64(end-start)+start;
+    }
+    // Non-negative int draw: urand() halved.
+    KOKKOS_INLINE_FUNCTION
+    int rand() {
+      return static_cast<int>(urand()/2);
+    }
+    // Draw in [0,range) from rand() by rejection.
+    KOKKOS_INLINE_FUNCTION
+    int rand(const int& range) {
+      const int max_val = (MAX_RAND/range)*range;
+      int tmp = rand();
+      while(tmp>=max_val)
+        tmp = rand();  // the redraw must replace tmp, otherwise the loop can never exit
+      return tmp%range;
+    }
+    // Draw in [start,end): rand(end-start) shifted by start.
+    KOKKOS_INLINE_FUNCTION
+    int rand(const int& start, const int& end ) {
+      return rand(end-start)+start;
+    }
+    // Non-negative 64-bit draw: urand64() halved.
+    KOKKOS_INLINE_FUNCTION
+    int64_t rand64() {
+      return static_cast<int64_t>(urand64()/2);
+    }
+    // Draw in [0,range) from rand64() by rejection.
+    KOKKOS_INLINE_FUNCTION
+    int64_t rand64(const int64_t& range) {
+      const int64_t max_val = (MAX_RAND64/range)*range;
+      int64_t tmp = rand64();
+      while(tmp>=max_val)
+        tmp = rand64();  // the redraw must replace tmp, otherwise the loop can never exit
+      return tmp%range;
+    }
+    // Draw in [start,end): rand64(end-start) shifted by start.
+    KOKKOS_INLINE_FUNCTION
+    int64_t rand64(const int64_t& start, const int64_t& end ) {
+      return rand64(end-start)+start;
+    }
+    // Float draw: urand64() scaled by 1/MAX_URAND64.
+    KOKKOS_INLINE_FUNCTION
+    float frand() {
+      return 1.0f * urand64()/MAX_URAND64;
+    }
+    // Float draw scaled to range: range * urand64()/MAX_URAND64.
+    KOKKOS_INLINE_FUNCTION
+    float frand(const float& range) {
+      return range * urand64()/MAX_URAND64;
+    }
+    // Float draw between start and end: frand(end-start) shifted by start.
+    KOKKOS_INLINE_FUNCTION
+    float frand(const float& start, const float& end ) {
+      return frand(end-start)+start;
+    }
+    // Double draw: urand64() scaled by 1/MAX_URAND64.
+    KOKKOS_INLINE_FUNCTION
+    double drand() {
+      return 1.0 * urand64()/MAX_URAND64;
+    }
+    // Double draw scaled to range: range * urand64()/MAX_URAND64.
+    KOKKOS_INLINE_FUNCTION
+    double drand(const double& range) {
+      return range * urand64()/MAX_URAND64;
+    }
+    // Double draw between start and end: drand(end-start) shifted by start.
+    KOKKOS_INLINE_FUNCTION
+    double drand(const double& start, const double& end ) {
+      return drand(end-start)+start;  // drand, not frand, so the result keeps double precision
+    }
+
+    //Marsaglia polar method for drawing a standard normal distributed random number
+    KOKKOS_INLINE_FUNCTION
+    double normal() {
+      double S = 2.0;
+      double U;
+      while(S>=1.0) {  // resample until the point (U,V) lies inside the unit circle
+        U = 2.0*drand() - 1.0;
+        const double V = 2.0*drand() - 1.0;
+        S = U*U+V*V;
+      }
+      return U*std::sqrt(-2.0*log(S)/S);
+    }
+    // Normal draw with the given mean and standard deviation: mean + normal()*std_dev.
+    KOKKOS_INLINE_FUNCTION
+    double normal(const double& mean, const double& std_dev=1.0) {
+      return mean + normal()*std_dev;
+    }
+  };
+// Construct the ROCm XorShift64 pool: reset num_states_ and delegate to init(seed,4*32768).
+template<>
+inline
+Random_XorShift64_Pool<Kokkos::Experimental::ROCm>::Random_XorShift64_Pool(uint64_t seed) {
+  num_states_ = 0;
+  init(seed,4*32768);
+}
+// With __HCC_ACCELERATOR__: start at a slot derived from the thread/block ids and probe until a lock is won; otherwise return slot 0.
+template<>
+KOKKOS_INLINE_FUNCTION
+Random_XorShift64<Kokkos::Experimental::ROCm> Random_XorShift64_Pool<Kokkos::Experimental::ROCm>::get_state() const {
+#ifdef __HCC_ACCELERATOR__
+  const int i_offset = (threadIdx_x*blockDim_y + threadIdx_y)*blockDim_z+threadIdx_z;
+  int i = (((blockIdx_x*gridDim_y+blockIdx_y)*gridDim_z + blockIdx_z) *
+           blockDim_x*blockDim_y*blockDim_z + i_offset)%num_states_;
+  while(Kokkos::atomic_compare_exchange(&locks_(i),0,1)) {  // non-zero old value: slot already taken
+    i+=blockDim_x*blockDim_y*blockDim_z;
+    if(i>=num_states_) {i = i_offset;}
+  }
+
+  return Random_XorShift64<Kokkos::Experimental::ROCm>(state_(i),i);
+#else
+  return Random_XorShift64<Kokkos::Experimental::ROCm>(state_(0),0);
+#endif
+}
+// Store the generator's state back into its slot and clear the slot's lock; a no-op without __HCC_ACCELERATOR__.
+template<>
+KOKKOS_INLINE_FUNCTION
+void Random_XorShift64_Pool<Kokkos::Experimental::ROCm>::free_state(const Random_XorShift64<Kokkos::Experimental::ROCm> &state) const {
+#ifdef __HCC_ACCELERATOR__
+  state_(state.state_idx_) = state.state_;
+  locks_(state.state_idx_) = 0;
+  return;
+#endif
+}
+
+// Construct the ROCm XorShift1024 pool: reset num_states_ and delegate to init(seed,4*32768).
+template<>
+inline
+Random_XorShift1024_Pool<Kokkos::Experimental::ROCm>::Random_XorShift1024_Pool(uint64_t seed) {
+  num_states_ = 0;
+  init(seed,4*32768);
+}
+// Same slot selection and lock probing as the XorShift64 pool; the generator is handed the whole state view plus p_(i).
+template<>
+KOKKOS_INLINE_FUNCTION
+Random_XorShift1024<Kokkos::Experimental::ROCm> Random_XorShift1024_Pool<Kokkos::Experimental::ROCm>::get_state() const {
+#ifdef __HCC_ACCELERATOR__
+  const int i_offset = (threadIdx_x*blockDim_y + threadIdx_y)*blockDim_z+threadIdx_z;
+  int i = (((blockIdx_x*gridDim_y+blockIdx_y)*gridDim_z + blockIdx_z) *
+           blockDim_x*blockDim_y*blockDim_z + i_offset)%num_states_;
+  while(Kokkos::atomic_compare_exchange(&locks_(i),0,1)) {  // non-zero old value: slot already taken
+    i+=blockDim_x*blockDim_y*blockDim_z;
+    if(i>=num_states_) {i = i_offset;}
+  }
+
+  return Random_XorShift1024<Kokkos::Experimental::ROCm>(state_, p_(i), i);
+#else
+  return Random_XorShift1024<Kokkos::Experimental::ROCm>(state_, p_(0), 0);
+#endif
+}
+// Copy the 16 state words back into the slot and clear its lock; a no-op without __HCC_ACCELERATOR__.
+template<>
+KOKKOS_INLINE_FUNCTION
+void Random_XorShift1024_Pool<Kokkos::Experimental::ROCm>::free_state(const Random_XorShift1024<Kokkos::Experimental::ROCm> &state) const {
+#ifdef __HCC_ACCELERATOR__
+  for(int i=0; i<16; i++)
+    state_(state.state_idx_,i) = state.state_[i*state.stride_];  // word i sits i*stride_ past state_, matching urand()
+  locks_(state.state_idx_) = 0;
+  return;
+#endif +} + + #endif diff --git a/lib/kokkos/algorithms/unit_tests/Makefile b/lib/kokkos/algorithms/unit_tests/Makefile index b74192ef18d0c132e7b734bcb27796304744ed66..a5a10c82ee525d4b4ca9bdc2ba3ae002ce0858dd 100644 --- a/lib/kokkos/algorithms/unit_tests/Makefile +++ b/lib/kokkos/algorithms/unit_tests/Makefile @@ -30,6 +30,12 @@ ifeq ($(KOKKOS_INTERNAL_USE_CUDA), 1) TEST_TARGETS += test-cuda endif +ifeq ($(KOKKOS_INTERNAL_USE_ROCM), 1) + OBJ_ROCM = TestROCm.o UnitTestMain.o gtest-all.o + TARGETS += KokkosAlgorithms_UnitTest_ROCm + TEST_TARGETS += test-rocm +endif + ifeq ($(KOKKOS_INTERNAL_USE_PTHREADS), 1) OBJ_THREADS = TestThreads.o UnitTestMain.o gtest-all.o TARGETS += KokkosAlgorithms_UnitTest_Threads @@ -51,6 +57,9 @@ endif KokkosAlgorithms_UnitTest_Cuda: $(OBJ_CUDA) $(KOKKOS_LINK_DEPENDS) $(LINK) $(EXTRA_PATH) $(OBJ_CUDA) $(KOKKOS_LIBS) $(LIB) $(KOKKOS_LDFLAGS) $(LDFLAGS) -o KokkosAlgorithms_UnitTest_Cuda +KokkosAlgorithms_UnitTest_ROCm: $(OBJ_ROCM) $(KOKKOS_LINK_DEPENDS) + $(LINK) $(EXTRA_PATH) $(OBJ_ROCM) $(KOKKOS_LIBS) $(LIB) $(KOKKOS_LDFLAGS) $(LDFLAGS) -o KokkosAlgorithms_UnitTest_ROCm + KokkosAlgorithms_UnitTest_Threads: $(OBJ_THREADS) $(KOKKOS_LINK_DEPENDS) $(LINK) $(EXTRA_PATH) $(OBJ_THREADS) $(KOKKOS_LIBS) $(LIB) $(KOKKOS_LDFLAGS) $(LDFLAGS) -o KokkosAlgorithms_UnitTest_Threads @@ -63,6 +72,9 @@ KokkosAlgorithms_UnitTest_Serial: $(OBJ_SERIAL) $(KOKKOS_LINK_DEPENDS) test-cuda: KokkosAlgorithms_UnitTest_Cuda ./KokkosAlgorithms_UnitTest_Cuda +test-rocm: KokkosAlgorithms_UnitTest_ROCm + ./KokkosAlgorithms_UnitTest_ROCm + test-threads: KokkosAlgorithms_UnitTest_Threads ./KokkosAlgorithms_UnitTest_Threads diff --git a/lib/kokkos/algorithms/unit_tests/TestROCm.cpp b/lib/kokkos/algorithms/unit_tests/TestROCm.cpp new file mode 100644 index 0000000000000000000000000000000000000000..720b377ed2fb29a74d241ea6c42b46e3b15b5541 --- /dev/null +++ b/lib/kokkos/algorithms/unit_tests/TestROCm.cpp @@ -0,0 +1,112 @@ +/* +//@HEADER +// 
************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact H. 
Carter Edwards (hcedwar@sandia.gov)
+//
+// ************************************************************************
+//@HEADER
+*/
+
+#include <Kokkos_Macros.hpp>
+#ifdef KOKKOS_ENABLE_ROCM
+
+#include <cstdint>
+#include <iostream>
+#include <iomanip>
+
+#include <gtest/gtest.h>
+
+#include <Kokkos_Core.hpp>
+
+#include <TestRandom.hpp>
+#include <TestSort.hpp>
+
+namespace Test {
+// gtest fixture: initializes the host execution space, then ROCm device 0, and finalizes them in reverse order.
+class rocm : public ::testing::Test {
+protected:
+  static void SetUpTestCase()
+  {
+    std::cout << std::setprecision(5) << std::scientific;
+    Kokkos::HostSpace::execution_space::initialize();
+    Kokkos::Experimental::ROCm::initialize( Kokkos::Experimental::ROCm::SelectDevice(0) );
+  }
+  static void TearDownTestCase()
+  {
+    Kokkos::Experimental::ROCm::finalize();
+    Kokkos::HostSpace::execution_space::finalize();
+  }
+};
+// Runs Impl::test_random on the ROCm XorShift64 pool with num_draws.
+void rocm_test_random_xorshift64( int num_draws )
+{
+  Impl::test_random<Kokkos::Random_XorShift64_Pool<Kokkos::Experimental::ROCm> >(num_draws);
+}
+// Runs Impl::test_random on the ROCm XorShift1024 pool with num_draws.
+void rocm_test_random_xorshift1024( int num_draws )
+{
+  Impl::test_random<Kokkos::Random_XorShift1024_Pool<Kokkos::Experimental::ROCm> >(num_draws);
+}
+
+// Each macro below expands to one TEST_F on the rocm fixture; all three are #undef'd after being instantiated once.
+#define ROCM_RANDOM_XORSHIFT64( num_draws ) \
+  TEST_F( rocm, Random_XorShift64 ) { \
+    rocm_test_random_xorshift64(num_draws); \
+  }
+
+#define ROCM_RANDOM_XORSHIFT1024( num_draws ) \
+  TEST_F( rocm, Random_XorShift1024 ) { \
+    rocm_test_random_xorshift1024(num_draws); \
+  }
+
+#define ROCM_SORT_UNSIGNED( size ) \
+  TEST_F( rocm, SortUnsigned ) { \
+    Impl::test_sort< Kokkos::Experimental::ROCm, unsigned >(size); \
+  }
+
+ROCM_RANDOM_XORSHIFT64( 132141141 )
+ROCM_RANDOM_XORSHIFT1024( 52428813 )
+ROCM_SORT_UNSIGNED(171)
+
+#undef ROCM_RANDOM_XORSHIFT64
+#undef ROCM_RANDOM_XORSHIFT1024
+#undef ROCM_SORT_UNSIGNED
+}
+#else
+void KOKKOS_ALGORITHMS_UNITTESTS_TESTROCM_PREVENT_LINK_ERROR() {}  // empty stub when ROCm is disabled; per its name, avoids a link error
+#endif /* #ifdef KOKKOS_ENABLE_ROCM */
+
diff --git a/lib/kokkos/bin/hpcbind b/lib/kokkos/bin/hpcbind index 
ca34648780174d626bc2b04dbbbb282eda3f9dff..b88b334f8bdf22365d5c8159ffebb47de9ecb2a7 100755 --- a/lib/kokkos/bin/hpcbind +++ b/lib/kokkos/bin/hpcbind @@ -27,7 +27,7 @@ fi HPCBIND_HWLOC_PARENT_CPUSET="" if [[ ${HPCBIND_HAS_HWLOC} -eq 1 ]]; then MY_PID="$BASHPID" - HPCBIND_HWLOC_PARENT_CPUSET=$(hwloc-ps --cpuset | grep "${MY_PID}" | cut -f 2) + HPCBIND_HWLOC_PARENT_CPUSET="$(hwloc-ps -a --cpuset | grep ${MY_PID} | cut -f 2)" fi ################################################################################ @@ -58,23 +58,34 @@ declare -i HPCBIND_ENABLE_GPU_MAPPING=$((NUM_GPUS > 0)) ################################################################################ HPCBIND_QUEUE_NAME="" declare -i HPCBIND_QUEUE_INDEX=0 -declare -i HPCBIND_QUEUE_GPU_MAPPING=0 - -if [[ ! -z "${SLURM_LOCAL_ID}" ]]; then - HPCBIND_QUEUE_GPU_MAPPING=1 - HPCBIND_QUEUE_NAME="sbatch" +declare -i HPCBIND_QUEUE_MAPPING=0 + +if [[ ! -z "${PMI_RANK}" ]]; then + HPCBIND_QUEUE_MAPPING=1 + HPCBIND_QUEUE_NAME="mpich" + HPCBIND_QUEUE_INDEX=${PMI_RANK} +elif [[ ! -z "${OMPI_COMM_WORLD_RANK}" ]]; then + HPCBIND_QUEUE_MAPPING=1 + HPCBIND_QUEUE_NAME="openmpi" + HPCBIND_QUEUE_INDEX=${OMPI_COMM_WORLD_RANK} +elif [[ ! -z "${MV2_COMM_WORLD_RANK}" ]]; then + HPCBIND_QUEUE_MAPPING=1 + HPCBIND_QUEUE_NAME="mvapich2" + HPCBIND_QUEUE_INDEX=${MV2_COMM_WORLD_RANK} +elif [[ ! -z "${SLURM_LOCAL_ID}" ]]; then + HPCBIND_QUEUE_MAPPING=1 + HPCBIND_QUEUE_NAME="slurm" HPCBIND_QUEUE_INDEX=${SLURM_LOCAL_ID} elif [[ ! -z "${LBS_JOBINDEX}" ]]; then - HPCBIND_QUEUE_GPU_MAPPING=1 + HPCBIND_QUEUE_MAPPING=1 HPCBIND_QUEUE_NAME="bsub" HPCBIND_QUEUE_INDEX=${LBS_JOBINDEX} elif [[ ! 
-z "${ALPS_APP_PE}" ]]; then - HPCBIND_QUEUE_GPU_MAPPING=1 + HPCBIND_QUEUE_MAPPING=1 HPCBIND_QUEUE_NAME="aprun" HPCBIND_QUEUE_INDEX=${ALPS_APP_PE} fi - ################################################################################ # Show help ################################################################################ @@ -91,13 +102,14 @@ function show_help { echo " --proc-bind=<LOC> Set the initial process mask for the script" echo " LOC can be any valid location argument for" echo " hwloc-calc Default: all" + echo " --whole-system ${cmd} will ignore the its parent process binding" echo " --distribute=N Distribute the current cpuset into N partitions" echo " --distribute-partition=I" echo " Use the i'th partition (zero based)" echo " --visible-gpus=<L> Comma separated list of gpu ids" echo " Default: CUDA_VISIBLE_DEVICES or all gpus in" echo " sequential order" - echo " --gpu-ignore-queue Ignore queue job id when choosing visible GPU" + echo " --ignore-queue Ignore queue job id when choosing visible GPU and partition" echo " --no-gpu-mapping Do not set CUDA_VISIBLE_DEVICES" echo " --openmp=M.m Set env variables for the given OpenMP version" echo " Default: 4.0" @@ -110,22 +122,30 @@ function show_help { echo " --force-openmp-proc-bind=<OP>" echo " Override logic for selecting OMP_PROC_BIND" echo " --no-openmp-nested Set OMP_NESTED to false" - echo " --show-bindings Show the bindings" - echo " --lstopo Show bindings in lstopo without executing a command" - echo " -v|--verbose Show options and relevant environment variables" + echo " --output-prefix=<P> Save the output to files of the form" + echo " P-N.log, P-N.out and P-N.err where P is the prefix" + echo " and N is the queue index or mpi rank (no spaces)" + echo " --output-mode=<Op> How console output should be handled." + echo " Options are all, rank0, and none. 
Default: rank0" + echo " --lstopo Show bindings in lstopo" + echo " -v|--verbose Print bindings and relevant environment variables" echo " -h|--help Show this message" echo "" echo "Sample Usage:" echo " Split the current process cpuset into 4 and use the 3rd partition" echo " ${cmd} --distribute=4 --distribute-partition=2 -v -- command ..." - echo " Bing the process to all even cores" + echo " Launch 16 jobs over 4 nodes with 4 jobs per node using only the even pus" + echo " and save the output to rank specific files" + echo " mpiexec -N 16 -npernode 4 ${cmd} --whole-system --proc-bind=pu:even \\" + echo " --distribute=4 -v --output-prefix=output -- command ..." + echo " Bind the process to all even cores" echo " ${cmd} --proc-bind=core:even -v -- command ..." - echo " Bind to the first 64 cores and split the current process cpuset into 4" - echo " ${cmd} --proc-bind=core:0-63 --distribute=4 --distribute-partition=0 -- command ..." - echo " skip GPU 0 when mapping visible devices" + echo " Bind the the even cores of socket 0 and the odd cores of socket 1" + echo " ${cmd} --proc-bind='socket:0.core:even socket:1.core:odd' -v -- command ..." + echo " Skip GPU 0 when mapping visible devices" echo " ${cmd} --distribute=4 --distribute-partition=0 --visible-gpus=1,2 -v -- command ..." 
echo " Display the current bindings" - echo " ${cmd} --proc-bind=numa:0 --show-bindings -- command" + echo " ${cmd} --proc-bind=numa:0 -- command" echo " Display the current bindings using lstopo" echo " ${cmd} --proc-bind=numa:0.core:odd --lstopo" echo "" @@ -144,7 +164,7 @@ fi declare -a UNKNOWN_ARGS=() declare -i HPCBIND_ENABLE_HWLOC_BIND=${HPCBIND_HAS_HWLOC} declare -i HPCBIND_DISTRIBUTE=1 -declare -i HPCBIND_PARTITION=0 +declare -i HPCBIND_PARTITION=-1 HPCBIND_PROC_BIND="all" HPCBIND_OPENMP_VERSION=4.0 declare -i HPCBIND_OPENMP_PERCENT=100 @@ -155,11 +175,15 @@ HPCBIND_OPENMP_FORCE_PROC_BIND="" HPCBIND_OPENMP_NESTED=${OMP_NESTED:-true} declare -i HPCBIND_VERBOSE=0 -declare -i HPCBIND_SHOW_BINDINGS=0 declare -i HPCBIND_LSTOPO=0 -for i in $@; do - case $i in +HPCBIND_OUTPUT_PREFIX="" +HPCBIND_OUTPUT_MODE="rank0" + +declare -i HPCBIND_HAS_COMMAND=0 + +for i in "$@"; do + case "$i" in # number of partitions to create --no-hwloc-bind) HPCBIND_ENABLE_HWLOC_BIND=0 @@ -169,6 +193,10 @@ for i in $@; do HPCBIND_PROC_BIND="${i#*=}" shift ;; + --whole-system) + HPCBIND_HWLOC_PARENT_CPUSET="" + shift + ;; --distribute=*) HPCBIND_DISTRIBUTE="${i#*=}" shift @@ -182,8 +210,8 @@ for i in $@; do HPCBIND_VISIBLE_GPUS=$(echo "${i#*=}" | tr ',' ' ') shift ;; - --gpu-ignore-queue) - HPCBIND_QUEUE_GPU_MAPPING=0 + --ignore-queue) + HPCBIND_QUEUE_MAPPING=0 shift ;; --no-gpu-mapping) @@ -218,14 +246,18 @@ for i in $@; do HPCBIND_OPENMP_NESTED="false" shift ;; - --show-bindings) - HPCBIND_VERBOSE=1 - HPCBIND_SHOW_BINDINGS=1 + --output-prefix=*) + HPCBIND_OUTPUT_PREFIX="${i#*=}" + shift + ;; + --output-mode=*) + HPCBIND_OUTPUT_MODE="${i#*=}" + #convert to lower case + HPCBIND_OUTPUT_MODE="${HPCBIND_OUTPUT_MODE,,}" shift ;; --lstopo) HPCBIND_VERBOSE=1 - HPCBIND_SHOW_BINDINGS=0 HPCBIND_LSTOPO=1 shift ;; @@ -239,6 +271,7 @@ for i in $@; do ;; # ignore remaining arguments --) + HPCBIND_HAS_COMMAND=1 shift break ;; @@ -250,16 +283,41 @@ for i in $@; do esac done 
+################################################################################ +# Check output mode +################################################################################ +declare -i HPCBIND_TEE=0 + +if [[ "${HPCBIND_OUTPUT_MODE}" == "none" ]]; then + HPCBIND_TEE=0 +elif [[ "${HPCBIND_OUTPUT_MODE}" == "all" ]]; then + HPCBIND_TEE=1 +elif [[ ${HPCBIND_QUEUE_INDEX} -eq 0 ]]; then + #default to rank0 printing to screen + HPCBIND_TEE=1 +fi + + +if [[ "${HPCBIND_OUTPUT_PREFIX}" == "" ]]; then + HPCBIND_LOG=/dev/null + HPCBIND_ERR=/dev/null + HPCBIND_OUT=/dev/null +else + HPCBIND_LOG="${HPCBIND_OUTPUT_PREFIX}-${HPCBIND_QUEUE_INDEX}.hpc.log" + HPCBIND_ERR="${HPCBIND_OUTPUT_PREFIX}-${HPCBIND_QUEUE_INDEX}.err" + HPCBIND_OUT="${HPCBIND_OUTPUT_PREFIX}-${HPCBIND_QUEUE_INDEX}.out" + > ${HPCBIND_LOG} +fi + ################################################################################ # Check unknown arguments ################################################################################ if [[ ${#UNKNOWN_ARGS[*]} > 0 ]]; then - echo "Uknown options: ${UNKNOWN_ARGS[*]}" + echo "HPCBIND Uknown options: ${UNKNOWN_ARGS[*]}" > >(tee -a ${HPCBIND_LOG}) exit 1 fi - ################################################################################ # Check that visible gpus are valid ################################################################################ @@ -268,22 +326,19 @@ if [[ ${HPCBIND_ENABLE_GPU_MAPPING} -eq 1 ]]; then for ((i=0; i < ${#HPCBIND_VISIBLE_GPUS[*]}; i++)); do if [[ ${HPCBIND_VISIBLE_GPUS[$i]} -ge ${NUM_GPUS} || ${HPCBIND_VISIBLE_GPUS[$i]} -lt 0 ]]; then - echo "Invaild GPU ID ${HPCBIND_VISIBLE_GPUS[$i]}, setting to 0" + echo "HPCBIND Invaild GPU ID ${HPCBIND_VISIBLE_GPUS[$i]} (setting to 0)" > >(tee -a ${HPCBIND_LOG}) HPCBIND_VISIBLE_GPUS[$i]=0; fi done NUM_GPUS=${#HPCBIND_VISIBLE_GPUS[@]} fi - ################################################################################ # Check OpenMP percent 
################################################################################ if [[ ${HPCBIND_OPENMP_PERCENT} -lt 1 ]]; then - echo "OpenMP percent < 1, setting to 1" HPCBIND_OPENMP_PERCENT=1 elif [[ ${HPCBIND_OPENMP_PERCENT} -gt 100 ]]; then - echo "OpenMP percent > 100, setting to 100" HPCBIND_OPENMP_PERCENT=100 fi @@ -291,15 +346,21 @@ fi # Check distribute ################################################################################ if [[ ${HPCBIND_DISTRIBUTE} -le 0 ]]; then - echo "Invalid input for distribute, changing distribute to 1" HPCBIND_DISTRIBUTE=1 fi -if [[ ${HPCBIND_PARTITION} -ge ${HPCBIND_DISTRIBUTE} ]]; then - echo "Invalid input for distribute-partition, changing to 0" +################################################################################ +#choose the correct partition +################################################################################ +if [[ ${HPCBIND_PARTITION} -lt 0 && ${HPCBIND_QUEUE_MAPPING} -eq 1 ]]; then + HPCBIND_PARTITION=${HPCBIND_QUEUE_INDEX} +elif [[ ${HPCBIND_PARTITION} -lt 0 ]]; then HPCBIND_PARTITION=0 fi +if [[ ${HPCBIND_PARTITION} -ge ${HPCBIND_DISTRIBUTE} ]]; then + HPCBIND_PARTITION=$((HPCBIND_PARTITION % HPCBIND_DISTRIBUTE)) +fi ################################################################################ # Find cpuset and num threads @@ -309,13 +370,17 @@ declare -i HPCBIND_NUM_PUS=0 if [[ ${HPCBIND_ENABLE_HWLOC_BIND} -eq 1 ]]; then if [[ "${HPCBIND_HWLOC_PARENT_CPUSET}" == "" ]]; then - BINDING=$(hwloc-calc ${HPCBIND_PROC_BIND}) + BINDING=$(hwloc-calc ${HPCBIND_PROC_BIND[*]}) else - BINDING=$(hwloc-calc --restrict ${HPCBIND_HWLOC_PARENT_CPUSET} ${HPCBIND_PROC_BIND}) + BINDING=$(hwloc-calc --restrict ${HPCBIND_HWLOC_PARENT_CPUSET} ${HPCBIND_PROC_BIND[*]}) fi - CPUSETS=($(hwloc-distrib --restrict ${BINDING} --at core ${HPCBIND_DISTRIBUTE})) - HPCBIND_HWLOC_CPUSET=${CPUSETS[${HPCBIND_PARTITION}]} + if [[ ${HPCBIND_DISTRIBUTE} -gt 1 ]]; then + CPUSETS=($(hwloc-distrib --restrict ${BINDING} --at 
core ${HPCBIND_DISTRIBUTE})) + HPCBIND_HWLOC_CPUSET="${CPUSETS[${HPCBIND_PARTITION}]}" + else + HPCBIND_HWLOC_CPUSET="${BINDING}" + fi HPCBIND_NUM_PUS=$(hwloc-ls --restrict ${HPCBIND_HWLOC_CPUSET} --only pu | wc -l) else HPCBIND_NUM_PUS=$(cat /proc/cpuinfo | grep -c processor) @@ -373,13 +438,13 @@ export OMP_NESTED=${HPCBIND_OPENMP_NESTED} ################################################################################ if [[ ${HPCBIND_ENABLE_GPU_MAPPING} -eq 1 ]]; then - if [[ ${HPCBIND_QUEUE_GPU_MAPPING} -eq 0 ]]; then + if [[ ${HPCBIND_QUEUE_MAPPING} -eq 0 ]]; then declare -i GPU_ID=$((HPCBIND_PARTITION % NUM_GPUS)) - export CUDA_VISIBLE_DEVICES=${HPCBIND_VISIBLE_GPUS[${GPU_ID}]} + export CUDA_VISIBLE_DEVICES="${HPCBIND_VISIBLE_GPUS[${GPU_ID}]}" else declare -i MY_TASK_ID=$((HPCBIND_QUEUE_INDEX * HPCBIND_DISTRIBUTE + HPCBIND_PARTITION)) declare -i GPU_ID=$((MY_TASK_ID % NUM_GPUS)) - export CUDA_VISIBLE_DEVICES=${HPCBIND_VISIBLE_GPUS[${GPU_ID}]} + export CUDA_VISIBLE_DEVICES="${HPCBIND_VISIBLE_GPUS[${GPU_ID}]}" fi fi @@ -389,22 +454,22 @@ fi export HPCBIND_HAS_HWLOC=${HPCBIND_HAS_HWLOC} export HPCBIND_HAS_NVIDIA=${HPCBIND_HAS_NVIDIA} export HPCBIND_NUM_PUS=${HPCBIND_NUM_PUS} -export HPCBIND_HWLOC_CPUSET=${HPCBIND_HWLOC_CPUSET} +export HPCBIND_HWLOC_CPUSET="${HPCBIND_HWLOC_CPUSET}" export HPCBIND_HWLOC_DISTRIBUTE=${HPCBIND_DISTRIBUTE} export HPCBIND_HWLOC_DISTRIBUTE_PARTITION=${HPCBIND_PARTITION} if [[ "${HPCBIND_HWLOC_PARENT_CPUSET}" == "" ]]; then export HPCBIND_HWLOC_PARENT_CPUSET="all" else - export HPCBIND_HWLOC_PARENT_CPUSET=${HPCBIND_HWLOC_PARENT_CPUSET} + export HPCBIND_HWLOC_PARENT_CPUSET="${HPCBIND_HWLOC_PARENT_CPUSET}" fi -export HPCBIND_HWLOC_PROC_BIND=${HPCBIND_PROC_BIND} +export HPCBIND_HWLOC_PROC_BIND="${HPCBIND_PROC_BIND}" export HPCBIND_NVIDIA_ENABLE_GPU_MAPPING=${HPCBIND_ENABLE_GPU_MAPPING} export HPCBIND_NVIDIA_VISIBLE_GPUS=$(echo "${HPCBIND_VISIBLE_GPUS[*]}" | tr ' ' ',') -export HPCBIND_OPENMP_VERSION=${HPCBIND_OPENMP_VERSION} +export 
HPCBIND_OPENMP_VERSION="${HPCBIND_OPENMP_VERSION}" if [[ "${HPCBIND_QUEUE_NAME}" != "" ]]; then export HPCBIND_QUEUE_INDEX=${HPCBIND_QUEUE_INDEX} - export HPCBIND_QUEUE_NAME=${HPCBIND_QUEUE_NAME} - export HPCBIND_QUEUE_GPU_MAPPING=${HPCBIND_QUEUE_GPU_MAPPING} + export HPCBIND_QUEUE_NAME="${HPCBIND_QUEUE_NAME}" + export HPCBIND_QUEUE_MAPPING=${HPCBIND_QUEUE_MAPPING} fi @@ -412,43 +477,63 @@ fi # Print verbose ################################################################################ -if [[ ${HPCBIND_VERBOSE} -eq 1 ]]; then - MY_ENV=$(env | sort) - echo "[HPCBIND]" - echo "${MY_ENV}" | grep -E "^HPCBIND_" - echo "[CUDA]" - echo "${MY_ENV}" | grep -E "^CUDA_" - echo "[OPENMP]" - echo "${MY_ENV}" | grep -E "^OMP_" -fi +TMP_ENV=$(env | sort) +if [[ ${HPCBIND_TEE} -eq 0 || ${HPCBIND_VERBOSE} -eq 0 ]]; then + echo "[HOST]" >> ${HPCBIND_LOG} + hostname -s >> ${HPCBIND_LOG} + echo "[HPCBIND]" >> ${HPCBIND_LOG} + echo "${TMP_ENV}" | grep -E "^HPCBIND_" >> ${HPCBIND_LOG} + echo "[CUDA]" >> ${HPCBIND_LOG} + echo "${TMP_ENV}" | grep -E "^CUDA_" >> ${HPCBIND_LOG} + echo "[OPENMP]" >> ${HPCBIND_LOG} + echo "${TMP_ENV}" | grep -E "^OMP_" >> ${HPCBIND_LOG} -if [[ ${HPCBIND_HAS_HWLOC} -eq 1 && ${HPCBIND_SHOW_BINDINGS} -eq 1 ]]; then - echo "[BINDINGS]" - hwloc-ls --restrict ${HPCBIND_HWLOC_CPUSET} --only pu -elif [[ ${HPCBIND_SHOW_BINDINGS} -eq 1 ]]; then - echo "Unable to show bindings, hwloc not available." + if [[ ${HPCBIND_HAS_HWLOC} -eq 1 ]]; then + echo "[BINDINGS]" >> ${HPCBIND_LOG} + hwloc-ls --restrict "${HPCBIND_HWLOC_CPUSET}" --only pu >> ${HPCBIND_LOG} + else + echo "Unable to show bindings, hwloc not available." 
>> ${HPCBIND_LOG} + fi +else + echo "[HOST]" > >(tee -a ${HPCBIND_LOG}) + hostname -s > >(tee -a ${HPCBIND_LOG}) + echo "[HPCBIND]" > >(tee -a ${HPCBIND_LOG}) + echo "${TMP_ENV}" | grep -E "^HPCBIND_" > >(tee -a ${HPCBIND_LOG}) + echo "[CUDA]" > >(tee -a ${HPCBIND_LOG}) + echo "${TMP_ENV}" | grep -E "^CUDA_" > >(tee -a ${HPCBIND_LOG}) + echo "[OPENMP]" > >(tee -a ${HPCBIND_LOG}) + echo "${TMP_ENV}" | grep -E "^OMP_" > >(tee -a ${HPCBIND_LOG}) + + if [[ ${HPCBIND_HAS_HWLOC} -eq 1 ]]; then + echo "[BINDINGS]" > >(tee -a ${HPCBIND_LOG}) + hwloc-ls --restrict "${HPCBIND_HWLOC_CPUSET}" --only pu > >(tee -a ${HPCBIND_LOG}) + else + echo "Unable to show bindings, hwloc not available." > >(tee -a ${HPCBIND_LOG}) + fi fi ################################################################################ # Run command ################################################################################ -if [[ ${HPCBIND_LSTOPO} -eq 0 ]]; then - if [[ ${HPCBIND_ENABLE_HWLOC_BIND} -eq 1 ]]; then - hwloc-bind ${HPCBIND_HWLOC_CPUSET} -- $@ - else - eval $@ - fi -else - if [[ ${HPCBIND_HAS_HWLOC} -eq 1 ]]; then - if [[ ${HPCBIND_ENABLE_HWLOC_BIND} -eq 1 && ! -z ${DISPLAY} ]]; then - echo "[BINDINGS]" - hwloc-ls --restrict ${HPCBIND_HWLOC_CPUSET} --only pu - hwloc-bind ${HPCBIND_HWLOC_CPUSET} -- lstopo --pid 0 +# must be the last executed command so that the return value is correct +if [[ ${HPCBIND_LSTOPO} -eq 1 && ${HPCBIND_HAS_HWLOC} -eq 1 && ${HPCBIND_ENABLE_HWLOC_BIND} -eq 1 && ! 
-z ${DISPLAY} ]]; then + hwloc-bind "${HPCBIND_HWLOC_CPUSET}" -- lstopo --pid 0 +elif [[ ${HPCBIND_HAS_COMMAND} -eq 1 ]]; then + # clear output files + > ${HPCBIND_ERR} + > ${HPCBIND_OUT} + if [[ ${HPCBIND_TEE} -eq 0 ]]; then + if [[ ${HPCBIND_ENABLE_HWLOC_BIND} -eq 1 ]]; then + hwloc-bind "${HPCBIND_HWLOC_CPUSET}" -- $@ > ${HPCBIND_OUT} 2> ${HPCBIND_ERR} else - hwloc-ls --restrict ${HPCBIND_HWLOC_CPUSET} + eval $@ > ${HPCBIND_OUT} 2> ${HPCBIND_ERR} fi else - echo "Unable to show bindings, hwloc not available." + if [[ ${HPCBIND_ENABLE_HWLOC_BIND} -eq 1 ]]; then + hwloc-bind "${HPCBIND_HWLOC_CPUSET}" -- $@ > >(tee ${HPCBIND_OUT}) 2> >(tee ${HPCBIND_ERR} >&2) + else + eval $@ > >(tee ${HPCBIND_OUT}) 2> >(tee ${HPCBIND_ERR} >&2) + fi fi fi diff --git a/lib/kokkos/bin/kokkos-bind b/lib/kokkos/bin/kokkos-bind deleted file mode 100755 index b6fe07a1bd1c55d864c66d292da3782cb23eb0a5..0000000000000000000000000000000000000000 --- a/lib/kokkos/bin/kokkos-bind +++ /dev/null @@ -1,221 +0,0 @@ -#!/usr/bin/env bash - -# check if hwloc commands exist -declare -i HAS_HWLOC=0 -type hwloc-bind >/dev/null 2>&1 -HAS_HWLOC="${HAS_HWLOC} + $?" - -type hwloc-distrib >/dev/null 2>&1 -HAS_HWLOC="${HAS_HWLOC} + $?" - -type hwloc-ls >/dev/null 2>&1 -HAS_HWLOC="${HAS_HWLOC} + $?" - -type hwloc-calc >/dev/null 2>&1 -HAS_HWLOC="${HAS_HWLOC} + $?" - -type hwloc-ps >/dev/null 2>&1 -HAS_HWLOC="${HAS_HWLOC} + $?" - - -#parse args -declare -a UNKNOWN_ARGS=() -declare -i DISTRIBUTE=1 -declare -i INDEX=0 -PROC_BIND="all" -CURRENT_CPUSET="" -OPENMP_VERSION=4.0 -OPENMP_PROC_BIND=True -OPENMP_NESTED=True -VERBOSE=False - -#get the current process cpuset -if [[ ${HAS_HWLOC} -eq 0 ]]; then - MY_PID="$BASHPID" - CURRENT_CPUSET=$(hwloc-ps --cpuset | grep "${MY_PID}" | cut -f 2) - echo "$CURRENT_CPUSET" -fi - -function show_help { - local cmd=$(basename "$0") - echo "Usage: ${cmd} <options> -- command ..." 
- echo " Uses hwloc to divide the node into the given number of groups," - echo " set the appropriate OMP_NUM_THREADS and execute the command on the" - echo " selected group." - echo "" - echo " NOTE: This command assumes it has exclusive use of the node" - echo "" - echo "Options:" - echo " --proc-bind=<LOC> Set the initial process mask for the script. " - echo " LOC can be any valid location argumnet for" - echo " hwloc-calc. Defaults to the entire machine" - echo " --distribute=N Distribute the current proc-bind into N groups" - echo " --index=I Use the i'th group (zero based)" - echo " --openmp=M.m Set env variables for the given OpenMP version" - echo " (default 4.0)" - echo " --no-openmp-proc-bind Set OMP_PROC_BIND to false and unset OMP_PLACES" - echo " --no-openmp-nested Set OMP_NESTED to false" - echo " -v|--verbose" - echo " -h|--help" - echo "" - echo "Sample Usage:" - echo " ${cmd} --distribute=4 --index=2 -v -- command ..." - echo "" -} - -if [[ "$#" -eq 0 ]]; then - show_help - exit 0 -fi - - -for i in $@; do - case $i in - # number of partitions to create - --proc-bind=*) - PROC_BIND="${i#*=}" - shift - ;; - --distribute=*) - DISTRIBUTE="${i#*=}" - shift - ;; - # which group to use - --index=*) - INDEX="${i#*=}" - shift - ;; - --openmp=*) - OPENMP_VERSION="${i#*=}" - shift - ;; - --no-openmp-proc-bind) - OPENMP_PROC_BIND=False - shift - ;; - --no-openmp-nested) - OPENMP_NESTED=False - shift - ;; - -v|--verbose) - VERBOSE=True - shift - ;; - -h|--help) - show_help - exit 0 - ;; - # ignore remaining arguments - --) - shift - break - ;; - # unknown option - *) - UNKNOWN_ARGS+=("$i") - shift - ;; - esac -done - -if [[ ${#UNKNOWN_ARGS[*]} > 0 ]]; then - echo "Uknown options: ${UNKNOWN_ARGS[*]}" - exit 1 -fi - -if [[ ${DISTRIBUTE} -le 0 ]]; then - echo "Invalid input for distribute, changing distribute to 1" - DISTRIBUTE=1 -fi - -if [[ ${INDEX} -ge ${DISTRIBUTE} ]]; then - echo "Invalid input for index, changing index to 0" - INDEX=0 -fi - -if [[ 
${HAS_HWLOC} -ne 0 ]]; then - echo "hwloc not found, no process binding will occur" - DISTRIBUTE=1 - INDEX=0 -fi - -if [[ ${HAS_HWLOC} -eq 0 ]]; then - - if [[ "${CURRENT_CPUSET}" == "" ]]; then - BINDING=$(hwloc-calc ${PROC_BIND}) - else - BINDING=$(hwloc-calc --restrict ${CURRENT_CPUSET} ${PROC_BIND}) - fi - - CPUSETS=($(hwloc-distrib --restrict ${BINDING} --at core ${DISTRIBUTE})) - CPUSET=${CPUSETS[${INDEX}]} - NUM_THREADS=$(hwloc-ls --restrict ${CPUSET} --only pu | wc -l) - - if [[ "${VERBOSE}" == "True" ]]; then - echo "hwloc: true" - echo " proc_bind: ${PROC_BIND}" - echo " distribute: ${DISTRIBUTE}" - echo " index: ${INDEX}" - echo " parent_cpuset: ${CURRENT_CPUSET}" - echo " cpuset: ${CPUSET}" - echo "omp_num_threads: ${NUM_THREADS}" - echo "omp_proc_bind: ${OPENMP_PROC_BIND}" - echo "omp_nested: ${OPENMP_NESTED}" - echo "OpenMP: ${OPENMP_VERSION}" - fi - - # set OMP env - if [[ "${OPENMP_PROC_BIND}" == "True" ]]; then - if [[ "${OPENMP_VERSION}" == "4.0" || "${OPENMP_VERSION}" > "4.0" ]]; then - export OMP_PLACES="threads" - export OMP_PROC_BIND="spread" - else - export OMP_PROC_BIND="true" - unset OMP_PLACES - fi - else - unset OMP_PLACES - unset OMP_PROC_BIND - fi - if [[ "${OPENMP_NESTED}" == "True" ]]; then - export OMP_NESTED="true" - else - export OMP_NESTED="false" - fi - export OMP_NUM_THREADS="${NUM_THREADS}" - - hwloc-bind ${CPUSET} -- $@ -else - NUM_THREADS=$(cat /proc/cpuinfo | grep -c processor) - - if [[ "${VERBOSE}" == "True" ]]; then - echo "hwloc: false" - echo "omp_num_threads: ${NUM_THREADS}" - echo "omp_proc_bind: ${OPENMP_PROC_BIND}" - echo "omp_nested: ${OPENMP_NESTED}" - echo "OpenMP: ${OPENMP_VERSION}" - fi - - # set OMP env - if [[ "${OPENMP_PROC_BIND}" == "True" ]]; then - if [[ "${OPENMP_VERSION}" == "4.0" || "${OPENMP_VERSION}" > "4.0" ]]; then - export OMP_PLACES="threads" - export OMP_PROC_BIND="spread" - else - export OMP_PROC_BIND="true" - unset OMP_PLACES - fi - else - unset OMP_PLACES - unset OMP_PROC_BIND - fi - if [[ 
"${OPENMP_NESTED}" == "True" ]]; then - export OMP_NESTED="true" - else - export OMP_NESTED="false" - fi - export OMP_NUM_THREADS="${NUM_THREADS}" - - eval $@ -fi - diff --git a/lib/kokkos/bin/nvcc_wrapper b/lib/kokkos/bin/nvcc_wrapper deleted file mode 100755 index fc72bca5b91662b5ad31ac3ecb19c62f0d6c8996..0000000000000000000000000000000000000000 --- a/lib/kokkos/bin/nvcc_wrapper +++ /dev/null @@ -1,287 +0,0 @@ -#!/bin/bash -# -# This shell script (nvcc_wrapper) wraps both the host compiler and -# NVCC, if you are building legacy C or C++ code with CUDA enabled. -# The script remedies some differences between the interface of NVCC -# and that of the host compiler, in particular for linking. -# It also means that a legacy code doesn't need separate .cu files; -# it can just use .cpp files. -# -# Default settings: change those according to your machine. For -# example, you may have have two different wrappers with either icpc -# or g++ as their back-end compiler. The defaults can be overwritten -# by using the usual arguments (e.g., -arch=sm_30 -ccbin icpc). - -default_arch="sm_61" -#default_arch="sm_50" - -# -# The default C++ compiler. 
-# -host_compiler=${NVCC_WRAPPER_DEFAULT_COMPILER:-"g++"} -#host_compiler="icpc" -#host_compiler="/usr/local/gcc/4.8.3/bin/g++" -#host_compiler="/usr/local/gcc/4.9.1/bin/g++" - -# -# Internal variables -# - -# C++ files -cpp_files="" - -# Host compiler arguments -xcompiler_args="" - -# Cuda (NVCC) only arguments -cuda_args="" - -# Arguments for both NVCC and Host compiler -shared_args="" - -# Linker arguments -xlinker_args="" - -# Object files passable to NVCC -object_files="" - -# Link objects for the host linker only -object_files_xlinker="" - -# Shared libraries with version numbers are not handled correctly by NVCC -shared_versioned_libraries_host="" -shared_versioned_libraries="" - -# Does the User set the architecture -arch_set=0 - -# Does the user overwrite the host compiler -ccbin_set=0 - -#Error code of compilation -error_code=0 - -# Do a dry run without actually compiling -dry_run=0 - -# Skip NVCC compilation and use host compiler directly -host_only=0 - -# Enable workaround for CUDA 6.5 for pragma ident -replace_pragma_ident=0 - -# Mark first host compiler argument -first_xcompiler_arg=1 - -temp_dir=${TMPDIR:-/tmp} - -# Check if we have an optimization argument already -optimization_applied=0 - -#echo "Arguments: $# $@" - -while [ $# -gt 0 ] -do - case $1 in - #show the executed command - --show|--nvcc-wrapper-show) - dry_run=1 - ;; - #run host compilation only - --host-only) - host_only=1 - ;; - #replace '#pragma ident' with '#ident' this is needed to compile OpenMPI due to a configure script bug and a non standardized behaviour of pragma with macros - --replace-pragma-ident) - replace_pragma_ident=1 - ;; - #handle source files to be compiled as cuda files - *.cpp|*.cxx|*.cc|*.C|*.c++|*.cu) - cpp_files="$cpp_files $1" - ;; - # Ensure we only have one optimization flag because NVCC doesn't allow muliple - -O*) - if [ $optimization_applied -eq 1 ]; then - echo "nvcc_wrapper - *warning* you have set multiple optimization flags (-O*), only the first is used 
because nvcc can only accept a single optimization setting." - else - shared_args="$shared_args $1" - optimization_applied=1 - fi - ;; - #Handle shared args (valid for both nvcc and the host compiler) - -D*|-c|-I*|-L*|-l*|-g|--help|--version|-E|-M|-shared) - shared_args="$shared_args $1" - ;; - #Handle shared args that have an argument - -o|-MT) - shared_args="$shared_args $1 $2" - shift - ;; - #Handle known nvcc args - -gencode*|--dryrun|--verbose|--keep|--keep-dir*|-G|--relocatable-device-code*|-lineinfo|-expt-extended-lambda|--resource-usage|-Xptxas*) - cuda_args="$cuda_args $1" - ;; - #Handle more known nvcc args - --expt-extended-lambda|--expt-relaxed-constexpr) - cuda_args="$cuda_args $1" - ;; - #Handle known nvcc args that have an argument - -rdc|-maxrregcount|--default-stream) - cuda_args="$cuda_args $1 $2" - shift - ;; - #Handle c++11 setting - --std=c++11|-std=c++11) - shared_args="$shared_args $1" - ;; - #strip of -std=c++98 due to nvcc warnings and Tribits will place both -std=c++11 and -std=c++98 - -std=c++98|--std=c++98) - ;; - #strip of pedantic because it produces endless warnings about #LINE added by the preprocessor - -pedantic|-Wpedantic|-ansi) - ;; - #strip of -Woverloaded-virtual to avoid "cc1: warning: command line option ‘-Woverloaded-virtual’ is valid for C++/ObjC++ but not for C" - -Woverloaded-virtual) - ;; - #strip -Xcompiler because we add it - -Xcompiler) - if [ $first_xcompiler_arg -eq 1 ]; then - xcompiler_args="$2" - first_xcompiler_arg=0 - else - xcompiler_args="$xcompiler_args,$2" - fi - shift - ;; - #strip of "-x cu" because we add that - -x) - if [[ $2 != "cu" ]]; then - if [ $first_xcompiler_arg -eq 1 ]; then - xcompiler_args="-x,$2" - first_xcompiler_arg=0 - else - xcompiler_args="$xcompiler_args,-x,$2" - fi - fi - shift - ;; - #Handle -ccbin (if its not set we can set it to a default value) - -ccbin) - cuda_args="$cuda_args $1 $2" - ccbin_set=1 - host_compiler=$2 - shift - ;; - #Handle -arch argument (if its not set use a 
default - -arch*) - cuda_args="$cuda_args $1" - arch_set=1 - ;; - #Handle -Xcudafe argument - -Xcudafe) - cuda_args="$cuda_args -Xcudafe $2" - shift - ;; - #Handle args that should be sent to the linker - -Wl*) - xlinker_args="$xlinker_args -Xlinker ${1:4:${#1}}" - host_linker_args="$host_linker_args ${1:4:${#1}}" - ;; - #Handle object files: -x cu applies to all input files, so give them to linker, except if only linking - *.a|*.so|*.o|*.obj) - object_files="$object_files $1" - object_files_xlinker="$object_files_xlinker -Xlinker $1" - ;; - #Handle object files which always need to use "-Xlinker": -x cu applies to all input files, so give them to linker, except if only linking - @*|*.dylib) - object_files="$object_files -Xlinker $1" - object_files_xlinker="$object_files_xlinker -Xlinker $1" - ;; - #Handle shared libraries with *.so.* names which nvcc can't do. - *.so.*) - shared_versioned_libraries_host="$shared_versioned_libraries_host $1" - shared_versioned_libraries="$shared_versioned_libraries -Xlinker $1" - ;; - #All other args are sent to the host compiler - *) - if [ $first_xcompiler_arg -eq 1 ]; then - xcompiler_args=$1 - first_xcompiler_arg=0 - else - xcompiler_args="$xcompiler_args,$1" - fi - ;; - esac - - shift -done - -#Add default host compiler if necessary -if [ $ccbin_set -ne 1 ]; then - cuda_args="$cuda_args -ccbin $host_compiler" -fi - -#Add architecture command -if [ $arch_set -ne 1 ]; then - cuda_args="$cuda_args -arch=$default_arch" -fi - -#Compose compilation command -nvcc_command="nvcc $cuda_args $shared_args $xlinker_args $shared_versioned_libraries" -if [ $first_xcompiler_arg -eq 0 ]; then - nvcc_command="$nvcc_command -Xcompiler $xcompiler_args" -fi - -#Compose host only command -host_command="$host_compiler $shared_args $xcompiler_args $host_linker_args $shared_versioned_libraries_host" - -#nvcc does not accept '#pragma ident SOME_MACRO_STRING' but it does accept '#ident SOME_MACRO_STRING' -if [ $replace_pragma_ident -eq 1 ]; then - 
cpp_files2="" - for file in $cpp_files - do - var=`grep pragma ${file} | grep ident | grep "#"` - if [ "${#var}" -gt 0 ] - then - sed 's/#[\ \t]*pragma[\ \t]*ident/#ident/g' $file > $temp_dir/nvcc_wrapper_tmp_$file - cpp_files2="$cpp_files2 $temp_dir/nvcc_wrapper_tmp_$file" - else - cpp_files2="$cpp_files2 $file" - fi - done - cpp_files=$cpp_files2 - #echo $cpp_files -fi - -if [ "$cpp_files" ]; then - nvcc_command="$nvcc_command $object_files_xlinker -x cu $cpp_files" -else - nvcc_command="$nvcc_command $object_files" -fi - -if [ "$cpp_files" ]; then - host_command="$host_command $object_files $cpp_files" -else - host_command="$host_command $object_files" -fi - -#Print command for dryrun -if [ $dry_run -eq 1 ]; then - if [ $host_only -eq 1 ]; then - echo $host_command - else - echo $nvcc_command - fi - exit 0 -fi - -#Run compilation command -if [ $host_only -eq 1 ]; then - $host_command -else - $nvcc_command -fi -error_code=$? - -#Report error code -exit $error_code diff --git a/lib/kokkos/config/master_history.txt b/lib/kokkos/config/master_history.txt index 96b05c02e1fae8d1c4cb9cb914b24786f7b4a1c9..6f9ca897d9f89ec3501db6c9f754d61a9182e511 100644 --- a/lib/kokkos/config/master_history.txt +++ b/lib/kokkos/config/master_history.txt @@ -9,3 +9,4 @@ tag: 2.03.00 date: 04:25:2017 master: 120d9ce7 develop: 015ba641 tag: 2.03.05 date: 05:27:2017 master: 36b92f43 develop: 79073186 tag: 2.03.13 date: 07:27:2017 master: da314444 develop: 29ccb58a tag: 2.04.00 date: 08:16:2017 master: 54eb75c0 develop: 32fb8ee1 +tag: 2.04.04 date: 09:11:2017 master: 2b7e9c20 develop: 51e7b25a diff --git a/lib/kokkos/config/nvcc_wrapper b/lib/kokkos/config/nvcc_wrapper deleted file mode 100755 index 3506c3fd378eb78167b512f33167f2a830ed1c34..0000000000000000000000000000000000000000 --- a/lib/kokkos/config/nvcc_wrapper +++ /dev/null @@ -1,284 +0,0 @@ -#!/bin/bash -# -# This shell script (nvcc_wrapper) wraps both the host compiler and -# NVCC, if you are building legacy C or C++ code with CUDA 
enabled. -# The script remedies some differences between the interface of NVCC -# and that of the host compiler, in particular for linking. -# It also means that a legacy code doesn't need separate .cu files; -# it can just use .cpp files. -# -# Default settings: change those according to your machine. For -# example, you may have have two different wrappers with either icpc -# or g++ as their back-end compiler. The defaults can be overwritten -# by using the usual arguments (e.g., -arch=sm_30 -ccbin icpc). - -default_arch="sm_61" -#default_arch="sm_50" - -# -# The default C++ compiler. -# -host_compiler=${NVCC_WRAPPER_DEFAULT_COMPILER:-"g++"} -#host_compiler="icpc" -#host_compiler="/usr/local/gcc/4.8.3/bin/g++" -#host_compiler="/usr/local/gcc/4.9.1/bin/g++" - -# -# Internal variables -# - -# C++ files -cpp_files="" - -# Host compiler arguments -xcompiler_args="" - -# Cuda (NVCC) only arguments -cuda_args="" - -# Arguments for both NVCC and Host compiler -shared_args="" - -# Linker arguments -xlinker_args="" - -# Object files passable to NVCC -object_files="" - -# Link objects for the host linker only -object_files_xlinker="" - -# Shared libraries with version numbers are not handled correctly by NVCC -shared_versioned_libraries_host="" -shared_versioned_libraries="" - -# Does the User set the architecture -arch_set=0 - -# Does the user overwrite the host compiler -ccbin_set=0 - -#Error code of compilation -error_code=0 - -# Do a dry run without actually compiling -dry_run=0 - -# Skip NVCC compilation and use host compiler directly -host_only=0 - -# Enable workaround for CUDA 6.5 for pragma ident -replace_pragma_ident=0 - -# Mark first host compiler argument -first_xcompiler_arg=1 - -temp_dir=${TMPDIR:-/tmp} - -# Check if we have an optimization argument already -optimization_applied=0 - -#echo "Arguments: $# $@" - -while [ $# -gt 0 ] -do - case $1 in - #show the executed command - --show|--nvcc-wrapper-show) - dry_run=1 - ;; - #run host compilation only - 
--host-only) - host_only=1 - ;; - #replace '#pragma ident' with '#ident' this is needed to compile OpenMPI due to a configure script bug and a non standardized behaviour of pragma with macros - --replace-pragma-ident) - replace_pragma_ident=1 - ;; - #handle source files to be compiled as cuda files - *.cpp|*.cxx|*.cc|*.C|*.c++|*.cu) - cpp_files="$cpp_files $1" - ;; - # Ensure we only have one optimization flag because NVCC doesn't allow muliple - -O*) - if [ $optimization_applied -eq 1 ]; then - echo "nvcc_wrapper - *warning* you have set multiple optimization flags (-O*), only the first is used because nvcc can only accept a single optimization setting." - else - shared_args="$shared_args $1" - optimization_applied=1 - fi - ;; - #Handle shared args (valid for both nvcc and the host compiler) - -D*|-c|-I*|-L*|-l*|-g|--help|--version|-E|-M|-shared) - shared_args="$shared_args $1" - ;; - #Handle shared args that have an argument - -o|-MT) - shared_args="$shared_args $1 $2" - shift - ;; - #Handle known nvcc args - -gencode*|--dryrun|--verbose|--keep|--keep-dir*|-G|--relocatable-device-code*|-lineinfo|-expt-extended-lambda|--resource-usage|-Xptxas*) - cuda_args="$cuda_args $1" - ;; - #Handle more known nvcc args - --expt-extended-lambda|--expt-relaxed-constexpr) - cuda_args="$cuda_args $1" - ;; - #Handle known nvcc args that have an argument - -rdc|-maxrregcount|--default-stream) - cuda_args="$cuda_args $1 $2" - shift - ;; - #Handle c++11 setting - --std=c++11|-std=c++11) - shared_args="$shared_args $1" - ;; - #strip of -std=c++98 due to nvcc warnings and Tribits will place both -std=c++11 and -std=c++98 - -std=c++98|--std=c++98) - ;; - #strip of pedantic because it produces endless warnings about #LINE added by the preprocessor - -pedantic|-Wpedantic|-ansi) - ;; - #strip -Xcompiler because we add it - -Xcompiler) - if [ $first_xcompiler_arg -eq 1 ]; then - xcompiler_args="$2" - first_xcompiler_arg=0 - else - xcompiler_args="$xcompiler_args,$2" - fi - shift - ;; - 
#strip of "-x cu" because we add that - -x) - if [[ $2 != "cu" ]]; then - if [ $first_xcompiler_arg -eq 1 ]; then - xcompiler_args="-x,$2" - first_xcompiler_arg=0 - else - xcompiler_args="$xcompiler_args,-x,$2" - fi - fi - shift - ;; - #Handle -ccbin (if its not set we can set it to a default value) - -ccbin) - cuda_args="$cuda_args $1 $2" - ccbin_set=1 - host_compiler=$2 - shift - ;; - #Handle -arch argument (if its not set use a default - -arch*) - cuda_args="$cuda_args $1" - arch_set=1 - ;; - #Handle -Xcudafe argument - -Xcudafe) - cuda_args="$cuda_args -Xcudafe $2" - shift - ;; - #Handle args that should be sent to the linker - -Wl*) - xlinker_args="$xlinker_args -Xlinker ${1:4:${#1}}" - host_linker_args="$host_linker_args ${1:4:${#1}}" - ;; - #Handle object files: -x cu applies to all input files, so give them to linker, except if only linking - *.a|*.so|*.o|*.obj) - object_files="$object_files $1" - object_files_xlinker="$object_files_xlinker -Xlinker $1" - ;; - #Handle object files which always need to use "-Xlinker": -x cu applies to all input files, so give them to linker, except if only linking - *.dylib) - object_files="$object_files -Xlinker $1" - object_files_xlinker="$object_files_xlinker -Xlinker $1" - ;; - #Handle shared libraries with *.so.* names which nvcc can't do. 
- *.so.*) - shared_versioned_libraries_host="$shared_versioned_libraries_host $1" - shared_versioned_libraries="$shared_versioned_libraries -Xlinker $1" - ;; - #All other args are sent to the host compiler - *) - if [ $first_xcompiler_arg -eq 1 ]; then - xcompiler_args=$1 - first_xcompiler_arg=0 - else - xcompiler_args="$xcompiler_args,$1" - fi - ;; - esac - - shift -done - -#Add default host compiler if necessary -if [ $ccbin_set -ne 1 ]; then - cuda_args="$cuda_args -ccbin $host_compiler" -fi - -#Add architecture command -if [ $arch_set -ne 1 ]; then - cuda_args="$cuda_args -arch=$default_arch" -fi - -#Compose compilation command -nvcc_command="nvcc $cuda_args $shared_args $xlinker_args $shared_versioned_libraries" -if [ $first_xcompiler_arg -eq 0 ]; then - nvcc_command="$nvcc_command -Xcompiler $xcompiler_args" -fi - -#Compose host only command -host_command="$host_compiler $shared_args $xcompiler_args $host_linker_args $shared_versioned_libraries_host" - -#nvcc does not accept '#pragma ident SOME_MACRO_STRING' but it does accept '#ident SOME_MACRO_STRING' -if [ $replace_pragma_ident -eq 1 ]; then - cpp_files2="" - for file in $cpp_files - do - var=`grep pragma ${file} | grep ident | grep "#"` - if [ "${#var}" -gt 0 ] - then - sed 's/#[\ \t]*pragma[\ \t]*ident/#ident/g' $file > $temp_dir/nvcc_wrapper_tmp_$file - cpp_files2="$cpp_files2 $temp_dir/nvcc_wrapper_tmp_$file" - else - cpp_files2="$cpp_files2 $file" - fi - done - cpp_files=$cpp_files2 - #echo $cpp_files -fi - -if [ "$cpp_files" ]; then - nvcc_command="$nvcc_command $object_files_xlinker -x cu $cpp_files" -else - nvcc_command="$nvcc_command $object_files" -fi - -if [ "$cpp_files" ]; then - host_command="$host_command $object_files $cpp_files" -else - host_command="$host_command $object_files" -fi - -#Print command for dryrun -if [ $dry_run -eq 1 ]; then - if [ $host_only -eq 1 ]; then - echo $host_command - else - echo $nvcc_command - fi - exit 0 -fi - -#Run compilation command -if [ $host_only -eq 1 ]; 
then - $host_command -else - $nvcc_command -fi -error_code=$? - -#Report error code -exit $error_code diff --git a/lib/kokkos/config/trilinos-integration/checkin-test b/lib/kokkos/config/trilinos-integration/checkin-test index 92a1b1c06882d3ee73e9c27f5054bd3544acdd0e..ffb565fcbbbb85f881053828d34208bd8e4b9e7e 100644 --- a/lib/kokkos/config/trilinos-integration/checkin-test +++ b/lib/kokkos/config/trilinos-integration/checkin-test @@ -1,4 +1,4 @@ module purge -module load sems-env sems-gcc/4.9.3 sems-openmpi/1.10.1 sems-hdf5/1.8.12/parallel sems-netcdf/4.3.2/parallel sems-python/2.7.9 sems-zlib/1.2.8/base sems-cmake/3.5.2 sems-parmetis/4.0.3/64bit_parallel sems-scotch/6.0.3/nopthread_64bit_parallel sems-boost/1.59.0/base +module load sems-env sems-gcc/4.9.3 sems-openmpi/1.10.1 sems-hdf5/1.8.12/parallel sems-netcdf/4.3.2/parallel sems-python/2.7.9 sems-zlib/1.2.8/base sems-cmake/3.5.2 sems-parmetis/4.0.3/64bit_parallel sems-scotch/6.0.3/nopthread_64bit_parallel sems-boost/1.63.0/base sems-yaml_cpp sems-superlu #Run Trilinos CheckinTest diff --git a/lib/kokkos/containers/src/Kokkos_StaticCrsGraph.hpp b/lib/kokkos/containers/src/Kokkos_StaticCrsGraph.hpp index 0408472c680c2c2f46e6cd32c8147a0533c28917..996b6b5610230832a962f2bde624f874bbd540cb 100644 --- a/lib/kokkos/containers/src/Kokkos_StaticCrsGraph.hpp +++ b/lib/kokkos/containers/src/Kokkos_StaticCrsGraph.hpp @@ -125,6 +125,123 @@ namespace Impl { }; } +/// \class GraphRowViewConst +/// \brief View of a row of a sparse graph. +/// \tparam GraphType Sparse graph type, such as (but not limited to) StaticCrsGraph. +/// +/// This class provides a generic view of a row of a sparse graph. +/// We intended this class to view a row of a StaticCrsGraph, but +/// GraphType need not necessarily be CrsMatrix. +/// +/// The row view is suited for computational kernels like sparse +/// matrix-vector multiply, as well as for modifying entries in the +/// sparse matrix. 
The view is always const as it does not allow graph modification. +/// +/// Here is an example loop over the entries in the row: +/// \code +/// typedef typename GraphRowViewConst<GraphType>::ordinal_type ordinal_type; +/// +/// GraphRowViewConst<GraphType> G_i = ...; +/// const ordinal_type numEntries = G_i.length; +/// for (ordinal_type k = 0; k < numEntries; ++k) { +/// ordinal_type j = G_i.colidx (k); +/// // ... do something with column index j ... +/// } +/// \endcode +/// +/// GraphType must provide the \c data_type and \c entries_type +/// typedefs. In addition, it must make sense to use GraphRowViewConst to +/// view a row of GraphType. In particular, column +/// indices of a row must be accessible using the <tt>entries</tt> +/// resp. <tt>colidx</tt> arrays given to the constructor of this +/// class, with a constant <tt>stride</tt> between successive entries. +/// The stride is one for the compressed sparse row storage format (as +/// is used by CrsMatrix), but may be greater than one for other +/// sparse matrix storage formats (e.g., ELLPACK or jagged diagonal). +template<class GraphType> +struct GraphRowViewConst { + //! The type of the column indices in the row. + typedef const typename GraphType::data_type ordinal_type; + +private: + //! Array of (local) column indices in the row. + ordinal_type* colidx_; + /// \brief Stride between successive entries in the row. + /// + /// For compressed sparse row (CSR) storage, this is always one. + /// This might be greater than one for storage formats like ELLPACK + /// or Jagged Diagonal. Nevertheless, the stride can never be + /// greater than the number of rows or columns in the matrix. Thus, + /// \c ordinal_type is the correct type. + const ordinal_type stride_; + +public: + /// \brief Constructor + /// + /// The view refers to the caller's array; nothing is copied. + /// \param colidx_in [in] Array of the row's column indices. + /// \param stride [in] (Constant) stride between successive entries in + /// the above array. 
+ /// \param count [in] Number of entries in the row. + KOKKOS_INLINE_FUNCTION + GraphRowViewConst ( ordinal_type* const colidx_in, + const ordinal_type& stride, + const ordinal_type& count) : + colidx_ (colidx_in), stride_ (stride), length (count) + {} + + /// \brief Constructor with offset into \c colidx array + /// + /// \param colidx [in] Array of the row's column indices. + /// \param stride [in] (Constant) stride between matrix entries in + /// each of the above arrays. + /// \param count [in] Number of entries in the row. + /// \param idx [in] Start offset into \c colidx array + /// + /// \tparam OffsetType The type of \c idx (see above). Must be a + /// built-in integer type. This may differ from ordinal_type. + /// For example, the matrix may have dimensions that fit in int, + /// but a number of entries that does not fit in int. + template<class OffsetType> + KOKKOS_INLINE_FUNCTION + GraphRowViewConst ( const typename GraphType::entries_type& colidx_in, + const ordinal_type& stride, + const ordinal_type& count, + const OffsetType& idx, + const typename std::enable_if<std::is_integral<OffsetType>::value, int>::type& = 0) : + colidx_ (&colidx_in(idx)), stride_ (stride), length (count) + {} + + /// \brief Number of entries in the row. + /// + /// This is a public const field rather than a public const method, + /// in order to avoid possible overhead of a method call if the + /// compiler is unable to inline that method call. + /// + /// We assume that rows contain no duplicate entries (i.e., entries + /// with the same column index). Thus, a row may have up to + /// A.numCols() entries. This means that the correct type of + /// 'length' is ordinal_type. + const ordinal_type length; + + /// \brief (Const) reference to the column index of entry i in this + /// row of the sparse matrix. + /// + /// "Entry i" is not necessarily the entry with column index i, nor + /// does i necessarily correspond to the (local) row index. 
+ KOKKOS_INLINE_FUNCTION + ordinal_type& colidx (const ordinal_type& i) const { + return colidx_[i*stride_]; + } + + /// \brief An alias for colidx + KOKKOS_INLINE_FUNCTION + ordinal_type& operator()(const ordinal_type& i) const { + return colidx(i); + } +}; + + /// \class StaticCrsGraph /// \brief Compressed row storage array. /// @@ -218,6 +335,38 @@ public: static_cast<size_type> (0); } + /// \brief Return a const view of row i of the graph. + /// + /// If row i has no entries, return an empty view; i is not range-checked. + /// + /// The returned object \c view implements the following interface: + /// <ul> + /// <li> \c view.length is the number of entries in the row </li> + /// <li> \c view.colidx(k) returns a const reference to the + /// column index of the k-th entry in the row </li> + /// </ul> + /// k is not a column index; it just counts from 0 to + /// <tt>view.length - 1</tt>. + /// + /// Users should not rely on the return type of this method. They + /// should instead assign to 'auto'. That allows compile-time + /// polymorphism for different kinds of sparse matrix formats (e.g., + /// ELLPACK or Jagged Diagonal) that we may wish to support in the + /// future. + KOKKOS_INLINE_FUNCTION + GraphRowViewConst<StaticCrsGraph> rowConst (const data_type i) const { + const size_type start = row_map(i); + // count is guaranteed to fit in ordinal_type, as long as no row + // has duplicate entries. + const data_type count = static_cast<data_type> (row_map(i+1) - start); + + if (count == 0) { + return GraphRowViewConst<StaticCrsGraph> (NULL, 1, 0); + } else { + return GraphRowViewConst<StaticCrsGraph> (entries, 1, count, start); + } + } + /** \brief Create a row partitioning into a given number of blocks * balancing non-zeros + a fixed cost per row. 
*/ diff --git a/lib/kokkos/core/src/Cuda/KokkosExp_Cuda_IterateTile_Refactor.hpp b/lib/kokkos/core/src/Cuda/KokkosExp_Cuda_IterateTile_Refactor.hpp index 46321378d9b4003c14c0165e0ef077e693a0b26a..c184c14d078fc540b84fa1c47cd6fa15122ce8df 100644 --- a/lib/kokkos/core/src/Cuda/KokkosExp_Cuda_IterateTile_Refactor.hpp +++ b/lib/kokkos/core/src/Cuda/KokkosExp_Cuda_IterateTile_Refactor.hpp @@ -91,11 +91,11 @@ struct DeviceIterateTile<2,RP,Functor,void > // LL if (RP::inner_direction == RP::Left) { for ( index_type tile_id1 = (index_type)blockIdx.y; tile_id1 < m_rp.m_tile_end[1]; tile_id1 += gridDim.y ) { - const index_type offset_1 = tile_id1*m_rp.m_tile[1] + (index_type)threadIdx.y; + const index_type offset_1 = tile_id1*m_rp.m_tile[1] + (index_type)threadIdx.y + (index_type)m_rp.m_lower[1]; if ( offset_1 < m_rp.m_upper[1] && (index_type)threadIdx.y < m_rp.m_tile[1] ) { for ( index_type tile_id0 = (index_type)blockIdx.x; tile_id0 < m_rp.m_tile_end[0]; tile_id0 += gridDim.x ) { - const index_type offset_0 = tile_id0*m_rp.m_tile[0] + (index_type)threadIdx.x; + const index_type offset_0 = tile_id0*m_rp.m_tile[0] + (index_type)threadIdx.x + (index_type)m_rp.m_lower[0]; if ( offset_0 < m_rp.m_upper[0] && (index_type)threadIdx.x < m_rp.m_tile[0] ) { m_func(offset_0 , offset_1); } @@ -106,11 +106,11 @@ struct DeviceIterateTile<2,RP,Functor,void > // LR else { for ( index_type tile_id0 = (index_type)blockIdx.x; tile_id0 < m_rp.m_tile_end[0]; tile_id0 += gridDim.x ) { - const index_type offset_0 = tile_id0*m_rp.m_tile[0] + (index_type)threadIdx.x; + const index_type offset_0 = tile_id0*m_rp.m_tile[0] + (index_type)threadIdx.x + (index_type)m_rp.m_lower[0]; if ( offset_0 < m_rp.m_upper[0] && (index_type)threadIdx.x < m_rp.m_tile[0] ) { for ( index_type tile_id1 = (index_type)blockIdx.y; tile_id1 < m_rp.m_tile_end[1]; tile_id1 += gridDim.y ) { - const index_type offset_1 = tile_id1*m_rp.m_tile[1] + (index_type)threadIdx.y; + const index_type offset_1 = tile_id1*m_rp.m_tile[1] + 
(index_type)threadIdx.y + (index_type)m_rp.m_lower[1]; if ( offset_1 < m_rp.m_upper[1] && (index_type)threadIdx.y < m_rp.m_tile[1] ) { m_func(offset_0 , offset_1); } @@ -143,11 +143,11 @@ struct DeviceIterateTile<2,RP,Functor,Tag> if (RP::inner_direction == RP::Left) { // Loop over size maxnumblocks until full range covered for ( index_type tile_id1 = (index_type)blockIdx.y; tile_id1 < m_rp.m_tile_end[1]; tile_id1 += gridDim.y ) { - const index_type offset_1 = tile_id1*m_rp.m_tile[1] + (index_type)threadIdx.y; + const index_type offset_1 = tile_id1*m_rp.m_tile[1] + (index_type)threadIdx.y + (index_type)m_rp.m_lower[1]; if ( offset_1 < m_rp.m_upper[1] && (index_type)threadIdx.y < m_rp.m_tile[1] ) { for ( index_type tile_id0 = (index_type)blockIdx.x; tile_id0 < m_rp.m_tile_end[0]; tile_id0 += gridDim.x ) { - const index_type offset_0 = tile_id0*m_rp.m_tile[0] + (index_type)threadIdx.x; + const index_type offset_0 = tile_id0*m_rp.m_tile[0] + (index_type)threadIdx.x + (index_type)m_rp.m_lower[0]; if ( offset_0 < m_rp.m_upper[0] && (index_type)threadIdx.x < m_rp.m_tile[0] ) { m_func(Tag(), offset_0 , offset_1); } @@ -157,11 +157,11 @@ struct DeviceIterateTile<2,RP,Functor,Tag> } else { for ( index_type tile_id0 = (index_type)blockIdx.x; tile_id0 < m_rp.m_tile_end[0]; tile_id0 += gridDim.x ) { - const index_type offset_0 = tile_id0*m_rp.m_tile[0] + (index_type)threadIdx.x; + const index_type offset_0 = tile_id0*m_rp.m_tile[0] + (index_type)threadIdx.x + (index_type)m_rp.m_lower[0]; if ( offset_0 < m_rp.m_upper[0] && (index_type)threadIdx.x < m_rp.m_tile[0] ) { for ( index_type tile_id1 = (index_type)blockIdx.y; tile_id1 < m_rp.m_tile_end[1]; tile_id1 += gridDim.y ) { - const index_type offset_1 = tile_id1*m_rp.m_tile[1] + (index_type)threadIdx.y; + const index_type offset_1 = tile_id1*m_rp.m_tile[1] + (index_type)threadIdx.y + (index_type)m_rp.m_lower[1]; if ( offset_1 < m_rp.m_upper[1] && (index_type)threadIdx.y < m_rp.m_tile[1] ) { m_func(Tag(), offset_0 , offset_1); } 
@@ -196,15 +196,15 @@ struct DeviceIterateTile<3,RP,Functor,void > // LL if (RP::inner_direction == RP::Left) { for ( index_type tile_id2 = (index_type)blockIdx.z; tile_id2 < m_rp.m_tile_end[2]; tile_id2 += gridDim.z ) { - const index_type offset_2 = tile_id2*m_rp.m_tile[2] + (index_type)threadIdx.z; + const index_type offset_2 = tile_id2*m_rp.m_tile[2] + (index_type)threadIdx.z + (index_type)m_rp.m_lower[2]; if ( offset_2 < m_rp.m_upper[2] && (index_type)threadIdx.z < m_rp.m_tile[2] ) { for ( index_type tile_id1 = (index_type)blockIdx.y; tile_id1 < m_rp.m_tile_end[1]; tile_id1 += gridDim.y ) { - const index_type offset_1 = tile_id1*m_rp.m_tile[1] + (index_type)threadIdx.y; + const index_type offset_1 = tile_id1*m_rp.m_tile[1] + (index_type)threadIdx.y + (index_type)m_rp.m_lower[1]; if ( offset_1 < m_rp.m_upper[1] && (index_type)threadIdx.y < m_rp.m_tile[1] ) { for ( index_type tile_id0 = (index_type)blockIdx.x; tile_id0 < m_rp.m_tile_end[0]; tile_id0 += gridDim.x ) { - const index_type offset_0 = tile_id0*m_rp.m_tile[0] + (index_type)threadIdx.x; + const index_type offset_0 = tile_id0*m_rp.m_tile[0] + (index_type)threadIdx.x + (index_type)m_rp.m_lower[0]; if ( offset_0 < m_rp.m_upper[0] && (index_type)threadIdx.x < m_rp.m_tile[0] ) { m_func(offset_0 , offset_1 , offset_2); } @@ -217,15 +217,15 @@ struct DeviceIterateTile<3,RP,Functor,void > // LR else { for ( index_type tile_id0 = (index_type)blockIdx.x; tile_id0 < m_rp.m_tile_end[0]; tile_id0 += gridDim.x ) { - const index_type offset_0 = tile_id0*m_rp.m_tile[0] + (index_type)threadIdx.x; + const index_type offset_0 = tile_id0*m_rp.m_tile[0] + (index_type)threadIdx.x + (index_type)m_rp.m_lower[0]; if ( offset_0 < m_rp.m_upper[0] && (index_type)threadIdx.x < m_rp.m_tile[0] ) { for ( index_type tile_id1 = (index_type)blockIdx.y; tile_id1 < m_rp.m_tile_end[1]; tile_id1 += gridDim.y ) { - const index_type offset_1 = tile_id1*m_rp.m_tile[1] + (index_type)threadIdx.y; + const index_type offset_1 = 
tile_id1*m_rp.m_tile[1] + (index_type)threadIdx.y + (index_type)m_rp.m_lower[1]; if ( offset_1 < m_rp.m_upper[1] && (index_type)threadIdx.y < m_rp.m_tile[1] ) { for ( index_type tile_id2 = (index_type)blockIdx.z; tile_id2 < m_rp.m_tile_end[2]; tile_id2 += gridDim.z ) { - const index_type offset_2 = tile_id2*m_rp.m_tile[2] + (index_type)threadIdx.z; + const index_type offset_2 = tile_id2*m_rp.m_tile[2] + (index_type)threadIdx.z + (index_type)m_rp.m_lower[2]; if ( offset_2 < m_rp.m_upper[2] && (index_type)threadIdx.z < m_rp.m_tile[2] ) { m_func(offset_0 , offset_1 , offset_2); } @@ -259,15 +259,15 @@ struct DeviceIterateTile<3,RP,Functor,Tag> { if (RP::inner_direction == RP::Left) { for ( index_type tile_id2 = (index_type)blockIdx.z; tile_id2 < m_rp.m_tile_end[2]; tile_id2 += gridDim.z ) { - const index_type offset_2 = tile_id2*m_rp.m_tile[2] + (index_type)threadIdx.z; + const index_type offset_2 = tile_id2*m_rp.m_tile[2] + (index_type)threadIdx.z + (index_type)m_rp.m_lower[2]; if ( offset_2 < m_rp.m_upper[2] && (index_type)threadIdx.z < m_rp.m_tile[2] ) { for ( index_type tile_id1 = (index_type)blockIdx.y; tile_id1 < m_rp.m_tile_end[1]; tile_id1 += gridDim.y ) { - const index_type offset_1 = tile_id1*m_rp.m_tile[1] + (index_type)threadIdx.y; + const index_type offset_1 = tile_id1*m_rp.m_tile[1] + (index_type)threadIdx.y + (index_type)m_rp.m_lower[1]; if ( offset_1 < m_rp.m_upper[1] && (index_type)threadIdx.y < m_rp.m_tile[1] ) { for ( index_type tile_id0 = (index_type)blockIdx.x; tile_id0 < m_rp.m_tile_end[0]; tile_id0 += gridDim.x ) { - const index_type offset_0 = tile_id0*m_rp.m_tile[0] + (index_type)threadIdx.x; + const index_type offset_0 = tile_id0*m_rp.m_tile[0] + (index_type)threadIdx.x + (index_type)m_rp.m_lower[0]; if ( offset_0 < m_rp.m_upper[0] && (index_type)threadIdx.x < m_rp.m_tile[0] ) { m_func(Tag(), offset_0 , offset_1 , offset_2); } @@ -279,15 +279,15 @@ struct DeviceIterateTile<3,RP,Functor,Tag> } else { for ( index_type tile_id0 = 
(index_type)blockIdx.x; tile_id0 < m_rp.m_tile_end[0]; tile_id0 += gridDim.x ) { - const index_type offset_0 = tile_id0*m_rp.m_tile[0] + (index_type)threadIdx.x; + const index_type offset_0 = tile_id0*m_rp.m_tile[0] + (index_type)threadIdx.x + (index_type)m_rp.m_lower[0]; if ( offset_0 < m_rp.m_upper[0] && (index_type)threadIdx.x < m_rp.m_tile[0] ) { for ( index_type tile_id1 = (index_type)blockIdx.y; tile_id1 < m_rp.m_tile_end[1]; tile_id1 += gridDim.y ) { - const index_type offset_1 = tile_id1*m_rp.m_tile[1] + (index_type)threadIdx.y; + const index_type offset_1 = tile_id1*m_rp.m_tile[1] + (index_type)threadIdx.y + (index_type)m_rp.m_lower[1]; if ( offset_1 < m_rp.m_upper[1] && (index_type)threadIdx.y < m_rp.m_tile[1] ) { for ( index_type tile_id2 = (index_type)blockIdx.z; tile_id2 < m_rp.m_tile_end[2]; tile_id2 += gridDim.z ) { - const index_type offset_2 = tile_id2*m_rp.m_tile[2] + (index_type)threadIdx.z; + const index_type offset_2 = tile_id2*m_rp.m_tile[2] + (index_type)threadIdx.z + (index_type)m_rp.m_lower[2]; if ( offset_2 < m_rp.m_upper[2] && (index_type)threadIdx.z < m_rp.m_tile[2] ) { m_func(Tag(), offset_0 , offset_1 , offset_2); } @@ -340,19 +340,19 @@ struct DeviceIterateTile<4,RP,Functor,void > const index_type thr_id1 = (index_type)threadIdx.x / m_rp.m_tile[0]; for ( index_type tile_id3 = (index_type)blockIdx.z; tile_id3 < m_rp.m_tile_end[3]; tile_id3 += gridDim.z ) { - const index_type offset_3 = tile_id3*m_rp.m_tile[3] + (index_type)threadIdx.z; + const index_type offset_3 = tile_id3*m_rp.m_tile[3] + (index_type)threadIdx.z + (index_type)m_rp.m_lower[3]; if ( offset_3 < m_rp.m_upper[3] && (index_type)threadIdx.z < m_rp.m_tile[3] ) { for ( index_type tile_id2 = (index_type)blockIdx.y; tile_id2 < m_rp.m_tile_end[2]; tile_id2 += gridDim.y ) { - const index_type offset_2 = tile_id2*m_rp.m_tile[2] + (index_type)threadIdx.y; + const index_type offset_2 = tile_id2*m_rp.m_tile[2] + (index_type)threadIdx.y + (index_type)m_rp.m_lower[2]; if ( offset_2 < 
m_rp.m_upper[2] && (index_type)threadIdx.y < m_rp.m_tile[2] ) { for ( index_type j = tile_id1 ; j < m_rp.m_tile_end[1]; j += numbl1 ) { - const index_type offset_1 = j*m_rp.m_tile[1] + thr_id1; + const index_type offset_1 = j*m_rp.m_tile[1] + thr_id1 + (index_type)m_rp.m_lower[1]; if ( offset_1 < m_rp.m_upper[1] && thr_id1 < m_rp.m_tile[1] ) { for ( index_type i = tile_id0 ; i < m_rp.m_tile_end[0]; i += numbl0 ) { - const index_type offset_0 = i*m_rp.m_tile[0] + thr_id0; + const index_type offset_0 = i*m_rp.m_tile[0] + thr_id0 + (index_type)m_rp.m_lower[0]; if ( offset_0 < m_rp.m_upper[0] && thr_id0 < m_rp.m_tile[0] ) { m_func(offset_0 , offset_1 , offset_2 , offset_3); } @@ -378,19 +378,19 @@ struct DeviceIterateTile<4,RP,Functor,void > const index_type thr_id1 = (index_type)threadIdx.x % m_rp.m_tile[1]; for ( index_type i = tile_id0; i < m_rp.m_tile_end[0]; i += numbl0 ) { - const index_type offset_0 = i*m_rp.m_tile[0] + thr_id0; + const index_type offset_0 = i*m_rp.m_tile[0] + thr_id0 + (index_type)m_rp.m_lower[0]; if ( offset_0 < m_rp.m_upper[0] && thr_id0 < m_rp.m_tile[0] ) { for ( index_type j = tile_id1; j < m_rp.m_tile_end[1]; j += numbl1 ) { - const index_type offset_1 = j*m_rp.m_tile[1] + thr_id1; + const index_type offset_1 = j*m_rp.m_tile[1] + thr_id1 + (index_type)m_rp.m_lower[1]; if ( offset_1 < m_rp.m_upper[1] && thr_id1 < m_rp.m_tile[1] ) { for ( index_type tile_id2 = (index_type)blockIdx.y; tile_id2 < m_rp.m_tile_end[2]; tile_id2 += gridDim.y ) { - const index_type offset_2 = tile_id2*m_rp.m_tile[2] + (index_type)threadIdx.y; + const index_type offset_2 = tile_id2*m_rp.m_tile[2] + (index_type)threadIdx.y + (index_type)m_rp.m_lower[2]; if ( offset_2 < m_rp.m_upper[2] && (index_type)threadIdx.y < m_rp.m_tile[2] ) { for ( index_type tile_id3 = (index_type)blockIdx.z; tile_id3 < m_rp.m_tile_end[3]; tile_id3 += gridDim.z ) { - const index_type offset_3 = tile_id3*m_rp.m_tile[3] + (index_type)threadIdx.z; + const index_type offset_3 = 
tile_id3*m_rp.m_tile[3] + (index_type)threadIdx.z + (index_type)m_rp.m_lower[3]; if ( offset_3 < m_rp.m_upper[3] && (index_type)threadIdx.z < m_rp.m_tile[3] ) { m_func(offset_0 , offset_1 , offset_2 , offset_3); } @@ -442,19 +442,19 @@ struct DeviceIterateTile<4,RP,Functor,Tag> const index_type thr_id1 = (index_type)threadIdx.x / m_rp.m_tile[0]; for ( index_type tile_id3 = (index_type)blockIdx.z; tile_id3 < m_rp.m_tile_end[3]; tile_id3 += gridDim.z ) { - const index_type offset_3 = tile_id3*m_rp.m_tile[3] + (index_type)threadIdx.z; + const index_type offset_3 = tile_id3*m_rp.m_tile[3] + (index_type)threadIdx.z + (index_type)m_rp.m_lower[3]; if ( offset_3 < m_rp.m_upper[3] && (index_type)threadIdx.z < m_rp.m_tile[3] ) { for ( index_type tile_id2 = (index_type)blockIdx.y; tile_id2 < m_rp.m_tile_end[2]; tile_id2 += gridDim.y ) { - const index_type offset_2 = tile_id2*m_rp.m_tile[2] + (index_type)threadIdx.y; + const index_type offset_2 = tile_id2*m_rp.m_tile[2] + (index_type)threadIdx.y + (index_type)m_rp.m_lower[2]; if ( offset_2 < m_rp.m_upper[2] && (index_type)threadIdx.y < m_rp.m_tile[2] ) { for ( index_type j = tile_id1; j < m_rp.m_tile_end[1]; j += numbl1 ) { - const index_type offset_1 = j*m_rp.m_tile[1] + thr_id1; + const index_type offset_1 = j*m_rp.m_tile[1] + thr_id1 + (index_type)m_rp.m_lower[1]; if ( offset_1 < m_rp.m_upper[1] && thr_id1 < m_rp.m_tile[1] ) { for ( index_type i = tile_id0; i < m_rp.m_tile_end[0]; i += numbl0 ) { - const index_type offset_0 = i*m_rp.m_tile[0] + thr_id0; + const index_type offset_0 = i*m_rp.m_tile[0] + thr_id0 + (index_type)m_rp.m_lower[0]; if ( offset_0 < m_rp.m_upper[0] && thr_id0 < m_rp.m_tile[0] ) { m_func(Tag(), offset_0 , offset_1 , offset_2 , offset_3); } @@ -479,19 +479,19 @@ struct DeviceIterateTile<4,RP,Functor,Tag> const index_type thr_id1 = (index_type)threadIdx.x % m_rp.m_tile[1]; for ( index_type i = tile_id0; i < m_rp.m_tile_end[0]; i += numbl0 ) { - const index_type offset_0 = i*m_rp.m_tile[0] + thr_id0; + 
const index_type offset_0 = i*m_rp.m_tile[0] + thr_id0 + (index_type)m_rp.m_lower[0]; if ( offset_0 < m_rp.m_upper[0] && thr_id0 < m_rp.m_tile[0] ) { for ( index_type j = tile_id1; j < m_rp.m_tile_end[1]; j += numbl1 ) { - const index_type offset_1 = tile_id1*m_rp.m_tile[1] + thr_id1; + const index_type offset_1 = j*m_rp.m_tile[1] + thr_id1 + (index_type)m_rp.m_lower[1]; /* index by loop variable j, not the fixed tile_id1, so every tile in dimension 1 is visited */ if ( offset_1 < m_rp.m_upper[1] && thr_id1 < m_rp.m_tile[1] ) { for ( index_type tile_id2 = (index_type)blockIdx.y; tile_id2 < m_rp.m_tile_end[2]; tile_id2 += gridDim.y ) { - const index_type offset_2 = tile_id2*m_rp.m_tile[2] + (index_type)threadIdx.y; + const index_type offset_2 = tile_id2*m_rp.m_tile[2] + (index_type)threadIdx.y + (index_type)m_rp.m_lower[2]; if ( offset_2 < m_rp.m_upper[2] && (index_type)threadIdx.y < m_rp.m_tile[2] ) { for ( index_type tile_id3 = (index_type)blockIdx.z; tile_id3 < m_rp.m_tile_end[3]; tile_id3 += gridDim.z ) { - const index_type offset_3 = tile_id3*m_rp.m_tile[3] + (index_type)threadIdx.z; + const index_type offset_3 = tile_id3*m_rp.m_tile[3] + (index_type)threadIdx.z + (index_type)m_rp.m_lower[3]; if ( offset_3 < m_rp.m_upper[3] && (index_type)threadIdx.z < m_rp.m_tile[3] ) { m_func(Tag() , offset_0 , offset_1 , offset_2 , offset_3); } @@ -558,23 +558,23 @@ struct DeviceIterateTile<5,RP,Functor,void > const index_type thr_id3 = (index_type)threadIdx.y / m_rp.m_tile[2]; for ( index_type tile_id4 = (index_type)blockIdx.z; tile_id4 < m_rp.m_tile_end[4]; tile_id4 += gridDim.z ) { - const index_type offset_4 = tile_id4*m_rp.m_tile[4] + (index_type)threadIdx.z; + const index_type offset_4 = tile_id4*m_rp.m_tile[4] + (index_type)threadIdx.z + (index_type)m_rp.m_lower[4]; if ( offset_4 < m_rp.m_upper[4] && (index_type)threadIdx.z < m_rp.m_tile[4] ) { for ( index_type l = tile_id3; l < m_rp.m_tile_end[3]; l += numbl3 ) { - const index_type offset_3 = l*m_rp.m_tile[3] + thr_id3; + const index_type offset_3 = l*m_rp.m_tile[3] + thr_id3 + 
(index_type)m_rp.m_lower[3]; if ( offset_3 < m_rp.m_upper[3] && thr_id3 < m_rp.m_tile[3] ) { for ( index_type k = tile_id2; k < m_rp.m_tile_end[2]; k += numbl2 ) { - const index_type offset_2 = k*m_rp.m_tile[2] + thr_id2; + const index_type offset_2 = k*m_rp.m_tile[2] + thr_id2 + (index_type)m_rp.m_lower[2]; if ( offset_2 < m_rp.m_upper[2] && thr_id2 < m_rp.m_tile[2] ) { for ( index_type j = tile_id1 ; j < m_rp.m_tile_end[1]; j += numbl1 ) { - const index_type offset_1 = j*m_rp.m_tile[1] + thr_id1; + const index_type offset_1 = j*m_rp.m_tile[1] + thr_id1 + (index_type)m_rp.m_lower[1]; if ( offset_1 < m_rp.m_upper[1] && thr_id1 < m_rp.m_tile[1] ) { for ( index_type i = tile_id0 ; i < m_rp.m_tile_end[0]; i += numbl0 ) { - const index_type offset_0 = i*m_rp.m_tile[0] + thr_id0; + const index_type offset_0 = i*m_rp.m_tile[0] + thr_id0 + (index_type)m_rp.m_lower[0]; if ( offset_0 < m_rp.m_upper[0] && thr_id0 < m_rp.m_tile[0] ) { m_func(offset_0 , offset_1 , offset_2 , offset_3, offset_4); } @@ -613,23 +613,23 @@ struct DeviceIterateTile<5,RP,Functor,void > const index_type thr_id3 = (index_type)threadIdx.y % m_rp.m_tile[3]; for ( index_type i = tile_id0; i < m_rp.m_tile_end[0]; i += numbl0 ) { - const index_type offset_0 = i*m_rp.m_tile[0] + thr_id0; + const index_type offset_0 = i*m_rp.m_tile[0] + thr_id0 + (index_type)m_rp.m_lower[0]; if ( offset_0 < m_rp.m_upper[0] && thr_id0 < m_rp.m_tile[0] ) { for ( index_type j = tile_id1; j < m_rp.m_tile_end[1]; j += numbl1 ) { - const index_type offset_1 = j*m_rp.m_tile[1] + thr_id1; + const index_type offset_1 = j*m_rp.m_tile[1] + thr_id1 + (index_type)m_rp.m_lower[1]; if ( offset_1 < m_rp.m_upper[1] && thr_id1 < m_rp.m_tile[1] ) { for ( index_type k = tile_id2; k < m_rp.m_tile_end[2]; k += numbl2 ) { - const index_type offset_2 = k*m_rp.m_tile[2] + thr_id2; + const index_type offset_2 = k*m_rp.m_tile[2] + thr_id2 + (index_type)m_rp.m_lower[2]; if ( offset_2 < m_rp.m_upper[2] && thr_id2 < m_rp.m_tile[2] ) { for ( index_type l 
= tile_id3; l < m_rp.m_tile_end[3]; l += numbl3 ) { - const index_type offset_3 = l*m_rp.m_tile[3] + thr_id3; + const index_type offset_3 = l*m_rp.m_tile[3] + thr_id3 + (index_type)m_rp.m_lower[3]; if ( offset_3 < m_rp.m_upper[3] && thr_id3 < m_rp.m_tile[3] ) { for ( index_type tile_id4 = (index_type)blockIdx.z; tile_id4 < m_rp.m_tile_end[4]; tile_id4 += gridDim.z ) { - const index_type offset_4 = tile_id4*m_rp.m_tile[4] + (index_type)threadIdx.z; + const index_type offset_4 = tile_id4*m_rp.m_tile[4] + (index_type)threadIdx.z + (index_type)m_rp.m_lower[4]; if ( offset_4 < m_rp.m_upper[4] && (index_type)threadIdx.z < m_rp.m_tile[4] ) { m_func(offset_0 , offset_1 , offset_2 , offset_3 , offset_4); } @@ -695,23 +695,23 @@ struct DeviceIterateTile<5,RP,Functor,Tag> const index_type thr_id3 = (index_type)threadIdx.y / m_rp.m_tile[2]; for ( index_type tile_id4 = (index_type)blockIdx.z; tile_id4 < m_rp.m_tile_end[4]; tile_id4 += gridDim.z ) { - const index_type offset_4 = tile_id4*m_rp.m_tile[4] + (index_type)threadIdx.z; + const index_type offset_4 = tile_id4*m_rp.m_tile[4] + (index_type)threadIdx.z + (index_type)m_rp.m_lower[4]; if ( offset_4 < m_rp.m_upper[4] && (index_type)threadIdx.z < m_rp.m_tile[4] ) { for ( index_type l = tile_id3; l < m_rp.m_tile_end[3]; l += numbl3 ) { - const index_type offset_3 = l*m_rp.m_tile[3] + thr_id3; + const index_type offset_3 = l*m_rp.m_tile[3] + thr_id3 + (index_type)m_rp.m_lower[3]; if ( offset_3 < m_rp.m_upper[3] && thr_id3 < m_rp.m_tile[3] ) { for ( index_type k = tile_id2; k < m_rp.m_tile_end[2]; k += numbl2 ) { - const index_type offset_2 = k*m_rp.m_tile[2] + thr_id2; + const index_type offset_2 = k*m_rp.m_tile[2] + thr_id2 + (index_type)m_rp.m_lower[2]; if ( offset_2 < m_rp.m_upper[2] && thr_id2 < m_rp.m_tile[2] ) { for ( index_type j = tile_id1 ; j < m_rp.m_tile_end[1]; j += numbl1 ) { - const index_type offset_1 = j*m_rp.m_tile[1] + thr_id1; + const index_type offset_1 = j*m_rp.m_tile[1] + thr_id1 + 
(index_type)m_rp.m_lower[1]; if ( offset_1 < m_rp.m_upper[1] && thr_id1 < m_rp.m_tile[1] ) { for ( index_type i = tile_id0 ; i < m_rp.m_tile_end[0]; i += numbl0 ) { - const index_type offset_0 = i*m_rp.m_tile[0] + thr_id0; + const index_type offset_0 = i*m_rp.m_tile[0] + thr_id0 + (index_type)m_rp.m_lower[0]; if ( offset_0 < m_rp.m_upper[0] && thr_id0 < m_rp.m_tile[0] ) { m_func(Tag() , offset_0 , offset_1 , offset_2 , offset_3, offset_4); } @@ -750,23 +750,23 @@ struct DeviceIterateTile<5,RP,Functor,Tag> const index_type thr_id3 = (index_type)threadIdx.y % m_rp.m_tile[3]; for ( index_type i = tile_id0; i < m_rp.m_tile_end[0]; i += numbl0 ) { - const index_type offset_0 = i*m_rp.m_tile[0] + thr_id0; + const index_type offset_0 = i*m_rp.m_tile[0] + thr_id0 + (index_type)m_rp.m_lower[0]; if ( offset_0 < m_rp.m_upper[0] && thr_id0 < m_rp.m_tile[0] ) { for ( index_type j = tile_id1; j < m_rp.m_tile_end[1]; j += numbl1 ) { - const index_type offset_1 = j*m_rp.m_tile[1] + thr_id1; + const index_type offset_1 = j*m_rp.m_tile[1] + thr_id1 + (index_type)m_rp.m_lower[1]; if ( offset_1 < m_rp.m_upper[1] && thr_id1 < m_rp.m_tile[1] ) { for ( index_type k = tile_id2; k < m_rp.m_tile_end[2]; k += numbl2 ) { - const index_type offset_2 = k*m_rp.m_tile[2] + thr_id2; + const index_type offset_2 = k*m_rp.m_tile[2] + thr_id2 + (index_type)m_rp.m_lower[2]; if ( offset_2 < m_rp.m_upper[2] && thr_id2 < m_rp.m_tile[2] ) { for ( index_type l = tile_id3; l < m_rp.m_tile_end[3]; l += numbl3 ) { - const index_type offset_3 = l*m_rp.m_tile[3] + thr_id3; + const index_type offset_3 = l*m_rp.m_tile[3] + thr_id3 + (index_type)m_rp.m_lower[3]; if ( offset_3 < m_rp.m_upper[3] && thr_id3 < m_rp.m_tile[3] ) { for ( index_type tile_id4 = (index_type)blockIdx.z; tile_id4 < m_rp.m_tile_end[4]; tile_id4 += gridDim.z ) { - const index_type offset_4 = tile_id4*m_rp.m_tile[4] + (index_type)threadIdx.z; + const index_type offset_4 = tile_id4*m_rp.m_tile[4] + (index_type)threadIdx.z + 
(index_type)m_rp.m_lower[4]; if ( offset_4 < m_rp.m_upper[4] && (index_type)threadIdx.z < m_rp.m_tile[4] ) { m_func(Tag() , offset_0 , offset_1 , offset_2 , offset_3 , offset_4); } @@ -845,27 +845,27 @@ struct DeviceIterateTile<6,RP,Functor,void > const index_type thr_id5 = (index_type)threadIdx.z / m_rp.m_tile[4]; for ( index_type n = tile_id5; n < m_rp.m_tile_end[5]; n += numbl5 ) { - const index_type offset_5 = n*m_rp.m_tile[5] + thr_id5; + const index_type offset_5 = n*m_rp.m_tile[5] + thr_id5 + (index_type)m_rp.m_lower[5]; if ( offset_5 < m_rp.m_upper[5] && thr_id5 < m_rp.m_tile[5] ) { for ( index_type m = tile_id4; m < m_rp.m_tile_end[4]; m += numbl4 ) { - const index_type offset_4 = m*m_rp.m_tile[4] + thr_id4; + const index_type offset_4 = m*m_rp.m_tile[4] + thr_id4 + (index_type)m_rp.m_lower[4]; if ( offset_4 < m_rp.m_upper[4] && thr_id4 < m_rp.m_tile[4] ) { for ( index_type l = tile_id3; l < m_rp.m_tile_end[3]; l += numbl3 ) { - const index_type offset_3 = l*m_rp.m_tile[3] + thr_id3; + const index_type offset_3 = l*m_rp.m_tile[3] + thr_id3 + (index_type)m_rp.m_lower[3]; if ( offset_3 < m_rp.m_upper[3] && thr_id3 < m_rp.m_tile[3] ) { for ( index_type k = tile_id2; k < m_rp.m_tile_end[2]; k += numbl2 ) { - const index_type offset_2 = k*m_rp.m_tile[2] + thr_id2; + const index_type offset_2 = k*m_rp.m_tile[2] + thr_id2 + (index_type)m_rp.m_lower[2]; if ( offset_2 < m_rp.m_upper[2] && thr_id2 < m_rp.m_tile[2] ) { for ( index_type j = tile_id1 ; j < m_rp.m_tile_end[1]; j += numbl1 ) { - const index_type offset_1 = j*m_rp.m_tile[1] + thr_id1; + const index_type offset_1 = j*m_rp.m_tile[1] + thr_id1 + (index_type)m_rp.m_lower[1]; if ( offset_1 < m_rp.m_upper[1] && thr_id1 < m_rp.m_tile[1] ) { for ( index_type i = tile_id0 ; i < m_rp.m_tile_end[0]; i += numbl0 ) { - const index_type offset_0 = i*m_rp.m_tile[0] + thr_id0; + const index_type offset_0 = i*m_rp.m_tile[0] + thr_id0 + (index_type)m_rp.m_lower[0]; if ( offset_0 < m_rp.m_upper[0] && thr_id0 < 
m_rp.m_tile[0] ) { m_func(offset_0 , offset_1 , offset_2 , offset_3, offset_4, offset_5); } @@ -917,27 +917,27 @@ struct DeviceIterateTile<6,RP,Functor,void > const index_type thr_id5 = (index_type)threadIdx.z % m_rp.m_tile[5]; for ( index_type i = tile_id0; i < m_rp.m_tile_end[0]; i += numbl0 ) { - const index_type offset_0 = i*m_rp.m_tile[0] + thr_id0; + const index_type offset_0 = i*m_rp.m_tile[0] + thr_id0 + (index_type)m_rp.m_lower[0]; if ( offset_0 < m_rp.m_upper[0] && thr_id0 < m_rp.m_tile[0] ) { for ( index_type j = tile_id1; j < m_rp.m_tile_end[1]; j += numbl1 ) { - const index_type offset_1 = j*m_rp.m_tile[1] + thr_id1; + const index_type offset_1 = j*m_rp.m_tile[1] + thr_id1 + (index_type)m_rp.m_lower[1]; if ( offset_1 < m_rp.m_upper[1] && thr_id1 < m_rp.m_tile[1] ) { for ( index_type k = tile_id2; k < m_rp.m_tile_end[2]; k += numbl2 ) { - const index_type offset_2 = k*m_rp.m_tile[2] + thr_id2; + const index_type offset_2 = k*m_rp.m_tile[2] + thr_id2 + (index_type)m_rp.m_lower[2]; if ( offset_2 < m_rp.m_upper[2] && thr_id2 < m_rp.m_tile[2] ) { for ( index_type l = tile_id3; l < m_rp.m_tile_end[3]; l += numbl3 ) { - const index_type offset_3 = l*m_rp.m_tile[3] + thr_id3; + const index_type offset_3 = l*m_rp.m_tile[3] + thr_id3 + (index_type)m_rp.m_lower[3]; if ( offset_3 < m_rp.m_upper[3] && thr_id3 < m_rp.m_tile[3] ) { for ( index_type m = tile_id4; m < m_rp.m_tile_end[4]; m += numbl4 ) { - const index_type offset_4 = m*m_rp.m_tile[4] + thr_id4; + const index_type offset_4 = m*m_rp.m_tile[4] + thr_id4 + (index_type)m_rp.m_lower[4]; if ( offset_4 < m_rp.m_upper[4] && thr_id4 < m_rp.m_tile[4] ) { for ( index_type n = tile_id5; n < m_rp.m_tile_end[5]; n += numbl5 ) { - const index_type offset_5 = n*m_rp.m_tile[5] + thr_id5; + const index_type offset_5 = n*m_rp.m_tile[5] + thr_id5 + (index_type)m_rp.m_lower[5]; if ( offset_5 < m_rp.m_upper[5] && thr_id5 < m_rp.m_tile[5] ) { m_func(offset_0 , offset_1 , offset_2 , offset_3 , offset_4 , offset_5); } @@ 
-1016,27 +1016,27 @@ struct DeviceIterateTile<6,RP,Functor,Tag> const index_type thr_id5 = (index_type)threadIdx.z / m_rp.m_tile[4]; for ( index_type n = tile_id5; n < m_rp.m_tile_end[5]; n += numbl5 ) { - const index_type offset_5 = n*m_rp.m_tile[5] + thr_id5; + const index_type offset_5 = n*m_rp.m_tile[5] + thr_id5 + (index_type)m_rp.m_lower[5]; if ( offset_5 < m_rp.m_upper[5] && thr_id5 < m_rp.m_tile[5] ) { for ( index_type m = tile_id4; m < m_rp.m_tile_end[4]; m += numbl4 ) { - const index_type offset_4 = m*m_rp.m_tile[4] + thr_id4; + const index_type offset_4 = m*m_rp.m_tile[4] + thr_id4 + (index_type)m_rp.m_lower[4]; if ( offset_4 < m_rp.m_upper[4] && thr_id4 < m_rp.m_tile[4] ) { for ( index_type l = tile_id3; l < m_rp.m_tile_end[3]; l += numbl3 ) { - const index_type offset_3 = l*m_rp.m_tile[3] + thr_id3; + const index_type offset_3 = l*m_rp.m_tile[3] + thr_id3 + (index_type)m_rp.m_lower[3]; if ( offset_3 < m_rp.m_upper[3] && thr_id3 < m_rp.m_tile[3] ) { for ( index_type k = tile_id2; k < m_rp.m_tile_end[2]; k += numbl2 ) { - const index_type offset_2 = k*m_rp.m_tile[2] + thr_id2; + const index_type offset_2 = k*m_rp.m_tile[2] + thr_id2 + (index_type)m_rp.m_lower[2]; if ( offset_2 < m_rp.m_upper[2] && thr_id2 < m_rp.m_tile[2] ) { for ( index_type j = tile_id1 ; j < m_rp.m_tile_end[1]; j += numbl1 ) { - const index_type offset_1 = j*m_rp.m_tile[1] + thr_id1; + const index_type offset_1 = j*m_rp.m_tile[1] + thr_id1 + (index_type)m_rp.m_lower[1]; if ( offset_1 < m_rp.m_upper[1] && thr_id1 < m_rp.m_tile[1] ) { for ( index_type i = tile_id0 ; i < m_rp.m_tile_end[0]; i += numbl0 ) { - const index_type offset_0 = i*m_rp.m_tile[0] + thr_id0; + const index_type offset_0 = i*m_rp.m_tile[0] + thr_id0 + (index_type)m_rp.m_lower[0]; if ( offset_0 < m_rp.m_upper[0] && thr_id0 < m_rp.m_tile[0] ) { m_func(Tag() , offset_0 , offset_1 , offset_2 , offset_3, offset_4, offset_5); } @@ -1088,27 +1088,27 @@ struct DeviceIterateTile<6,RP,Functor,Tag> const index_type thr_id5 = 
(index_type)threadIdx.z % m_rp.m_tile[5]; for ( index_type i = tile_id0; i < m_rp.m_tile_end[0]; i += numbl0 ) { - const index_type offset_0 = i*m_rp.m_tile[0] + thr_id0; + const index_type offset_0 = i*m_rp.m_tile[0] + thr_id0 + (index_type)m_rp.m_lower[0]; if ( offset_0 < m_rp.m_upper[0] && thr_id0 < m_rp.m_tile[0] ) { for ( index_type j = tile_id1; j < m_rp.m_tile_end[1]; j += numbl1 ) { - const index_type offset_1 = j*m_rp.m_tile[1] + thr_id1; + const index_type offset_1 = j*m_rp.m_tile[1] + thr_id1 + (index_type)m_rp.m_lower[1]; if ( offset_1 < m_rp.m_upper[1] && thr_id1 < m_rp.m_tile[1] ) { for ( index_type k = tile_id2; k < m_rp.m_tile_end[2]; k += numbl2 ) { - const index_type offset_2 = k*m_rp.m_tile[2] + thr_id2; + const index_type offset_2 = k*m_rp.m_tile[2] + thr_id2 + (index_type)m_rp.m_lower[2]; if ( offset_2 < m_rp.m_upper[2] && thr_id2 < m_rp.m_tile[2] ) { for ( index_type l = tile_id3; l < m_rp.m_tile_end[3]; l += numbl3 ) { - const index_type offset_3 = l*m_rp.m_tile[3] + thr_id3; + const index_type offset_3 = l*m_rp.m_tile[3] + thr_id3 + (index_type)m_rp.m_lower[3]; if ( offset_3 < m_rp.m_upper[3] && thr_id3 < m_rp.m_tile[3] ) { for ( index_type m = tile_id4; m < m_rp.m_tile_end[4]; m += numbl4 ) { - const index_type offset_4 = m*m_rp.m_tile[4] + thr_id4; + const index_type offset_4 = m*m_rp.m_tile[4] + thr_id4 + (index_type)m_rp.m_lower[4]; if ( offset_4 < m_rp.m_upper[4] && thr_id4 < m_rp.m_tile[4] ) { for ( index_type n = tile_id5; n < m_rp.m_tile_end[5]; n += numbl5 ) { - const index_type offset_5 = n*m_rp.m_tile[5] + thr_id5; + const index_type offset_5 = n*m_rp.m_tile[5] + thr_id5 + (index_type)m_rp.m_lower[5]; if ( offset_5 < m_rp.m_upper[5] && thr_id5 < m_rp.m_tile[5] ) { m_func(Tag() , offset_0 , offset_1 , offset_2 , offset_3 , offset_4 , offset_5); } diff --git a/lib/kokkos/core/src/Cuda/Kokkos_CudaExec.hpp b/lib/kokkos/core/src/Cuda/Kokkos_CudaExec.hpp index 
cae8ecd489f7917fd3ccc1c0f6628000f6351773..079d9f0889b4e36090593d5458a884545c7635ae 100644 --- a/lib/kokkos/core/src/Cuda/Kokkos_CudaExec.hpp +++ b/lib/kokkos/core/src/Cuda/Kokkos_CudaExec.hpp @@ -164,7 +164,7 @@ static void cuda_parallel_launch_constant_memory() template< class DriverType, unsigned int maxTperB, unsigned int minBperSM > __global__ -__launch_bounds__(maxTperB, minBperSM) +//__launch_bounds__(maxTperB, minBperSM) static void cuda_parallel_launch_constant_memory() { const DriverType & driver = @@ -182,7 +182,7 @@ static void cuda_parallel_launch_local_memory( const DriverType driver ) template< class DriverType, unsigned int maxTperB, unsigned int minBperSM > __global__ -__launch_bounds__(maxTperB, minBperSM) +//__launch_bounds__(maxTperB, minBperSM) static void cuda_parallel_launch_local_memory( const DriverType driver ) { driver(); diff --git a/lib/kokkos/core/src/Kokkos_Complex.hpp b/lib/kokkos/core/src/Kokkos_Complex.hpp index 26b47a8b749e7340692e5d9a6a13273cb0e0f8f4..f8355f0d069f6399db0fc78e59f68d10e9e183ed 100644 --- a/lib/kokkos/core/src/Kokkos_Complex.hpp +++ b/lib/kokkos/core/src/Kokkos_Complex.hpp @@ -242,45 +242,89 @@ public: re_ = v; } + template<typename InputRealType> KOKKOS_INLINE_FUNCTION - complex<RealType>& operator += (const complex<RealType>& src) { + complex<RealType>& + operator += (const complex<InputRealType>& src) { + static_assert(std::is_convertible<InputRealType,RealType>::value, + "InputRealType must be convertible to RealType"); re_ += src.re_; im_ += src.im_; return *this; } + template<typename InputRealType> KOKKOS_INLINE_FUNCTION - void operator += (const volatile complex<RealType>& src) volatile { + void + operator += (const volatile complex<InputRealType>& src) volatile { + static_assert(std::is_convertible<InputRealType,RealType>::value, + "InputRealType must be convertible to RealType"); re_ += src.re_; im_ += src.im_; } KOKKOS_INLINE_FUNCTION - complex<RealType>& operator += (const RealType& src) { - re_ += src; + 
complex<RealType>& + operator += (const std::complex<RealType>& src) { + re_ += src.real(); + im_ += src.imag(); return *this; } + template<typename InputRealType> KOKKOS_INLINE_FUNCTION - void operator += (const volatile RealType& src) volatile { + complex<RealType>& + operator += (const InputRealType& src) { + static_assert(std::is_convertible<InputRealType,RealType>::value, + "InputRealType must be convertible to RealType"); re_ += src; + return *this; } + template<typename InputRealType> KOKKOS_INLINE_FUNCTION - complex<RealType>& operator -= (const complex<RealType>& src) { + void + operator += (const volatile InputRealType& src) volatile { + static_assert(std::is_convertible<InputRealType,RealType>::value, + "InputRealType must be convertible to RealType"); + re_ += src; + } + + template<typename InputRealType> + KOKKOS_INLINE_FUNCTION + complex<RealType>& + operator -= (const complex<InputRealType>& src) { + static_assert(std::is_convertible<InputRealType,RealType>::value, + "InputRealType must be convertible to RealType"); re_ -= src.re_; im_ -= src.im_; return *this; } KOKKOS_INLINE_FUNCTION - complex<RealType>& operator -= (const RealType& src) { + complex<RealType>& + operator -= (const std::complex<RealType>& src) { + re_ -= src.real(); + im_ -= src.imag(); + return *this; + } + + template<typename InputRealType> + KOKKOS_INLINE_FUNCTION + complex<RealType>& + operator -= (const InputRealType& src) { + static_assert(std::is_convertible<InputRealType,RealType>::value, + "InputRealType must be convertible to RealType"); re_ -= src; return *this; } + template<typename InputRealType> KOKKOS_INLINE_FUNCTION - complex<RealType>& operator *= (const complex<RealType>& src) { + complex<RealType>& + operator *= (const complex<InputRealType>& src) { + static_assert(std::is_convertible<InputRealType,RealType>::value, + "InputRealType must be convertible to RealType"); const RealType realPart = re_ * src.re_ - im_ * src.im_; const RealType imagPart = re_ * src.im_ + 
im_ * src.re_; re_ = realPart; @@ -288,8 +332,12 @@ public: return *this; } + template<typename InputRealType> KOKKOS_INLINE_FUNCTION - void operator *= (const volatile complex<RealType>& src) volatile { + void + operator *= (const volatile complex<InputRealType>& src) volatile { + static_assert(std::is_convertible<InputRealType,RealType>::value, + "InputRealType must be convertible to RealType"); const RealType realPart = re_ * src.re_ - im_ * src.im_; const RealType imagPart = re_ * src.im_ + im_ * src.re_; re_ = realPart; @@ -297,20 +345,70 @@ public: } KOKKOS_INLINE_FUNCTION - complex<RealType>& operator *= (const RealType& src) { + complex<RealType>& + operator *= (const std::complex<RealType>& src) { + const RealType realPart = re_ * src.real() - im_ * src.imag(); + const RealType imagPart = re_ * src.imag() + im_ * src.real(); + re_ = realPart; + im_ = imagPart; + return *this; + } + + template<typename InputRealType> + KOKKOS_INLINE_FUNCTION + complex<RealType>& + operator *= (const InputRealType& src) { + static_assert(std::is_convertible<InputRealType,RealType>::value, + "InputRealType must be convertible to RealType"); re_ *= src; im_ *= src; return *this; } + template<typename InputRealType> KOKKOS_INLINE_FUNCTION - void operator *= (const volatile RealType& src) volatile { + void + operator *= (const volatile InputRealType& src) volatile { + static_assert(std::is_convertible<InputRealType,RealType>::value, + "InputRealType must be convertible to RealType"); re_ *= src; im_ *= src; } + template<typename InputRealType> + KOKKOS_INLINE_FUNCTION + complex<RealType>& + operator /= (const complex<InputRealType>& y) { + static_assert(std::is_convertible<InputRealType,RealType>::value, + "InputRealType must be convertible to RealType"); + + // Scale (by the "1-norm" of y) to avoid unwarranted overflow. + // If the real part is +/-Inf and the imaginary part is -/+Inf, + // this won't change the result. 
+ const RealType s = std::fabs (y.real ()) + std::fabs (y.imag ()); + + // If s is 0, then y is zero, so x/y == real(x)/0 + i*imag(x)/0. + // In that case, the relation x/y == (x/s) / (y/s) doesn't hold, + // because y/s is NaN. + if (s == 0.0) { + this->re_ /= s; + this->im_ /= s; + } + else { + const complex<RealType> x_scaled (this->re_ / s, this->im_ / s); + const complex<RealType> y_conj_scaled (y.re_ / s, -(y.im_) / s); + const RealType y_scaled_abs = y_conj_scaled.re_ * y_conj_scaled.re_ + + y_conj_scaled.im_ * y_conj_scaled.im_; // abs(y) == abs(conj(y)) + *this = x_scaled * y_conj_scaled; + *this /= y_scaled_abs; + } + return *this; + } + KOKKOS_INLINE_FUNCTION - complex<RealType>& operator /= (const complex<RealType>& y) { + complex<RealType>& + operator /= (const std::complex<RealType>& y) { + // Scale (by the "1-norm" of y) to avoid unwarranted overflow. // If the real part is +/-Inf and the imaginary part is -/+Inf, // this won't change the result. @@ -334,57 +432,95 @@ public: return *this; } + + template<typename InputRealType> KOKKOS_INLINE_FUNCTION - complex<RealType>& operator /= (const RealType& src) { + complex<RealType>& + operator /= (const InputRealType& src) { + static_assert(std::is_convertible<InputRealType,RealType>::value, + "InputRealType must be convertible to RealType"); + re_ /= src; im_ /= src; return *this; } + template<typename InputRealType> + KOKKOS_INLINE_FUNCTION + bool + operator == (const complex<InputRealType>& src) { + static_assert(std::is_convertible<InputRealType,RealType>::value, + "InputRealType must be convertible to RealType"); + + return (re_ == static_cast<RealType>(src.re_)) && (im_ == static_cast<RealType>(src.im_)); + } + KOKKOS_INLINE_FUNCTION - bool operator == (const complex<RealType>& src) { - return (re_ == src.re_) && (im_ == src.im_); + bool + operator == (const std::complex<RealType>& src) { + return (re_ == src.real()) && (im_ == src.imag()); } + template<typename InputRealType> KOKKOS_INLINE_FUNCTION 
- bool operator == (const RealType src) { - return (re_ == src) && (im_ == RealType(0)); + bool + operator == (const InputRealType src) { + static_assert(std::is_convertible<InputRealType,RealType>::value, + "InputRealType must be convertible to RealType"); + + return (re_ == static_cast<RealType>(src)) && (im_ == RealType(0)); } + template<typename InputRealType> KOKKOS_INLINE_FUNCTION - bool operator != (const complex<RealType>& src) { - return (re_ != src.re_) || (im_ != src.im_); + bool + operator != (const complex<InputRealType>& src) { + static_assert(std::is_convertible<InputRealType,RealType>::value, + "InputRealType must be convertible to RealType"); + + return (re_ != static_cast<RealType>(src.re_)) || (im_ != static_cast<RealType>(src.im_)); } KOKKOS_INLINE_FUNCTION - bool operator != (const RealType src) { - return (re_ != src) || (im_ != RealType(0)); + bool + operator != (const std::complex<RealType>& src) { + return (re_ != src.real()) || (im_ != src.imag()); } + template<typename InputRealType> + KOKKOS_INLINE_FUNCTION + bool + operator != (const InputRealType src) { + static_assert(std::is_convertible<InputRealType,RealType>::value, + "InputRealType must be convertible to RealType"); + + return (re_ != static_cast<RealType>(src)) || (im_ != RealType(0)); + } + }; //! Binary + operator for complex complex. -template<class RealType> +template<class RealType1, class RealType2> KOKKOS_INLINE_FUNCTION -complex<RealType> -operator + (const complex<RealType>& x, const complex<RealType>& y) { - return complex<RealType> (x.real () + y.real (), x.imag () + y.imag ()); +complex<typename std::common_type<RealType1,RealType2>::type> +operator + (const complex<RealType1>& x, const complex<RealType2>& y) { + return complex<typename std::common_type<RealType1,RealType2>::type > (x.real () + y.real (), x.imag () + y.imag ()); } //! Binary + operator for complex scalar. 
-template<class RealType> +template<class RealType1, class RealType2> KOKKOS_INLINE_FUNCTION -complex<RealType> -operator + (const complex<RealType>& x, const RealType& y) { - return complex<RealType> (x.real () + y , x.imag ()); +complex<typename std::common_type<RealType1,RealType2>::type> +operator + (const complex<RealType1>& x, const RealType2& y) { + return complex<typename std::common_type<RealType1,RealType2>::type> (x.real () + y , x.imag ()); } //! Binary + operator for scalar complex. -template<class RealType> +template<class RealType1, class RealType2> KOKKOS_INLINE_FUNCTION -complex<RealType> -operator + (const RealType& x, const complex<RealType>& y) { - return complex<RealType> (x + y.real (), y.imag ()); +complex<typename std::common_type<RealType1,RealType2>::type> +operator + (const RealType1& x, const complex<RealType2>& y) { + return complex<typename std::common_type<RealType1,RealType2>::type> (x + y.real (), y.imag ()); } //! Unary + operator for complex. @@ -396,27 +532,27 @@ operator + (const complex<RealType>& x) { } //! Binary - operator for complex. -template<class RealType> +template<class RealType1, class RealType2> KOKKOS_INLINE_FUNCTION -complex<RealType> -operator - (const complex<RealType>& x, const complex<RealType>& y) { - return complex<RealType> (x.real () - y.real (), x.imag () - y.imag ()); +complex<typename std::common_type<RealType1,RealType2>::type> +operator - (const complex<RealType1>& x, const complex<RealType2>& y) { + return complex<typename std::common_type<RealType1,RealType2>::type> (x.real () - y.real (), x.imag () - y.imag ()); } //! Binary - operator for complex scalar. 
-template<class RealType> +template<class RealType1, class RealType2> KOKKOS_INLINE_FUNCTION -complex<RealType> -operator - (const complex<RealType>& x, const RealType& y) { - return complex<RealType> (x.real () - y , x.imag ()); +complex<typename std::common_type<RealType1,RealType2>::type> +operator - (const complex<RealType1>& x, const RealType2& y) { + return complex<typename std::common_type<RealType1,RealType2>::type> (x.real () - y , x.imag ()); } //! Binary - operator for scalar complex. -template<class RealType> +template<class RealType1, class RealType2> KOKKOS_INLINE_FUNCTION -complex<RealType> -operator - (const RealType& x, const complex<RealType>& y) { - return complex<RealType> (x - y.real (), - y.imag ()); +complex<typename std::common_type<RealType1,RealType2>::type> +operator - (const RealType1& x, const complex<RealType2>& y) { + return complex<typename std::common_type<RealType1,RealType2>::type> (x - y.real (), - y.imag ()); } //! Unary - operator for complex. @@ -428,12 +564,12 @@ operator - (const complex<RealType>& x) { } //! Binary * operator for complex. -template<class RealType> +template<class RealType1, class RealType2> KOKKOS_INLINE_FUNCTION -complex<RealType> -operator * (const complex<RealType>& x, const complex<RealType>& y) { - return complex<RealType> (x.real () * y.real () - x.imag () * y.imag (), - x.real () * y.imag () + x.imag () * y.real ()); +complex<typename std::common_type<RealType1,RealType2>::type> +operator * (const complex<RealType1>& x, const complex<RealType2>& y) { + return complex<typename std::common_type<RealType1,RealType2>::type> (x.real () * y.real () - x.imag () * y.imag (), + x.real () * y.imag () + x.imag () * y.real ()); } /// \brief Binary * operator for std::complex and complex. 
@@ -446,33 +582,34 @@ operator * (const complex<RealType>& x, const complex<RealType>& y) { /// This function cannot be called in a CUDA device function, because /// std::complex's methods and nonmember functions are not marked as /// CUDA device functions. -template<class RealType> -complex<RealType> -operator * (const std::complex<RealType>& x, const complex<RealType>& y) { - return complex<RealType> (x.real () * y.real () - x.imag () * y.imag (), - x.real () * y.imag () + x.imag () * y.real ()); +template<class RealType1, class RealType2> +inline +complex<typename std::common_type<RealType1,RealType2>::type> +operator * (const std::complex<RealType1>& x, const complex<RealType2>& y) { + return complex<typename std::common_type<RealType1,RealType2>::type> (x.real () * y.real () - x.imag () * y.imag (), + x.real () * y.imag () + x.imag () * y.real ()); } /// \brief Binary * operator for RealType times complex. /// /// This function exists because the compiler doesn't know that /// RealType and complex<RealType> commute with respect to operator*. -template<class RealType> +template<class RealType1, class RealType2> KOKKOS_INLINE_FUNCTION -complex<RealType> -operator * (const RealType& x, const complex<RealType>& y) { - return complex<RealType> (x * y.real (), x * y.imag ()); +complex<typename std::common_type<RealType1,RealType2>::type> +operator * (const RealType1& x, const complex<RealType2>& y) { + return complex<typename std::common_type<RealType1,RealType2>::type> (x * y.real (), x * y.imag ()); } /// \brief Binary * operator for RealType times complex. /// /// This function exists because the compiler doesn't know that /// RealType and complex<RealType> commute with respect to operator*. 
-template<class RealType> +template<class RealType1, class RealType2> KOKKOS_INLINE_FUNCTION -complex<RealType> -operator * (const complex<RealType>& y, const RealType& x) { - return complex<RealType> (x * y.real (), x * y.imag ()); +complex<typename std::common_type<RealType1,RealType2>::type> +operator * (const complex<RealType1>& y, const RealType2& x) { + return complex<typename std::common_type<RealType1,RealType2>::type> (x * y.real (), x * y.imag ()); } //! Imaginary part of a complex number. @@ -539,33 +676,34 @@ complex<RealType> pow (const complex<RealType>& x) { //! Binary operator / for complex and real numbers template<class RealType1, class RealType2> KOKKOS_INLINE_FUNCTION -complex<RealType1> +complex<typename std::common_type<RealType1,RealType2>::type> operator / (const complex<RealType1>& x, const RealType2& y) { - return complex<RealType1> (real (x) / y, imag (x) / y); + return complex<typename std::common_type<RealType1,RealType2>::type> (real (x) / y, imag (x) / y); } //! Binary operator / for complex. -template<class RealType> +template<class RealType1, class RealType2> KOKKOS_INLINE_FUNCTION -complex<RealType> -operator / (const complex<RealType>& x, const complex<RealType>& y) { +complex<typename std::common_type<RealType1,RealType2>::type> +operator / (const complex<RealType1>& x, const complex<RealType2>& y) { // Scale (by the "1-norm" of y) to avoid unwarranted overflow. // If the real part is +/-Inf and the imaginary part is -/+Inf, // this won't change the result. - const RealType s = std::fabs (real (y)) + std::fabs (imag (y)); + typedef typename std::common_type<RealType1,RealType2>::type common_real_type; + const common_real_type s = std::fabs (real (y)) + std::fabs (imag (y)); // If s is 0, then y is zero, so x/y == real(x)/0 + i*imag(x)/0. // In that case, the relation x/y == (x/s) / (y/s) doesn't hold, // because y/s is NaN. 
if (s == 0.0) { - return complex<RealType> (real (x) / s, imag (x) / s); + return complex<common_real_type> (real (x) / s, imag (x) / s); } else { - const complex<RealType> x_scaled (real (x) / s, imag (x) / s); - const complex<RealType> y_conj_scaled (real (y) / s, -imag (y) / s); - const RealType y_scaled_abs = real (y_conj_scaled) * real (y_conj_scaled) + + const complex<common_real_type> x_scaled (real (x) / s, imag (x) / s); + const complex<common_real_type> y_conj_scaled (real (y) / s, -imag (y) / s); + const RealType1 y_scaled_abs = real (y_conj_scaled) * real (y_conj_scaled) + imag (y_conj_scaled) * imag (y_conj_scaled); // abs(y) == abs(conj(y)) - complex<RealType> result = x_scaled * y_conj_scaled; + complex<common_real_type> result = x_scaled * y_conj_scaled; result /= y_scaled_abs; return result; } @@ -574,16 +712,19 @@ operator / (const complex<RealType>& x, const complex<RealType>& y) { //! Binary operator / for complex and real numbers template<class RealType1, class RealType2> KOKKOS_INLINE_FUNCTION -complex<RealType1> +complex<typename std::common_type<RealType1,RealType2>::type> operator / (const RealType1& x, const complex<RealType2>& y) { - return complex<RealType1> (x)/y; + return complex<typename std::common_type<RealType1,RealType2>::type> (x)/y; } //! Equality operator for two complex numbers. -template<class RealType> +template<class RealType1, class RealType2> KOKKOS_INLINE_FUNCTION -bool operator == (const complex<RealType>& x, const complex<RealType>& y) { - return real (x) == real (y) && imag (x) == imag (y); +bool +operator == (const complex<RealType1>& x, const complex<RealType2>& y) { + typedef typename std::common_type<RealType1,RealType2>::type common_real_type; + return ( static_cast<common_real_type>(real (x)) == static_cast<common_real_type>(real (y)) && + static_cast<common_real_type>(imag (x)) == static_cast<common_real_type>(imag (y)) ); } /// \brief Equality operator for std::complex and Kokkos::complex. 
@@ -592,50 +733,68 @@ bool operator == (const complex<RealType>& x, const complex<RealType>& y) { /// Otherwise, CUDA builds will give compiler warnings ("warning: /// calling a constexpr __host__ function("real") from a __host__ /// __device__ function("operator==") is not allowed"). -template<class RealType> -bool operator == (const std::complex<RealType>& x, const complex<RealType>& y) { - return std::real (x) == real (y) && std::imag (x) == imag (y); -} - +template<class RealType1, class RealType2> +inline +bool +operator == (const std::complex<RealType1>& x, const complex<RealType2>& y) { + typedef typename std::common_type<RealType1,RealType2>::type common_real_type; + return ( static_cast<common_real_type>(std::real (x)) == static_cast<common_real_type>(real (y)) && + static_cast<common_real_type>(std::imag (x)) == static_cast<common_real_type>(imag (y)) ); +} + //! Equality operator for complex and real number. template<class RealType1, class RealType2> KOKKOS_INLINE_FUNCTION -bool operator == (const complex<RealType1>& x, const RealType2& y) { - return real (x) == y && imag (x) == static_cast<RealType1> (0.0); +bool +operator == (const complex<RealType1>& x, const RealType2& y) { + typedef typename std::common_type<RealType1,RealType2>::type common_real_type; + return ( static_cast<common_real_type>(real (x)) == static_cast<common_real_type>(y) && + static_cast<common_real_type>(imag (x)) == static_cast<common_real_type>(0.0) ); } //! Equality operator for real and complex number. -template<class RealType> +template<class RealType1, class RealType2> KOKKOS_INLINE_FUNCTION -bool operator == (const RealType& x, const complex<RealType>& y) { +bool +operator == (const RealType1& x, const complex<RealType2>& y) { return y == x; } //! Inequality operator for two complex numbers. 
-template<class RealType> +template<class RealType1, class RealType2> KOKKOS_INLINE_FUNCTION -bool operator != (const complex<RealType>& x, const complex<RealType>& y) { - return real (x) != real (y) || imag (x) != imag (y); +bool +operator != (const complex<RealType1>& x, const complex<RealType2>& y) { + typedef typename std::common_type<RealType1,RealType2>::type common_real_type; + return ( static_cast<common_real_type>(real (x)) != static_cast<common_real_type>(real (y)) || + static_cast<common_real_type>(imag (x)) != static_cast<common_real_type>(imag (y)) ); } //! Inequality operator for std::complex and Kokkos::complex. -template<class RealType> -KOKKOS_INLINE_FUNCTION -bool operator != (const std::complex<RealType>& x, const complex<RealType>& y) { - return std::real (x) != real (y) || std::imag (x) != imag (y); +template<class RealType1, class RealType2> +inline +bool +operator != (const std::complex<RealType1>& x, const complex<RealType2>& y) { + typedef typename std::common_type<RealType1,RealType2>::type common_real_type; + return ( static_cast<common_real_type>(std::real (x)) != static_cast<common_real_type>(real (y)) || + static_cast<common_real_type>(std::imag (x)) != static_cast<common_real_type>(imag (y)) ); } //! Inequality operator for complex and real number. template<class RealType1, class RealType2> KOKKOS_INLINE_FUNCTION -bool operator != (const complex<RealType1>& x, const RealType2& y) { - return real (x) != y || imag (x) != static_cast<RealType1> (0.0); +bool +operator != (const complex<RealType1>& x, const RealType2& y) { + typedef typename std::common_type<RealType1,RealType2>::type common_real_type; + return ( static_cast<common_real_type>(real (x)) != static_cast<common_real_type>(y) || + static_cast<common_real_type>(imag (x)) != static_cast<common_real_type>(0.0) ); } //! Inequality operator for real and complex number. 
-template<class RealType> +template<class RealType1, class RealType2> KOKKOS_INLINE_FUNCTION -bool operator != (const RealType& x, const complex<RealType>& y) { +bool +operator != (const RealType1& x, const complex<RealType2>& y) { return y != x; } diff --git a/lib/kokkos/core/src/Kokkos_Crs.hpp b/lib/kokkos/core/src/Kokkos_Crs.hpp index f089c16ad2ca3c71ad7cdf2042e19bee93a7e0ed..b9c131cd7ab8077ad75201be2c6510abc4921633 100644 --- a/lib/kokkos/core/src/Kokkos_Crs.hpp +++ b/lib/kokkos/core/src/Kokkos_Crs.hpp @@ -353,7 +353,14 @@ struct CountAndFill { struct Fill {}; KOKKOS_INLINE_FUNCTION void operator()(Fill, size_type i) const { auto j = m_crs.row_map(i); - data_type* fill = &(m_crs.entries(j)); + /* we don't want to access entries(entries.size()), even if its just to get its + address and never use it. + this can happen when row (i) is empty and all rows after it are also empty. + we could compare to row_map(i + 1), but that is a read from global memory, + whereas dimension_0() should be part of the View in registers (or constant memory) */ + data_type* fill = + (j == static_cast<decltype(j)>(m_crs.entries.dimension_0())) ? 
+ nullptr : (&(m_crs.entries(j))); m_functor(i, fill); } using self_type = CountAndFill<CrsType, Functor>; diff --git a/lib/kokkos/core/src/Kokkos_HBWSpace.hpp b/lib/kokkos/core/src/Kokkos_HBWSpace.hpp index 9c9af0dd8b8624b98ebfd2fbcefc8bfa613c387f..b811751a2c09c91bb75252a223cd3efa02dfbe31 100644 --- a/lib/kokkos/core/src/Kokkos_HBWSpace.hpp +++ b/lib/kokkos/core/src/Kokkos_HBWSpace.hpp @@ -147,12 +147,11 @@ public: , const size_t arg_alloc_size ) const; /**\brief Return Name of the MemorySpace */ - static constexpr const char* name(); + static constexpr const char* name() { return "HBW"; } private: AllocationMechanism m_alloc_mech; - static constexpr const char* m_name = "HBW"; friend class Kokkos::Impl::SharedAllocationRecord< Kokkos::Experimental::HBWSpace, void >; }; diff --git a/lib/kokkos/core/src/Kokkos_NumericTraits.hpp b/lib/kokkos/core/src/Kokkos_NumericTraits.hpp index 339571941d8824b77981b36ed32f9d3b131bad78..a825fd54d3918c93e3f84dc6b5cdcb443daf374c 100644 --- a/lib/kokkos/core/src/Kokkos_NumericTraits.hpp +++ b/lib/kokkos/core/src/Kokkos_NumericTraits.hpp @@ -192,7 +192,7 @@ template<> struct reduction_identity<float> { KOKKOS_FORCEINLINE_FUNCTION constexpr static float sum() {return static_cast<float>(0.0f);} KOKKOS_FORCEINLINE_FUNCTION constexpr static float prod() {return static_cast<float>(1.0f);} - KOKKOS_FORCEINLINE_FUNCTION constexpr static float max() {return FLT_MIN;} + KOKKOS_FORCEINLINE_FUNCTION constexpr static float max() {return -FLT_MAX;} KOKKOS_FORCEINLINE_FUNCTION constexpr static float min() {return FLT_MAX;} }; @@ -200,7 +200,7 @@ template<> struct reduction_identity<double> { KOKKOS_FORCEINLINE_FUNCTION constexpr static double sum() {return static_cast<double>(0.0);} KOKKOS_FORCEINLINE_FUNCTION constexpr static double prod() {return static_cast<double>(1.0);} - KOKKOS_FORCEINLINE_FUNCTION constexpr static double max() {return DBL_MIN;} + KOKKOS_FORCEINLINE_FUNCTION constexpr static double max() {return -DBL_MAX;} 
KOKKOS_FORCEINLINE_FUNCTION constexpr static double min() {return DBL_MAX;} }; @@ -208,7 +208,7 @@ template<> struct reduction_identity<long double> { KOKKOS_FORCEINLINE_FUNCTION constexpr static long double sum() {return static_cast<long double>(0.0);} KOKKOS_FORCEINLINE_FUNCTION constexpr static long double prod() {return static_cast<long double>(1.0);} - KOKKOS_FORCEINLINE_FUNCTION constexpr static long double max() {return LDBL_MIN;} + KOKKOS_FORCEINLINE_FUNCTION constexpr static long double max() {return -LDBL_MAX;} KOKKOS_FORCEINLINE_FUNCTION constexpr static long double min() {return LDBL_MAX;} }; diff --git a/lib/kokkos/core/src/Kokkos_ROCm.hpp b/lib/kokkos/core/src/Kokkos_ROCm.hpp index b13b0b01dea588e3ddf2fd57a7be5b24005d4498..0118d4667e30825dbcb428a91445cb1d42532b48 100644 --- a/lib/kokkos/core/src/Kokkos_ROCm.hpp +++ b/lib/kokkos/core/src/Kokkos_ROCm.hpp @@ -211,6 +211,24 @@ struct VerifyExecutionCanAccessMemorySpace } // namespace Kokkos + +#define threadIdx_x (hc_get_workitem_id(0)) +#define threadIdx_y (hc_get_workitem_id(1)) +#define threadIdx_z (hc_get_workitem_id(2)) + +#define blockIdx_x (hc_get_group_id(0)) +#define blockIdx_y (hc_get_group_id(1)) +#define blockIdx_z (hc_get_group_id(2)) + +#define blockDim_x (hc_get_group_size(0)) +#define blockDim_y (hc_get_group_size(1)) +#define blockDim_z (hc_get_group_size(2)) + +#define gridDim_x (hc_get_num_groups(0)) +#define gridDim_y (hc_get_num_groups(1)) +#define gridDim_z (hc_get_num_groups(2)) + + #include <ROCm/Kokkos_ROCm_Parallel.hpp> #include <ROCm/Kokkos_ROCm_Task.hpp> diff --git a/lib/kokkos/core/src/Makefile b/lib/kokkos/core/src/Makefile index 8fb13b89549b52ef790c6c1488321e9df18d3b41..a917cf16560a32ab060043854a89f1e662820974 100644 --- a/lib/kokkos/core/src/Makefile +++ b/lib/kokkos/core/src/Makefile @@ -88,6 +88,7 @@ build-makefile-kokkos: echo "KOKKOS_SRC = $(KOKKOS_SRC)" >> Makefile.kokkos echo "" >> Makefile.kokkos echo "#Variables used in application Makefiles" >> Makefile.kokkos + 
echo "KOKKOS_OS = $(KOKKOS_OS)" >> Makefile.kokkos echo "KOKKOS_CPP_DEPENDS = $(KOKKOS_CPP_DEPENDS)" >> Makefile.kokkos echo "KOKKOS_CXXFLAGS = $(KOKKOS_CXXFLAGS)" >> Makefile.kokkos echo "KOKKOS_CPPFLAGS = $(KOKKOS_CPPFLAGS)" >> Makefile.kokkos diff --git a/lib/kokkos/core/src/OpenMP/Kokkos_OpenMP_Exec.hpp b/lib/kokkos/core/src/OpenMP/Kokkos_OpenMP_Exec.hpp index 37d2ac831801ac35feca1e250f19486ff842524e..de84f6e59fed9f300b2d2bde514308d7a272187d 100644 --- a/lib/kokkos/core/src/OpenMP/Kokkos_OpenMP_Exec.hpp +++ b/lib/kokkos/core/src/OpenMP/Kokkos_OpenMP_Exec.hpp @@ -211,6 +211,7 @@ void OpenMP::partition_master( F const& f , thread_local_bytes ); + omp_set_num_threads(partition_size); f( omp_get_thread_num(), omp_get_num_threads() ); Impl::t_openmp_instance->~Exec(); diff --git a/lib/kokkos/core/src/ROCm/Kokkos_ROCm_Reduce.hpp b/lib/kokkos/core/src/ROCm/Kokkos_ROCm_Reduce.hpp index 0b7a1e2583d1b88953da2524885317662b3cbd99..f2674e592951a3faefe4b8d3c4a80bb2eb325ea6 100644 --- a/lib/kokkos/core/src/ROCm/Kokkos_ROCm_Reduce.hpp +++ b/lib/kokkos/core/src/ROCm/Kokkos_ROCm_Reduce.hpp @@ -113,7 +113,6 @@ void reduce_enqueue( if (output_length < 1) return; - assert(output_result != nullptr); const auto td = get_tile_desc<T>(szElements,output_length,team_size,vector_size, shared_size); // allocate host and device memory for the results from each team @@ -176,14 +175,17 @@ void reduce_enqueue( } }); - ValueInit::init(ReducerConditional::select(f, reducer), output_result); + if (output_result != nullptr) + ValueInit::init(ReducerConditional::select(f, reducer), output_result); fut.wait(); copy(result,result_cpu.data()); - for(std::size_t i=0;i<td.num_tiles;i++) - ValueJoin::join(ReducerConditional::select(f, reducer), output_result, result_cpu.data()+i*output_length); + if (output_result != nullptr) { + for(std::size_t i=0;i<td.num_tiles;i++) + ValueJoin::join(ReducerConditional::select(f, reducer), output_result, result_cpu.data()+i*output_length); - ValueFinal::final( 
ReducerConditional::select(f, reducer) , output_result ); + ValueFinal::final( ReducerConditional::select(f, reducer) , output_result ); + } } diff --git a/lib/kokkos/core/src/ROCm/Kokkos_ROCm_Scan.hpp b/lib/kokkos/core/src/ROCm/Kokkos_ROCm_Scan.hpp index acf75f6f1366fbc618e075ded29e52f7aa1a742c..c2e85ad11279dc329ec85e0e7e08e972abf52f22 100644 --- a/lib/kokkos/core/src/ROCm/Kokkos_ROCm_Scan.hpp +++ b/lib/kokkos/core/src/ROCm/Kokkos_ROCm_Scan.hpp @@ -67,7 +67,7 @@ void scan_enqueue( hc::array<value_type> result(td.num_tiles); hc::array<value_type> scratch(len); - tile_for<value_type>(td, [&,len,td](hc::tiled_index<1> t_idx, tile_buffer<value_type> buffer) [[hc]] + tile_for<value_type>(td, [&,f,len,td](hc::tiled_index<1> t_idx, tile_buffer<value_type> buffer) [[hc]] { const auto local = t_idx.local[0]; const auto global = t_idx.global[0]; @@ -135,7 +135,7 @@ void scan_enqueue( ValueJoin::join(f, &result_cpu[i], &result_cpu[i-1]); copy(result_cpu.data(),result); - hc::parallel_for_each(hc::extent<1>(len).tile(td.tile_size), [&,len,td](hc::tiled_index<1> t_idx) [[hc]] + hc::parallel_for_each(hc::extent<1>(len).tile(td.tile_size), [&,f,len,td](hc::tiled_index<1> t_idx) [[hc]] { // const auto local = t_idx.local[0]; const auto global = t_idx.global[0]; diff --git a/lib/kokkos/core/src/impl/Kokkos_BitOps.hpp b/lib/kokkos/core/src/impl/Kokkos_BitOps.hpp index 3d3029535e9c770b65cbe07af5f6256fd01ada0d..c5e73c8b26b0d327c6ee40fd6ad429b0a91e8f50 100644 --- a/lib/kokkos/core/src/impl/Kokkos_BitOps.hpp +++ b/lib/kokkos/core/src/impl/Kokkos_BitOps.hpp @@ -68,6 +68,8 @@ int bit_first_zero( unsigned i ) noexcept return full != i ? _bit_scan_forward( ~i ) : -1 ; #elif defined( KOKKOS_COMPILER_IBM ) return full != i ? __cnttz4( ~i ) : -1 ; +#elif defined( KOKKOS_COMPILER_CRAYC ) + return full != i ? _popcnt( i ^ (i+1) ) - 1 : -1 ; #elif defined( KOKKOS_COMPILER_GNU ) || defined( __GNUC__ ) || defined( __GNUG__ ) return full != i ? 
__builtin_ffs( ~i ) - 1 : -1 ; #else @@ -90,17 +92,16 @@ int bit_scan_forward( unsigned i ) return _bit_scan_forward(i); #elif defined( KOKKOS_COMPILER_IBM ) return __cnttz4(i); +#elif defined( KOKKOS_COMPILER_CRAYC ) + return i ? _popcnt(~i & (i-1)) : -1; #elif defined( KOKKOS_COMPILER_GNU ) || defined( __GNUC__ ) || defined( __GNUG__ ) return __builtin_ffs(i) - 1; #else - unsigned t = 1u; - int r = 0; - while ( i && ( i & t == 0 ) ) - { - t = t << 1; - ++r; + int offset = -1; + if ( i ) { + for ( offset = 0 ; (i & ( 1 << offset ) ) == 0 ; ++offset ); } - return r; + return offset; #endif } @@ -116,17 +117,16 @@ int bit_scan_reverse( unsigned i ) return _bit_scan_reverse(i); #elif defined( KOKKOS_COMPILER_IBM ) return shift - __cntlz4(i); +#elif defined( KOKKOS_COMPILER_CRAYC ) + return i ? shift - _leadz32(i) : 0 ; #elif defined( __GNUC__ ) || defined( __GNUG__ ) return shift - __builtin_clz(i); #else - unsigned t = 1u << shift; - int r = 0; - while ( i && ( i & t == 0 ) ) - { - t = t >> 1; - ++r; + int offset = 0; + if ( i ) { + for ( offset = shift ; (i & ( 1 << offset ) ) == 0 ; --offset ); } - return r; + return offset; #endif } @@ -142,6 +142,8 @@ int bit_count( unsigned i ) return _popcnt32(i); #elif defined( KOKKOS_COMPILER_IBM ) return __popcnt4(i); +#elif defined( KOKKOS_COMPILER_CRAYC ) + return _popcnt(i); #elif defined( __GNUC__ ) || defined( __GNUG__ ) return __builtin_popcount(i); #else diff --git a/lib/kokkos/core/src/impl/Kokkos_HBWSpace.cpp b/lib/kokkos/core/src/impl/Kokkos_HBWSpace.cpp index e11f8b6d346491f75fe0e18f0bda85385233907e..cd0553218d0753bf9185fd9a91014b47ea68a80b 100644 --- a/lib/kokkos/core/src/impl/Kokkos_HBWSpace.cpp +++ b/lib/kokkos/core/src/impl/Kokkos_HBWSpace.cpp @@ -166,10 +166,6 @@ void HBWSpace::deallocate( void * const arg_alloc_ptr , const size_t arg_alloc_s } } -constexpr const char* HBWSpace::name() { - return m_name; -} - } // namespace Experimental } // namespace Kokkos diff --git 
a/lib/kokkos/core/unit_test/TestComplex.hpp b/lib/kokkos/core/unit_test/TestComplex.hpp index ce5537fed362a43eac1b57c8f63a06d7329c1ff4..c7f681699e40ae1b921ce0d2ef1dcf0f31c1424c 100644 --- a/lib/kokkos/core/unit_test/TestComplex.hpp +++ b/lib/kokkos/core/unit_test/TestComplex.hpp @@ -114,7 +114,7 @@ struct TestComplexBasicMath { typename Kokkos::View<Kokkos::complex<double>*,ExecSpace>::HostMirror h_results; void testit () { - d_results = Kokkos::View<Kokkos::complex<double>*,ExecSpace>("TestComplexBasicMath",20); + d_results = Kokkos::View<Kokkos::complex<double>*,ExecSpace>("TestComplexBasicMath",24); h_results = Kokkos::create_mirror_view(d_results); Kokkos::parallel_for(Kokkos::RangePolicy<ExecSpace>(0,1), *this); @@ -125,6 +125,7 @@ struct TestComplexBasicMath { std::complex<double> b(3.25,5.75); std::complex<double> d(1.0,2.0); double c = 9.3; + int e = 2; std::complex<double> r; r = a+b; ASSERT_FLOAT_EQ(h_results(0).real(), r.real()); ASSERT_FLOAT_EQ(h_results(0).imag(), r.imag()); @@ -147,6 +148,12 @@ struct TestComplexBasicMath { r = c-a; ASSERT_FLOAT_EQ(h_results(17).real(), r.real()); ASSERT_FLOAT_EQ(h_results(17).imag(), r.imag()); r = c*a; ASSERT_FLOAT_EQ(h_results(18).real(), r.real()); ASSERT_FLOAT_EQ(h_results(18).imag(), r.imag()); r = c/a; ASSERT_FLOAT_EQ(h_results(19).real(), r.real()); ASSERT_FLOAT_EQ(h_results(19).imag(), r.imag()); + + r = a; + /* r = a+e; */ ASSERT_FLOAT_EQ(h_results(20).real(), r.real()+e); ASSERT_FLOAT_EQ(h_results(20).imag(), r.imag()); + /* r = a-e; */ ASSERT_FLOAT_EQ(h_results(21).real(), r.real()-e); ASSERT_FLOAT_EQ(h_results(21).imag(), r.imag()); + /* r = a*e; */ ASSERT_FLOAT_EQ(h_results(22).real(), r.real()*e); ASSERT_FLOAT_EQ(h_results(22).imag(), r.imag()*e); + /* r = a/e; */ ASSERT_FLOAT_EQ(h_results(23).real(), r.real()/2); ASSERT_FLOAT_EQ(h_results(23).imag(), r.imag()/e); } KOKKOS_INLINE_FUNCTION @@ -190,6 +197,12 @@ struct TestComplexBasicMath { d_results(17) = c-a; d_results(18) = c*a; d_results(19) = c/a; + 
+ int e = 2; + d_results(20) = a+e; + d_results(21) = a-e; + d_results(22) = a*e; + d_results(23) = a/e; } }; diff --git a/lib/kokkos/core/unit_test/TestMDRange.hpp b/lib/kokkos/core/unit_test/TestMDRange.hpp index f579ddf02c8242a43931c5177cfc3cad58c45078..fbc3a65c2fbf3eb3aea2127a17495dd81fb2f5c7 100644 --- a/lib/kokkos/core/unit_test/TestMDRange.hpp +++ b/lib/kokkos/core/unit_test/TestMDRange.hpp @@ -286,7 +286,9 @@ struct TestMDRange_2D { // Test with reducers - scalar { typedef typename Kokkos::Experimental::MDRangePolicy< ExecSpace, Rank<2>, Kokkos::IndexType<int> > range_type; - range_type range( {{ 0, 0 }}, {{ N0, N1 }}, {{ 3, 3 }} ); + int s0 = 1; + int s1 = 1; + range_type range( {{ s0, s1 }}, {{ N0, N1 }}, {{ 3, 3 }} ); TestMDRange_2D functor( N0, N1 ); @@ -297,7 +299,7 @@ struct TestMDRange_2D { parallel_reduce( range, functor, reducer_scalar ); - ASSERT_EQ( sum, 2 * N0 * N1 ); + ASSERT_EQ( sum, 2 * (N0 - s0) * (N1 - s1) ); } // Test with reducers - scalar view { @@ -445,7 +447,9 @@ struct TestMDRange_2D { typedef typename range_type::tile_type tile_type; typedef typename range_type::point_type point_type; - range_type range( point_type{ { 0, 0 } }, point_type{ { N0, N1 } }, tile_type{ { 3, 3 } } ); + const int s0 = 1; + const int s1 = 1; + range_type range( point_type{ { s0, s1 } }, point_type{ { N0, N1 } }, tile_type{ { 3, 3 } } ); TestMDRange_2D functor( N0, N1 ); parallel_for( range, functor ); @@ -454,8 +458,8 @@ struct TestMDRange_2D { Kokkos::deep_copy( h_view, functor.input_view ); int counter = 0; - for ( int i = 0; i < N0; ++i ) - for ( int j = 0; j < N1; ++j ) + for ( int i = s0; i < N0; ++i ) + for ( int j = s1; j < N1; ++j ) { if ( h_view( i, j ) != 3 ) { ++counter; @@ -463,7 +467,7 @@ struct TestMDRange_2D { } if ( counter != 0 ) { - printf( "Default Layouts + InitTag op(): Errors in test_for2; mismatches = %d\n\n", counter ); + printf( "Offset Start + Default Layouts + InitTag op(): Errors in test_for2; mismatches = %d\n\n", counter ); } 
ASSERT_EQ( counter, 0 ); @@ -699,6 +703,7 @@ struct TestMDRange_2D { ASSERT_EQ( counter, 0 ); } + } // end test_for2 }; // MDRange_2D @@ -749,7 +754,10 @@ struct TestMDRange_3D { typedef typename range_type::tile_type tile_type; typedef typename range_type::point_type point_type; - range_type range( point_type{ { 0, 0, 0 } }, point_type{ { N0, N1, N2 } }, tile_type{ { 3, 3, 3 } } ); + int s0 = 1; + int s1 = 1; + int s2 = 1; + range_type range( point_type{ { s0, s1, s2 } }, point_type{ { N0, N1, N2 } }, tile_type{ { 3, 3, 3 } } ); TestMDRange_3D functor( N0, N1, N2 ); @@ -757,7 +765,7 @@ struct TestMDRange_3D { double sum = 0.0; parallel_reduce( range, functor, sum ); - ASSERT_EQ( sum, 2 * N0 * N1 * N2 ); + ASSERT_EQ( sum, 2 * (N0 - s0) * (N1 - s1) * (N2 - s2) ); } // Test with reducers - scalar @@ -952,7 +960,10 @@ struct TestMDRange_3D { typedef typename range_type::tile_type tile_type; typedef typename range_type::point_type point_type; - range_type range( point_type{ { 0, 0, 0 } }, point_type{ { N0, N1, N2 } }, tile_type{ { 3, 3, 3 } } ); + int s0 = 1; + int s1 = 1; + int s2 = 1; + range_type range( point_type{ { s0, s1, s2 } }, point_type{ { N0, N1, N2 } }, tile_type{ { 3, 3, 3 } } ); TestMDRange_3D functor( N0, N1, N2 ); parallel_for( range, functor ); @@ -961,9 +972,9 @@ struct TestMDRange_3D { Kokkos::deep_copy( h_view, functor.input_view ); int counter = 0; - for ( int i = 0; i < N0; ++i ) - for ( int j = 0; j < N1; ++j ) - for ( int k = 0; k < N2; ++k ) + for ( int i = s0; i < N0; ++i ) + for ( int j = s1; j < N1; ++j ) + for ( int k = s2; k < N2; ++k ) { if ( h_view( i, j, k ) != 3 ) { ++counter; @@ -971,7 +982,7 @@ struct TestMDRange_3D { } if ( counter != 0 ) { - printf( "Defaults + InitTag op(): Errors in test_for3; mismatches = %d\n\n", counter ); + printf( "Offset Start + Defaults + InitTag op(): Errors in test_for3; mismatches = %d\n\n", counter ); } ASSERT_EQ( counter, 0 ); @@ -1207,7 +1218,11 @@ struct TestMDRange_4D { typedef typename 
range_type::tile_type tile_type; typedef typename range_type::point_type point_type; - range_type range( point_type{ { 0, 0, 0, 0 } }, point_type{ { N0, N1, N2, N3 } }, tile_type{ { 3, 3, 3, 3 } } ); + int s0 = 1; + int s1 = 1; + int s2 = 1; + int s3 = 1; + range_type range( point_type{ { s0, s1, s2, s3 } }, point_type{ { N0, N1, N2, N3 } }, tile_type{ { 3, 3, 3, 3 } } ); TestMDRange_4D functor( N0, N1, N2, N3 ); @@ -1215,7 +1230,7 @@ struct TestMDRange_4D { double sum = 0.0; parallel_reduce( range, functor, sum ); - ASSERT_EQ( sum, 2 * N0 * N1 * N2 * N3 ); + ASSERT_EQ( sum, 2 * (N0 - s0) * (N1 - s1) * (N2 - s2) * (N3 - s3) ); } // Test with reducers - scalar @@ -1415,7 +1430,11 @@ struct TestMDRange_4D { typedef typename range_type::tile_type tile_type; typedef typename range_type::point_type point_type; - range_type range( point_type{ { 0, 0, 0, 0 } }, point_type{ { N0, N1, N2, N3 } }, tile_type{ { 3, 11, 3, 3 } } ); + int s0 = 1; + int s1 = 1; + int s2 = 1; + int s3 = 1; + range_type range( point_type{ { s0, s1, s2, s3 } }, point_type{ { N0, N1, N2, N3 } }, tile_type{ { 3, 11, 3, 3 } } ); TestMDRange_4D functor( N0, N1, N2, N3 ); parallel_for( range, functor ); @@ -1424,10 +1443,10 @@ struct TestMDRange_4D { Kokkos::deep_copy( h_view, functor.input_view ); int counter = 0; - for ( int i = 0; i < N0; ++i ) - for ( int j = 0; j < N1; ++j ) - for ( int k = 0; k < N2; ++k ) - for ( int l = 0; l < N3; ++l ) + for ( int i = s0; i < N0; ++i ) + for ( int j = s1; j < N1; ++j ) + for ( int k = s2; k < N2; ++k ) + for ( int l = s3; l < N3; ++l ) { if ( h_view( i, j, k, l ) != 3 ) { ++counter; @@ -1435,7 +1454,7 @@ struct TestMDRange_4D { } if ( counter != 0 ) { - printf("Defaults +m_tile > m_upper dim2 InitTag op(): Errors in test_for4; mismatches = %d\n\n",counter); + printf("Offset Start + Defaults +m_tile > m_upper dim2 InitTag op(): Errors in test_for4; mismatches = %d\n\n",counter); } ASSERT_EQ( counter, 0 ); @@ -1682,7 +1701,12 @@ struct TestMDRange_5D { typedef 
typename range_type::tile_type tile_type; typedef typename range_type::point_type point_type; - range_type range( point_type{ { 0, 0, 0, 0, 0 } }, point_type{ { N0, N1, N2, N3, N4 } }, tile_type{ { 3, 3, 3, 3, 3 } } ); + int s0 = 1; + int s1 = 1; + int s2 = 1; + int s3 = 1; + int s4 = 1; + range_type range( point_type{ { s0, s1, s2, s3, s4 } }, point_type{ { N0, N1, N2, N3, N4 } }, tile_type{ { 3, 3, 3, 3, 3 } } ); TestMDRange_5D functor( N0, N1, N2, N3, N4 ); @@ -1690,7 +1714,7 @@ struct TestMDRange_5D { double sum = 0.0; parallel_reduce( range, functor, sum ); - ASSERT_EQ( sum, 2 * N0 * N1 * N2 * N3 * N4 ); + ASSERT_EQ( sum, 2 * (N0 - s0) * (N1 - s1) * (N2 - s2) * (N3 - s3) * (N4 - s4) ); } // Test with reducers - scalar @@ -1810,7 +1834,12 @@ struct TestMDRange_5D { typedef typename range_type::tile_type tile_type; typedef typename range_type::point_type point_type; - range_type range( point_type{ { 0, 0, 0, 0, 0 } }, point_type{ { N0, N1, N2, N3, N4 } }, tile_type{ { 3, 3, 3, 3, 5 } } ); + int s0 = 1; + int s1 = 1; + int s2 = 1; + int s3 = 1; + int s4 = 1; + range_type range( point_type{ { s0, s1, s2, s3, s4 } }, point_type{ { N0, N1, N2, N3, N4 } }, tile_type{ { 3, 3, 3, 3, 5 } } ); TestMDRange_5D functor( N0, N1, N2, N3, N4 ); parallel_for( range, functor ); @@ -1819,11 +1848,11 @@ struct TestMDRange_5D { Kokkos::deep_copy( h_view, functor.input_view ); int counter = 0; - for ( int i = 0; i < N0; ++i ) - for ( int j = 0; j < N1; ++j ) - for ( int k = 0; k < N2; ++k ) - for ( int l = 0; l < N3; ++l ) - for ( int m = 0; m < N4; ++m ) + for ( int i = s0; i < N0; ++i ) + for ( int j = s1; j < N1; ++j ) + for ( int k = s2; k < N2; ++k ) + for ( int l = s3; l < N3; ++l ) + for ( int m = s4; m < N4; ++m ) { if ( h_view( i, j, k, l, m ) != 3 ) { ++counter; @@ -1831,7 +1860,7 @@ struct TestMDRange_5D { } if ( counter != 0 ) { - printf( "Defaults + InitTag op(): Errors in test_for5; mismatches = %d\n\n", counter ); + printf( "Offset Start + Defaults + InitTag op(): 
Errors in test_for5; mismatches = %d\n\n", counter ); } ASSERT_EQ( counter, 0 ); @@ -2084,7 +2113,13 @@ struct TestMDRange_6D { typedef typename range_type::tile_type tile_type; typedef typename range_type::point_type point_type; - range_type range( point_type{ { 0, 0, 0, 0, 0, 0 } }, point_type{ { N0, N1, N2, N3, N4, N5 } }, tile_type{ { 3, 3, 3, 3, 3, 2 } } ); + int s0 = 1; + int s1 = 1; + int s2 = 1; + int s3 = 1; + int s4 = 1; + int s5 = 1; + range_type range( point_type{ { s0, s1, s2, s3, s4, s5 } }, point_type{ { N0, N1, N2, N3, N4, N5 } }, tile_type{ { 3, 3, 3, 3, 3, 2 } } ); TestMDRange_6D functor( N0, N1, N2, N3, N4, N5 ); @@ -2092,7 +2127,7 @@ struct TestMDRange_6D { double sum = 0.0; parallel_reduce( range, functor, sum ); - ASSERT_EQ( sum, 2 * N0 * N1 * N2 * N3 * N4 * N5 ); + ASSERT_EQ( sum, 2 * (N0 - s0) * (N1 - s1) * (N2 - s2) * (N3 - s3) * (N4 - s4) * (N5 - s5) ); } // Test with reducers - scalar @@ -2214,7 +2249,13 @@ struct TestMDRange_6D { typedef typename range_type::tile_type tile_type; typedef typename range_type::point_type point_type; - range_type range( point_type{ { 0, 0, 0, 0, 0, 0 } }, point_type{ { N0, N1, N2, N3, N4, N5 } }, tile_type{ { 3, 3, 3, 3, 2, 3 } } ); //tile dims 3,3,3,3,3,3 more than cuda can handle with debugging + int s0 = 1; + int s1 = 1; + int s2 = 1; + int s3 = 1; + int s4 = 1; + int s5 = 1; + range_type range( point_type{ { s0, s1, s2, s3, s4, s5 } }, point_type{ { N0, N1, N2, N3, N4, N5 } }, tile_type{ { 3, 3, 3, 3, 2, 3 } } ); //tile dims 3,3,3,3,3,3 more than cuda can handle with debugging TestMDRange_6D functor( N0, N1, N2, N3, N4, N5 ); parallel_for( range, functor ); @@ -2223,12 +2264,12 @@ struct TestMDRange_6D { Kokkos::deep_copy( h_view, functor.input_view ); int counter = 0; - for ( int i = 0; i < N0; ++i ) - for ( int j = 0; j < N1; ++j ) - for ( int k = 0; k < N2; ++k ) - for ( int l = 0; l < N3; ++l ) - for ( int m = 0; m < N4; ++m ) - for ( int n = 0; n < N5; ++n ) + for ( int i = s0; i < N0; ++i ) + for ( 
int j = s1; j < N1; ++j ) + for ( int k = s2; k < N2; ++k ) + for ( int l = s3; l < N3; ++l ) + for ( int m = s4; m < N4; ++m ) + for ( int n = s5; n < N5; ++n ) { if ( h_view( i, j, k, l, m, n ) != 3 ) { ++counter; @@ -2236,7 +2277,7 @@ struct TestMDRange_6D { } if ( counter != 0 ) { - printf( "Defaults + InitTag op(): Errors in test_for6; mismatches = %d\n\n", counter ); + printf( "Offset Start + Defaults + InitTag op(): Errors in test_for6; mismatches = %d\n\n", counter ); } ASSERT_EQ( counter, 0 ); diff --git a/lib/latte/Install.py b/lib/latte/Install.py index b3e771e4cc7b53bda4ff0082256a6bc5e169c526..37cb5d6b17f135aa7b2c371278d1d78b7e6ec3b7 100644 --- a/lib/latte/Install.py +++ b/lib/latte/Install.py @@ -159,13 +159,13 @@ if buildflag or pathflag: os.remove("includelink") if os.path.isfile("liblink") or os.path.islink("liblink"): os.remove("liblink") - if os.path.isfile("filelink") or os.path.islink("filelink"): - os.remove("filelink") + if os.path.isfile("filelink.o") or os.path.islink("filelink.o"): + os.remove("filelink.o") cmd = 'ln -s "%s/src" includelink' % lattedir subprocess.check_output(cmd,stderr=subprocess.STDOUT,shell=True) cmd = 'ln -s "%s" liblink' % lattedir subprocess.check_output(cmd,stderr=subprocess.STDOUT,shell=True) - cmd = 'ln -s "%s/src/latte_c_bind.o" filelink' % lattedir + cmd = 'ln -s "%s/src/latte_c_bind.o" filelink.o' % lattedir subprocess.check_output(cmd,stderr=subprocess.STDOUT,shell=True) # copy Makefile.lammps.suffix to Makefile.lammps diff --git a/lib/latte/Makefile.lammps.gfortran b/lib/latte/Makefile.lammps.gfortran index 921721552be381ecbb8b46c43657c5b679006448..6aa7782f8ac2a0f7430ef89bc70c1543cd46c351 100644 --- a/lib/latte/Makefile.lammps.gfortran +++ b/lib/latte/Makefile.lammps.gfortran @@ -3,5 +3,5 @@ # GNU Fortran settings latte_SYSINC = -latte_SYSLIB = ../../lib/latte/filelink -llatte -lgfortran -llapack -lblas +latte_SYSLIB = ../../lib/latte/filelink.o -llatte -lgfortran -llapack -lblas latte_SYSPATH = -fopenmp diff 
--git a/lib/latte/Makefile.lammps.ifort b/lib/latte/Makefile.lammps.ifort index 23d2b32fcc713dc6b6fcd24d8b0473649d279377..0491bdd8a5f41065df995def3b3c0105600fdf28 100644 --- a/lib/latte/Makefile.lammps.ifort +++ b/lib/latte/Makefile.lammps.ifort @@ -3,7 +3,7 @@ # Intel ifort settings latte_SYSINC = -latte_SYSLIB = ../../lib/latte/filelink \ +latte_SYSLIB = ../../lib/latte/filelink.o \ -llatte -lifcore -lsvml -lompstub -limf -lmkl_intel_lp64 \ -lmkl_intel_thread -lmkl_core -lmkl_intel_thread -lpthread \ -openmp -O0 diff --git a/python/lammps.py b/python/lammps.py index 2db657fbae84c17a7afb6af7b10eecebb4bb4ccf..944eaeabf5a982afddfb79cc8a9f4f286c8aa94e 100644 --- a/python/lammps.py +++ b/python/lammps.py @@ -861,6 +861,19 @@ class PyLammps(object): """ needed for Python2 compatibility, since print is a reserved keyword """ return self.__getattr__("print")(s) + def __dir__(self): + return ['angle_coeff', 'angle_style', 'atom_modify', 'atom_style', 'atom_style', + 'bond_coeff', 'bond_style', 'boundary', 'change_box', 'communicate', 'compute', + 'create_atoms', 'create_box', 'delete_atoms', 'delete_bonds', 'dielectric', + 'dihedral_coeff', 'dihedral_style', 'dimension', 'dump', 'fix', 'fix_modify', + 'group', 'improper_coeff', 'improper_style', 'include', 'kspace_modify', + 'kspace_style', 'lattice', 'mass', 'minimize', 'min_style', 'neighbor', + 'neigh_modify', 'newton', 'nthreads', 'pair_coeff', 'pair_modify', + 'pair_style', 'processors', 'read', 'read_data', 'read_restart', 'region', + 'replicate', 'reset_timestep', 'restart', 'run', 'run_style', 'thermo', + 'thermo_modify', 'thermo_style', 'timestep', 'undump', 'unfix', 'units', + 'variable', 'velocity', 'write_restart'] + def __getattr__(self, name): def handler(*args, **kwargs): cmd_args = [name] + [str(x) for x in args] diff --git a/src/.gitignore b/src/.gitignore index 1571065b72e57ce17ef3893a2706230ff2e1038d..8d997760966204a8eb1da712f98df065299c2cc1 100644 --- a/src/.gitignore +++ b/src/.gitignore @@ -103,6 
+103,15 @@ /sna.cpp /sna.h +/uef_*.cpp +/uef_*.h +/compute_*uef.cpp +/compute_*uef.h +/dump_*uef.cpp +/dump_*uef.h +/fix_*uef.cpp +/fix_*uef.h + /atom_vec_wavepacket.cpp /atom_vec_wavepacket.h /fix_nve_awpmd.cpp @@ -185,6 +194,8 @@ /bond_fene.h /bond_fene_expand.cpp /bond_fene_expand.h +/bond_gromos.cpp +/bond_gromos.h /bond_harmonic.cpp /bond_harmonic.h /bond_harmonic_shift.cpp @@ -405,6 +416,8 @@ /fix_lambdah_calc.h /fix_langevin_eff.cpp /fix_langevin_eff.h +/fix_latte.cpp +/fix_latte.h /fix_lb_fluid.cpp /fix_lb_fluid.h /fix_lb_momentum.cpp @@ -503,6 +516,8 @@ /fix_reaxc_bonds.h /fix_reaxc_species.cpp /fix_reaxc_species.h +/fix_rhok.cpp +/fix_rhok.h /fix_rigid.cpp /fix_rigid.h /fix_rigid_nh.cpp diff --git a/src/CORESHELL/Install.sh b/src/CORESHELL/Install.sh index 7c0b7a02a23c0c5817eef8b1c2764f0dc98dd295..93c8fe8132c2bcdc59252719e3d7d42766583d3d 100644 --- a/src/CORESHELL/Install.sh +++ b/src/CORESHELL/Install.sh @@ -30,13 +30,19 @@ action () { action compute_temp_cs.cpp action compute_temp_cs.h -action pair_born_coul_long_cs.cpp pair_born_coul_long.cpp -action pair_born_coul_dsf_cs.cpp pair_born_coul_dsf.cpp -action pair_buck_coul_long_cs.cpp pair_buck_coul_long.cpp -action pair_born_coul_long_cs.h pair_born_coul_long.h -action pair_born_coul_dsf_cs.h pair_born_coul_dsf.h -action pair_buck_coul_long_cs.h pair_buck_coul_long.h -action pair_coul_long_cs.cpp pair_coul_long.cpp -action pair_coul_long_cs.h pair_coul_long.h + +action pair_born_coul_long_cs.cpp pair_born_coul_long.cpp +action pair_born_coul_dsf_cs.cpp pair_born_coul_dsf.cpp +action pair_buck_coul_long_cs.cpp pair_buck_coul_long.cpp +action pair_born_coul_long_cs.h pair_born_coul_long.h +action pair_born_coul_dsf_cs.h pair_born_coul_dsf.h +action pair_buck_coul_long_cs.h pair_buck_coul_long.h +action pair_coul_long_cs.cpp pair_coul_long.cpp +action pair_coul_long_cs.h pair_coul_long.h action pair_lj_cut_coul_long_cs.cpp pair_lj_cut_coul_long.cpp -action pair_lj_cut_coul_long_cs.h pair_lj_cut_coul_long.h 
+action pair_lj_cut_coul_long_cs.h pair_lj_cut_coul_long.h + +action pair_born_coul_wolf_cs.cpp pair_born_coul_wolf.cpp +action pair_born_coul_wolf_cs.h pair_born_coul_wolf.h +action pair_coul_wolf_cs.cpp pair_coul_wolf.cpp +action pair_coul_wolf_cs.h pair_coul_wolf.h diff --git a/src/CORESHELL/pair_born_coul_wolf_cs.cpp b/src/CORESHELL/pair_born_coul_wolf_cs.cpp new file mode 100644 index 0000000000000000000000000000000000000000..3c98f5bf770cc2aff2881480fb65ab2dfd18f41f --- /dev/null +++ b/src/CORESHELL/pair_born_coul_wolf_cs.cpp @@ -0,0 +1,169 @@ +/* ---------------------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. 
+------------------------------------------------------------------------- */ + + +#include <math.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include "pair_born_coul_wolf_cs.h" +#include "atom.h" +#include "comm.h" +#include "force.h" +#include "neighbor.h" +#include "neigh_list.h" +#include "math_const.h" +#include "math_special.h" +#include "memory.h" +#include "error.h" + +using namespace LAMMPS_NS; +using namespace MathConst; +using namespace MathSpecial; + +#define EPSILON 1.0e-20 + +/* ---------------------------------------------------------------------- */ + +PairBornCoulWolfCS::PairBornCoulWolfCS(LAMMPS *lmp) : PairBornCoulWolf(lmp) +{ + single_enable = 0; +} + +/* ---------------------------------------------------------------------- */ + +void PairBornCoulWolfCS::compute(int eflag, int vflag) +{ + int i,j,ii,jj,inum,jnum,itype,jtype; + double qtmp,xtmp,ytmp,ztmp,delx,dely,delz,evdwl,ecoul,fpair; + double rsq,r2inv,r6inv,forcecoul,forceborn,factor_coul,factor_lj; + double prefactor; + double r,rexp; + int *ilist,*jlist,*numneigh,**firstneigh; + double erfcc,erfcd,v_sh,dvdrr,e_self,e_shift,f_shift,qisq; + + evdwl = ecoul = 0.0; + if (eflag || vflag) ev_setup(eflag,vflag); + else evflag = vflag_fdotr = 0; + + double **x = atom->x; + double **f = atom->f; + double *q = atom->q; + int *type = atom->type; + int nlocal = atom->nlocal; + double *special_coul = force->special_coul; + double *special_lj = force->special_lj; + int newton_pair = force->newton_pair; + double qqrd2e = force->qqrd2e; + + // self and shifted coulombic energy + + e_self = v_sh = 0.0; + e_shift = erfc(alf*cut_coul)/cut_coul; + f_shift = -(e_shift+ 2.0*alf/MY_PIS * exp(-alf*alf*cut_coul*cut_coul)) / cut_coul; + + inum = list->inum; + ilist = list->ilist; + numneigh = list->numneigh; + firstneigh = list->firstneigh; + + // loop over neighbors of my atoms + + for (ii = 0; ii < inum; ii++) { + i = ilist[ii]; + qtmp = q[i]; + xtmp = x[i][0]; + ytmp = x[i][1]; + ztmp = 
x[i][2]; + itype = type[i]; + jlist = firstneigh[i]; + jnum = numneigh[i]; + + qisq = qtmp*qtmp; + e_self = -(e_shift/2.0 + alf/MY_PIS) * qisq*qqrd2e; + if (eflag) ev_tally(i,i,nlocal,0,0.0,e_self,0.0,0.0,0.0,0.0); + + for (jj = 0; jj < jnum; jj++) { + j = jlist[jj]; + factor_lj = special_lj[sbmask(j)]; + factor_coul = special_coul[sbmask(j)]; + j &= NEIGHMASK; + + delx = xtmp - x[j][0]; + dely = ytmp - x[j][1]; + delz = ztmp - x[j][2]; + rsq = delx*delx + dely*dely + delz*delz; + jtype = type[j]; + + if (rsq < cutsq[itype][jtype]) { + rsq += EPSILON; + // Add EPISLON for case: r = 0; Interaction must be removed + // by special bond + r2inv = 1.0/rsq; + + if (rsq < cut_coulsq) { + r = sqrt(rsq); + prefactor = qqrd2e*qtmp*q[j]/r; + erfcc = erfc(alf*r); + erfcd = exp(-alf*alf*r*r); + v_sh = (erfcc - e_shift*r) * prefactor; + dvdrr = (erfcc/rsq + 2.0*alf/MY_PIS * erfcd/r) + f_shift; + forcecoul = dvdrr*rsq*prefactor; + if (factor_coul < 1.0) forcecoul -= (1.0-factor_coul)*prefactor; + } else forcecoul = 0.0; + + if (rsq < cut_ljsq[itype][jtype]) { + r6inv = r2inv*r2inv*r2inv; + r = sqrt(rsq); + rexp = exp((sigma[itype][jtype]-r)*rhoinv[itype][jtype]); + forceborn = born1[itype][jtype]*r*rexp - born2[itype][jtype]*r6inv + + born3[itype][jtype]*r2inv*r6inv; + } else forceborn = 0.0; + + fpair = (forcecoul + factor_lj*forceborn) * r2inv; + + f[i][0] += delx*fpair; + f[i][1] += dely*fpair; + f[i][2] += delz*fpair; + if (newton_pair || j < nlocal) { + f[j][0] -= delx*fpair; + f[j][1] -= dely*fpair; + f[j][2] -= delz*fpair; + } + + if (eflag) { + if (rsq < cut_coulsq) { + ecoul = v_sh; + if (factor_coul < 1.0) ecoul -= (1.0-factor_coul)*prefactor; + } else ecoul = 0.0; + if (rsq < cut_ljsq[itype][jtype]) { + evdwl = a[itype][jtype]*rexp - c[itype][jtype]*r6inv + + d[itype][jtype]*r6inv*r2inv - offset[itype][jtype]; + evdwl *= factor_lj; + } else evdwl = 0.0; + } + + if (evflag) ev_tally(i,j,nlocal,newton_pair, + evdwl,ecoul,fpair,delx,dely,delz); + } + } + } + + if 
(vflag_fdotr) virial_fdotr_compute(); +} + +/* +Using erfc and expmsq provided by math_special.h + +See: http://lammps.sandia.gov/threads/msg61934.html +*/ diff --git a/src/CORESHELL/pair_born_coul_wolf_cs.h b/src/CORESHELL/pair_born_coul_wolf_cs.h new file mode 100644 index 0000000000000000000000000000000000000000..00bbd5874c2694ca54b8ec0b683aef3fc2205594 --- /dev/null +++ b/src/CORESHELL/pair_born_coul_wolf_cs.h @@ -0,0 +1,60 @@ +/* ---------------------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +#ifdef PAIR_CLASS + +PairStyle(born/coul/wolf/cs,PairBornCoulWolfCS) + +#else + +#ifndef LMP_PAIR_BORN_COUL_WOLF_CS_H +#define LMP_PAIR_BORN_COUL_WOLF_CS_H + +#include "pair_born_coul_wolf.h" + +namespace LAMMPS_NS { + +class PairBornCoulWolfCS : public PairBornCoulWolf { + public: + PairBornCoulWolfCS(class LAMMPS *); + virtual void compute(int, int); +}; + +} + +#endif +#endif + +/* ERROR/WARNING messages: + +E: Illegal ... command + +Self-explanatory. Check the input script syntax and compare to the +documentation for the command. You can use -echo screen as a +command-line option when running LAMMPS to see the offending line. + +E: Incorrect args for pair coefficients + +Self-explanatory. Check the input script or data file. + +E: All pair coeffs are not set + +All pair coefficients must be set in the data file or by the +pair_coeff command before running a simulation. 
+ +E: Pair style born/coul/wolf/cs requires atom attribute q + +An atom style that defines this attribute must be used. + + +*/ diff --git a/src/CORESHELL/pair_coul_wolf_cs.cpp b/src/CORESHELL/pair_coul_wolf_cs.cpp new file mode 100644 index 0000000000000000000000000000000000000000..15de7c677733fb1a89be6884aa879fee685ba2a1 --- /dev/null +++ b/src/CORESHELL/pair_coul_wolf_cs.cpp @@ -0,0 +1,145 @@ +/* ---------------------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + + +#include <math.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include "pair_coul_wolf_cs.h" +#include "atom.h" +#include "comm.h" +#include "force.h" +#include "neighbor.h" +#include "neigh_list.h" +#include "math_const.h" +#include "math_special.h" +#include "memory.h" +#include "error.h" + +using namespace LAMMPS_NS; +using namespace MathConst; +using namespace MathSpecial; + +#define EPSILON 1.0e-20 + +/* ---------------------------------------------------------------------- */ + +PairCoulWolfCS::PairCoulWolfCS(LAMMPS *lmp) : PairCoulWolf( lmp ) +{ + single_enable = 0; +} + +/* ---------------------------------------------------------------------- */ + +void PairCoulWolfCS::compute(int eflag, int vflag) +{ + int i,j,ii,jj,inum,jnum; + double qtmp,xtmp,ytmp,ztmp,delx,dely,delz,ecoul,fpair; + double rsq,forcecoul,factor_coul; + double prefactor; + double r; + int *ilist,*jlist,*numneigh,**firstneigh; + double 
erfcc,erfcd,v_sh,dvdrr,e_self,e_shift,f_shift,qisq; + + ecoul = 0.0; + if (eflag || vflag) ev_setup(eflag,vflag); + else evflag = vflag_fdotr = 0; + + double **x = atom->x; + double **f = atom->f; + double *q = atom->q; + int nlocal = atom->nlocal; + double *special_coul = force->special_coul; + int newton_pair = force->newton_pair; + double qqrd2e = force->qqrd2e; + + // self and shifted coulombic energy + + e_self = v_sh = 0.0; + e_shift = erfc(alf*cut_coul)/cut_coul; + f_shift = -(e_shift+ 2.0*alf/MY_PIS * exp(-alf*alf*cut_coul*cut_coul)) / + cut_coul; + + inum = list->inum; + ilist = list->ilist; + numneigh = list->numneigh; + firstneigh = list->firstneigh; + + // loop over neighbors of my atoms + + for (ii = 0; ii < inum; ii++) { + i = ilist[ii]; + qtmp = q[i]; + xtmp = x[i][0]; + ytmp = x[i][1]; + ztmp = x[i][2]; + jlist = firstneigh[i]; + jnum = numneigh[i]; + + qisq = qtmp*qtmp; + e_self = -(e_shift/2.0 + alf/MY_PIS) * qisq*qqrd2e; + if (evflag) ev_tally(i,i,nlocal,0,0.0,e_self,0.0,0.0,0.0,0.0); + + for (jj = 0; jj < jnum; jj++) { + j = jlist[jj]; + factor_coul = special_coul[sbmask(j)]; + j &= NEIGHMASK; + + delx = xtmp - x[j][0]; + dely = ytmp - x[j][1]; + delz = ztmp - x[j][2]; + rsq = delx*delx + dely*dely + delz*delz; + + if (rsq < cut_coulsq) { + rsq += EPSILON; + // Add EPISLON for case: r = 0; Interaction must be removed + // by special bond + r = sqrt(rsq); + prefactor = qqrd2e*qtmp*q[j]/r; + erfcc = erfc(alf*r); + erfcd = exp(-alf*alf*r*r); + v_sh = (erfcc - e_shift*r) * prefactor; + dvdrr = (erfcc/rsq + 2.0*alf/MY_PIS * erfcd/r) + f_shift; + forcecoul = dvdrr*rsq*prefactor; + if (factor_coul < 1.0) forcecoul -= (1.0-factor_coul)*prefactor; + fpair = forcecoul / rsq; + + f[i][0] += delx*fpair; + f[i][1] += dely*fpair; + f[i][2] += delz*fpair; + if (newton_pair || j < nlocal) { + f[j][0] -= delx*fpair; + f[j][1] -= dely*fpair; + f[j][2] -= delz*fpair; + } + + if (eflag) { + ecoul = v_sh; + if (factor_coul < 1.0) ecoul -= 
(1.0-factor_coul)*prefactor; + } else ecoul = 0.0; + + if (evflag) ev_tally(i,j,nlocal,newton_pair, + 0.0,ecoul,fpair,delx,dely,delz); + } + } + } + + if (vflag_fdotr) virial_fdotr_compute(); +} + +/* NOTES +Using erfc and expmsq provided by math_special.h + +See: http://lammps.sandia.gov/threads/msg61934.html +*/ diff --git a/src/CORESHELL/pair_coul_wolf_cs.h b/src/CORESHELL/pair_coul_wolf_cs.h new file mode 100644 index 0000000000000000000000000000000000000000..8d98b1bbc6d44cbb837f8f68e4eb7fc2f04e74c4 --- /dev/null +++ b/src/CORESHELL/pair_coul_wolf_cs.h @@ -0,0 +1,54 @@ +/* -*- c++ -*- ---------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +#ifdef PAIR_CLASS + +PairStyle(coul/wolf/cs,PairCoulWolfCS) + +#else + +#ifndef LMP_PAIR_COUL_WOLF_CS_H_ +#define LMP_PAIR_COUL_WOLF_CS_H_ + +#include "pair_coul_wolf.h" + +namespace LAMMPS_NS { + +class PairCoulWolfCS : public PairCoulWolf { + public: + PairCoulWolfCS( class LAMMPS *); + virtual void compute( int, int); +}; + +} + +#endif +#endif /* LMP_PAIR_COUL_WOLF_CS_H_ */ + +/* ERROR/WARNING messages: + +E: Illegal ... command + +Self-explanatory. Check the input script syntax and compare to the +documentation for the command. You can use -echo screen as a +command-line option when running LAMMPS to see the offending line. + +E: Incorrect args for pair coefficients + +Self-explanatory. Check the input script or data file. 
+ +E: Pair coul/wolf/cs requires atom attribute q + +The atom style defined does not have this attribute. + +*/ diff --git a/src/CORESHELL/pair_lj_cut_coul_long_cs.cpp b/src/CORESHELL/pair_lj_cut_coul_long_cs.cpp index e2ffda148f1627d6726adc08c58e31b57f3041db..d418cf20afe44020db9d05c49b1c1eb4c985af64 100644 --- a/src/CORESHELL/pair_lj_cut_coul_long_cs.cpp +++ b/src/CORESHELL/pair_lj_cut_coul_long_cs.cpp @@ -225,10 +225,10 @@ void PairLJCutCoulLongCS::compute_inner() int newton_pair = force->newton_pair; double qqrd2e = force->qqrd2e; - inum = listinner->inum; - ilist = listinner->ilist; - numneigh = listinner->numneigh; - firstneigh = listinner->firstneigh; + inum = list->inum_inner; + ilist = list->ilist_inner; + numneigh = list->numneigh_inner; + firstneigh = list->firstneigh_inner; double cut_out_on = cut_respa[0]; double cut_out_off = cut_respa[1]; @@ -311,10 +311,10 @@ void PairLJCutCoulLongCS::compute_middle() int newton_pair = force->newton_pair; double qqrd2e = force->qqrd2e; - inum = listmiddle->inum; - ilist = listmiddle->ilist; - numneigh = listmiddle->numneigh; - firstneigh = listmiddle->firstneigh; + inum = list->inum_middle; + ilist = list->ilist_middle; + numneigh = list->numneigh_middle; + firstneigh = list->firstneigh_middle; double cut_in_off = cut_respa[0]; double cut_in_on = cut_respa[1]; @@ -412,10 +412,10 @@ void PairLJCutCoulLongCS::compute_outer(int eflag, int vflag) int newton_pair = force->newton_pair; double qqrd2e = force->qqrd2e; - inum = listouter->inum; - ilist = listouter->ilist; - numneigh = listouter->numneigh; - firstneigh = listouter->firstneigh; + inum = list->inum; + ilist = list->ilist; + numneigh = list->numneigh; + firstneigh = list->firstneigh; double cut_in_off = cut_respa[2]; double cut_in_on = cut_respa[3]; diff --git a/src/DIPOLE/pair_lj_long_dipole_long.cpp b/src/DIPOLE/pair_lj_long_dipole_long.cpp index b833b250d4a644849a3875ba1d9a74f8e403540a..c9b2b3f4afe5dba0aba0bf5f7ebcad1b433e7d1d 100644 --- 
a/src/DIPOLE/pair_lj_long_dipole_long.cpp +++ b/src/DIPOLE/pair_lj_long_dipole_long.cpp @@ -263,22 +263,6 @@ void PairLJLongDipoleLong::init_style() if (force->kspace) g_ewald = force->kspace->g_ewald; } -/* ---------------------------------------------------------------------- - neighbor callback to inform pair style of neighbor list to use - regular or rRESPA -------------------------------------------------------------------------- */ - -void PairLJLongDipoleLong::init_list(int id, NeighList *ptr) -{ - if (id == 0) list = ptr; - else if (id == 1) listinner = ptr; - else if (id == 2) listmiddle = ptr; - else if (id == 3) listouter = ptr; - - if (id) - error->all(FLERR,"Pair style lj/long/dipole/long does not currently support respa"); -} - /* ---------------------------------------------------------------------- init for one type pair i,j and corresponding j,i ------------------------------------------------------------------------- */ diff --git a/src/DIPOLE/pair_lj_long_dipole_long.h b/src/DIPOLE/pair_lj_long_dipole_long.h index f9fa10af11bcd079df4a1bb9b5677f6c555efca5..2ace9ca30163daa2c6c159e0b47ef8c165e8e708 100644 --- a/src/DIPOLE/pair_lj_long_dipole_long.h +++ b/src/DIPOLE/pair_lj_long_dipole_long.h @@ -34,7 +34,6 @@ class PairLJLongDipoleLong : public Pair { virtual void settings(int, char **); void coeff(int, char **); void init_style(); - void init_list(int, class NeighList *); double init_one(int, int); void write_restart(FILE *); void read_restart(FILE *); diff --git a/src/Depend.sh b/src/Depend.sh index 94636079603b3c5c2f5543be735eb8c7a3eab97c..e1c812ebc26f740d98cc3060ec2636ea31ab103b 100644 --- a/src/Depend.sh +++ b/src/Depend.sh @@ -119,6 +119,10 @@ if (test $1 = "USER-DPD") then depend KOKKOS fi +if (test $1 = "USER-DRUDE") then + depend USER-OMP +fi + if (test $1 = "USER-FEP") then depend USER-OMP fi diff --git a/src/GRANULAR/pair_gran_hertz_history.cpp b/src/GRANULAR/pair_gran_hertz_history.cpp index 
e52aac10dbbb92ef15a8572115cd21756f35c9f9..9723531625fdf44b81fa727538a394f9f24a157e 100644 --- a/src/GRANULAR/pair_gran_hertz_history.cpp +++ b/src/GRANULAR/pair_gran_hertz_history.cpp @@ -24,6 +24,7 @@ #include "update.h" #include "force.h" #include "fix.h" +#include "fix_neigh_history.h" #include "neighbor.h" #include "neigh_list.h" #include "comm.h" @@ -95,8 +96,8 @@ void PairGranHertzHistory::compute(int eflag, int vflag) ilist = list->ilist; numneigh = list->numneigh; firstneigh = list->firstneigh; - firsttouch = list->listhistory->firstneigh; - firstshear = list->listhistory->firstdouble; + firsttouch = fix_history->firstflag; + firstshear = fix_history->firstvalue; // loop over neighbors of my atoms @@ -407,7 +408,7 @@ double PairGranHertzHistory::single(int i, int j, int itype, int jtype, int jnum = list->numneigh[i]; int *jlist = list->firstneigh[i]; - double *allshear = list->listhistory->firstdouble[i]; + double *allshear = fix_history->firstvalue[i]; for (int jj = 0; jj < jnum; jj++) { neighprev++; diff --git a/src/GRANULAR/pair_gran_hooke_history.cpp b/src/GRANULAR/pair_gran_hooke_history.cpp index e9662c9e7341ab77c6b76b69ac3ac773df501f4f..4f120150ded77ccfdaf5e839f2b5cc68a39ddfd7 100644 --- a/src/GRANULAR/pair_gran_hooke_history.cpp +++ b/src/GRANULAR/pair_gran_hooke_history.cpp @@ -27,7 +27,7 @@ #include "update.h" #include "modify.h" #include "fix.h" -#include "fix_shear_history.h" +#include "fix_neigh_history.h" #include "comm.h" #include "neighbor.h" #include "neigh_list.h" @@ -64,7 +64,7 @@ PairGranHookeHistory::PairGranHookeHistory(LAMMPS *lmp) : Pair(lmp) PairGranHookeHistory::~PairGranHookeHistory() { delete [] svector; - if (fix_history) modify->delete_fix("SHEAR_HISTORY"); + if (fix_history) modify->delete_fix("NEIGH_HISTORY"); if (allocated) { memory->destroy(setflag); @@ -137,8 +137,8 @@ void PairGranHookeHistory::compute(int eflag, int vflag) ilist = list->ilist; numneigh = list->numneigh; firstneigh = list->firstneigh; - firsttouch = 
listhistory->firstneigh; - firstshear = listhistory->firstdouble; + firsttouch = fix_history->firstflag; + firstshear = fix_history->firstvalue; // loop over neighbors of my atoms @@ -400,35 +400,28 @@ void PairGranHookeHistory::init_style() if (comm->ghost_velocity == 0) error->all(FLERR,"Pair granular requires ghost atoms store velocity"); - // need a granular neigh list and optionally a granular history neigh list + // need a granular neigh list int irequest = neighbor->request(this,instance_me); neighbor->requests[irequest]->size = 1; - if (history) { - irequest = neighbor->request(this,instance_me); - neighbor->requests[irequest]->id = 1; - neighbor->requests[irequest]->history = 1; - neighbor->requests[irequest]->dnum = 3; - } + if (history) neighbor->requests[irequest]->history = 1; dt = update->dt; - // if shear history is stored: // if first init, create Fix needed for storing shear history if (history && fix_history == NULL) { char dnumstr[16]; sprintf(dnumstr,"%d",3); char **fixarg = new char*[4]; - fixarg[0] = (char *) "SHEAR_HISTORY"; + fixarg[0] = (char *) "NEIGH_HISTORY"; fixarg[1] = (char *) "all"; - fixarg[2] = (char *) "SHEAR_HISTORY"; + fixarg[2] = (char *) "NEIGH_HISTORY"; fixarg[3] = dnumstr; - modify->add_fix(4,fixarg); + modify->add_fix(4,fixarg,1); delete [] fixarg; - fix_history = (FixShearHistory *) modify->fix[modify->nfix-1]; + fix_history = (FixNeighHistory *) modify->fix[modify->nfix-1]; fix_history->pair = this; - neighbor->requests[irequest]->fix_history = fix_history; } // check for FixFreeze and set freeze_group_bit @@ -494,23 +487,12 @@ void PairGranHookeHistory::init_style() // set fix which stores history info if (history) { - int ifix = modify->find_fix("SHEAR_HISTORY"); - if (ifix < 0) error->all(FLERR,"Could not find pair fix ID"); - fix_history = (FixShearHistory *) modify->fix[ifix]; + int ifix = modify->find_fix("NEIGH_HISTORY"); + if (ifix < 0) error->all(FLERR,"Could not find pair fix neigh history ID"); + fix_history = 
(FixNeighHistory *) modify->fix[ifix]; } } -/* ---------------------------------------------------------------------- - neighbor callback to inform pair style of neighbor list to use - optional granular history list -------------------------------------------------------------------------- */ - -void PairGranHookeHistory::init_list(int id, NeighList *ptr) -{ - if (id == 0) list = ptr; - else if (id == 1) listhistory = ptr; -} - /* ---------------------------------------------------------------------- init for one type pair i,j and corresponding j,i ------------------------------------------------------------------------- */ @@ -704,7 +686,7 @@ double PairGranHookeHistory::single(int i, int j, int itype, int jtype, int jnum = list->numneigh[i]; int *jlist = list->firstneigh[i]; - double *allshear = list->listhistory->firstdouble[i]; + double *allshear = fix_history->firstvalue[i]; for (int jj = 0; jj < jnum; jj++) { neighprev++; @@ -797,14 +779,3 @@ double PairGranHookeHistory::memory_usage() double bytes = nmax * sizeof(double); return bytes; } - -/* ---------------------------------------------------------------------- - return ptr to FixShearHistory class - called by Neighbor when setting up neighbor lists -------------------------------------------------------------------------- */ - -void *PairGranHookeHistory::extract(const char *str, int &dim) -{ - if (strcmp(str,"history") == 0) return (void *) fix_history; - return NULL; -} diff --git a/src/GRANULAR/pair_gran_hooke_history.h b/src/GRANULAR/pair_gran_hooke_history.h index afeab93413c27257f784842857fef631d7fc249c..f02cccd55ef177ba71e022447d49c9c71036b1cd 100644 --- a/src/GRANULAR/pair_gran_hooke_history.h +++ b/src/GRANULAR/pair_gran_hooke_history.h @@ -32,7 +32,6 @@ class PairGranHookeHistory : public Pair { virtual void settings(int, char **); void coeff(int, char **); void init_style(); - void init_list(int, class NeighList *); double init_one(int, int); void write_restart(FILE *); void read_restart(FILE 
*); @@ -43,7 +42,6 @@ class PairGranHookeHistory : public Pair { int pack_forward_comm(int, int *, double *, int, int *); void unpack_forward_comm(int, int, double *); double memory_usage(); - void *extract(const char *, int &); protected: double kn,kt,gamman,gammat,xmu; @@ -56,7 +54,7 @@ class PairGranHookeHistory : public Pair { double *onerad_dynamic,*onerad_frozen; double *maxrad_dynamic,*maxrad_frozen; - class FixShearHistory *fix_history; + class FixNeighHistory *fix_history; // storage of rigid body masses for use in granular interactions diff --git a/src/KOKKOS/atom_vec_atomic_kokkos.cpp b/src/KOKKOS/atom_vec_atomic_kokkos.cpp index b63dc5fb8c94aad0ac9312fad0268a89d2f4d62d..6c610c8c111409d8c18201d9be23e44a4fe9195c 100644 --- a/src/KOKKOS/atom_vec_atomic_kokkos.cpp +++ b/src/KOKKOS/atom_vec_atomic_kokkos.cpp @@ -136,450 +136,6 @@ void AtomVecAtomicKokkos::copy(int i, int j, int delflag) /* ---------------------------------------------------------------------- */ -template<class DeviceType,int PBC_FLAG,int TRICLINIC> -struct AtomVecAtomicKokkos_PackComm { - typedef DeviceType device_type; - - typename ArrayTypes<DeviceType>::t_x_array_randomread _x; - typename ArrayTypes<DeviceType>::t_xfloat_2d_um _buf; - typename ArrayTypes<DeviceType>::t_int_2d_const _list; - const int _iswap; - X_FLOAT _xprd,_yprd,_zprd,_xy,_xz,_yz; - X_FLOAT _pbc[6]; - - AtomVecAtomicKokkos_PackComm( - const typename DAT::tdual_x_array &x, - const typename DAT::tdual_xfloat_2d &buf, - const typename DAT::tdual_int_2d &list, - const int & iswap, - const X_FLOAT &xprd, const X_FLOAT &yprd, const X_FLOAT &zprd, - const X_FLOAT &xy, const X_FLOAT &xz, const X_FLOAT &yz, const int* const pbc): - _x(x.view<DeviceType>()),_list(list.view<DeviceType>()),_iswap(iswap), - _xprd(xprd),_yprd(yprd),_zprd(zprd), - _xy(xy),_xz(xz),_yz(yz) { - const size_t maxsend = (buf.view<DeviceType>().dimension_0()*buf.view<DeviceType>().dimension_1())/3; - const size_t elements = 3; - 
buffer_view<DeviceType>(_buf,buf,maxsend,elements); - _pbc[0] = pbc[0]; _pbc[1] = pbc[1]; _pbc[2] = pbc[2]; - _pbc[3] = pbc[3]; _pbc[4] = pbc[4]; _pbc[5] = pbc[5]; - }; - - KOKKOS_INLINE_FUNCTION - void operator() (const int& i) const { - const int j = _list(_iswap,i); - if (PBC_FLAG == 0) { - _buf(i,0) = _x(j,0); - _buf(i,1) = _x(j,1); - _buf(i,2) = _x(j,2); - } else { - if (TRICLINIC == 0) { - _buf(i,0) = _x(j,0) + _pbc[0]*_xprd; - _buf(i,1) = _x(j,1) + _pbc[1]*_yprd; - _buf(i,2) = _x(j,2) + _pbc[2]*_zprd; - } else { - _buf(i,0) = _x(j,0) + _pbc[0]*_xprd + _pbc[5]*_xy + _pbc[4]*_xz; - _buf(i,1) = _x(j,1) + _pbc[1]*_yprd + _pbc[3]*_yz; - _buf(i,2) = _x(j,2) + _pbc[2]*_zprd; - } - } - } -}; - -/* ---------------------------------------------------------------------- */ - -int AtomVecAtomicKokkos::pack_comm_kokkos(const int &n, - const DAT::tdual_int_2d &list, - const int & iswap, - const DAT::tdual_xfloat_2d &buf, - const int &pbc_flag, - const int* const pbc) -{ - // Check whether to always run forward communication on the host - // Choose correct forward PackComm kernel - - if(commKK->forward_comm_on_host) { - sync(Host,X_MASK); - if(pbc_flag) { - if(domain->triclinic) { - struct AtomVecAtomicKokkos_PackComm<LMPHostType,1,1> f(atomKK->k_x,buf,list,iswap, - domain->xprd,domain->yprd,domain->zprd, - domain->xy,domain->xz,domain->yz,pbc); - Kokkos::parallel_for(n,f); - } else { - struct AtomVecAtomicKokkos_PackComm<LMPHostType,1,0> f(atomKK->k_x,buf,list,iswap, - domain->xprd,domain->yprd,domain->zprd, - domain->xy,domain->xz,domain->yz,pbc); - Kokkos::parallel_for(n,f); - } - } else { - if(domain->triclinic) { - struct AtomVecAtomicKokkos_PackComm<LMPHostType,0,1> f(atomKK->k_x,buf,list,iswap, - domain->xprd,domain->yprd,domain->zprd, - domain->xy,domain->xz,domain->yz,pbc); - Kokkos::parallel_for(n,f); - } else { - struct AtomVecAtomicKokkos_PackComm<LMPHostType,0,0> f(atomKK->k_x,buf,list,iswap, - domain->xprd,domain->yprd,domain->zprd, - 
domain->xy,domain->xz,domain->yz,pbc); - Kokkos::parallel_for(n,f); - } - } - } else { - sync(Device,X_MASK); - if(pbc_flag) { - if(domain->triclinic) { - struct AtomVecAtomicKokkos_PackComm<LMPDeviceType,1,1> f(atomKK->k_x,buf,list,iswap, - domain->xprd,domain->yprd,domain->zprd, - domain->xy,domain->xz,domain->yz,pbc); - Kokkos::parallel_for(n,f); - } else { - struct AtomVecAtomicKokkos_PackComm<LMPDeviceType,1,0> f(atomKK->k_x,buf,list,iswap, - domain->xprd,domain->yprd,domain->zprd, - domain->xy,domain->xz,domain->yz,pbc); - Kokkos::parallel_for(n,f); - } - } else { - if(domain->triclinic) { - struct AtomVecAtomicKokkos_PackComm<LMPDeviceType,0,1> f(atomKK->k_x,buf,list,iswap, - domain->xprd,domain->yprd,domain->zprd, - domain->xy,domain->xz,domain->yz,pbc); - Kokkos::parallel_for(n,f); - } else { - struct AtomVecAtomicKokkos_PackComm<LMPDeviceType,0,0> f(atomKK->k_x,buf,list,iswap, - domain->xprd,domain->yprd,domain->zprd, - domain->xy,domain->xz,domain->yz,pbc); - Kokkos::parallel_for(n,f); - } - } - } - - return n*size_forward; -} - -/* ---------------------------------------------------------------------- */ - -template<class DeviceType,int PBC_FLAG,int TRICLINIC> -struct AtomVecAtomicKokkos_PackCommSelf { - typedef DeviceType device_type; - - typename ArrayTypes<DeviceType>::t_x_array_randomread _x; - typename ArrayTypes<DeviceType>::t_x_array _xw; - int _nfirst; - typename ArrayTypes<DeviceType>::t_int_2d_const _list; - const int _iswap; - X_FLOAT _xprd,_yprd,_zprd,_xy,_xz,_yz; - X_FLOAT _pbc[6]; - - AtomVecAtomicKokkos_PackCommSelf( - const typename DAT::tdual_x_array &x, - const int &nfirst, - const typename DAT::tdual_int_2d &list, - const int & iswap, - const X_FLOAT &xprd, const X_FLOAT &yprd, const X_FLOAT &zprd, - const X_FLOAT &xy, const X_FLOAT &xz, const X_FLOAT &yz, const int* const pbc): - _x(x.view<DeviceType>()),_xw(x.view<DeviceType>()),_nfirst(nfirst),_list(list.view<DeviceType>()),_iswap(iswap), - _xprd(xprd),_yprd(yprd),_zprd(zprd), - 
_xy(xy),_xz(xz),_yz(yz) { - _pbc[0] = pbc[0]; _pbc[1] = pbc[1]; _pbc[2] = pbc[2]; - _pbc[3] = pbc[3]; _pbc[4] = pbc[4]; _pbc[5] = pbc[5]; - }; - - KOKKOS_INLINE_FUNCTION - void operator() (const int& i) const { - const int j = _list(_iswap,i); - if (PBC_FLAG == 0) { - _xw(i+_nfirst,0) = _x(j,0); - _xw(i+_nfirst,1) = _x(j,1); - _xw(i+_nfirst,2) = _x(j,2); - } else { - if (TRICLINIC == 0) { - _xw(i+_nfirst,0) = _x(j,0) + _pbc[0]*_xprd; - _xw(i+_nfirst,1) = _x(j,1) + _pbc[1]*_yprd; - _xw(i+_nfirst,2) = _x(j,2) + _pbc[2]*_zprd; - } else { - _xw(i+_nfirst,0) = _x(j,0) + _pbc[0]*_xprd + _pbc[5]*_xy + _pbc[4]*_xz; - _xw(i+_nfirst,1) = _x(j,1) + _pbc[1]*_yprd + _pbc[3]*_yz; - _xw(i+_nfirst,2) = _x(j,2) + _pbc[2]*_zprd; - } - } - - } -}; - -/* ---------------------------------------------------------------------- */ - -int AtomVecAtomicKokkos::pack_comm_self(const int &n, const DAT::tdual_int_2d &list, const int & iswap, - const int nfirst, const int &pbc_flag, const int* const pbc) { - if(commKK->forward_comm_on_host) { - sync(Host,X_MASK); - modified(Host,X_MASK); - if(pbc_flag) { - if(domain->triclinic) { - struct AtomVecAtomicKokkos_PackCommSelf<LMPHostType,1,1> f(atomKK->k_x,nfirst,list,iswap, - domain->xprd,domain->yprd,domain->zprd, - domain->xy,domain->xz,domain->yz,pbc); - Kokkos::parallel_for(n,f); - } else { - struct AtomVecAtomicKokkos_PackCommSelf<LMPHostType,1,0> f(atomKK->k_x,nfirst,list,iswap, - domain->xprd,domain->yprd,domain->zprd, - domain->xy,domain->xz,domain->yz,pbc); - Kokkos::parallel_for(n,f); - } - } else { - if(domain->triclinic) { - struct AtomVecAtomicKokkos_PackCommSelf<LMPHostType,0,1> f(atomKK->k_x,nfirst,list,iswap, - domain->xprd,domain->yprd,domain->zprd, - domain->xy,domain->xz,domain->yz,pbc); - Kokkos::parallel_for(n,f); - } else { - struct AtomVecAtomicKokkos_PackCommSelf<LMPHostType,0,0> f(atomKK->k_x,nfirst,list,iswap, - domain->xprd,domain->yprd,domain->zprd, - domain->xy,domain->xz,domain->yz,pbc); - Kokkos::parallel_for(n,f); - } 
- } - } else { - sync(Device,X_MASK); - modified(Device,X_MASK); - if(pbc_flag) { - if(domain->triclinic) { - struct AtomVecAtomicKokkos_PackCommSelf<LMPDeviceType,1,1> f(atomKK->k_x,nfirst,list,iswap, - domain->xprd,domain->yprd,domain->zprd, - domain->xy,domain->xz,domain->yz,pbc); - Kokkos::parallel_for(n,f); - } else { - struct AtomVecAtomicKokkos_PackCommSelf<LMPDeviceType,1,0> f(atomKK->k_x,nfirst,list,iswap, - domain->xprd,domain->yprd,domain->zprd, - domain->xy,domain->xz,domain->yz,pbc); - Kokkos::parallel_for(n,f); - } - } else { - if(domain->triclinic) { - struct AtomVecAtomicKokkos_PackCommSelf<LMPDeviceType,0,1> f(atomKK->k_x,nfirst,list,iswap, - domain->xprd,domain->yprd,domain->zprd, - domain->xy,domain->xz,domain->yz,pbc); - Kokkos::parallel_for(n,f); - } else { - struct AtomVecAtomicKokkos_PackCommSelf<LMPDeviceType,0,0> f(atomKK->k_x,nfirst,list,iswap, - domain->xprd,domain->yprd,domain->zprd, - domain->xy,domain->xz,domain->yz,pbc); - Kokkos::parallel_for(n,f); - } - } - } - return n*3; -} - -/* ---------------------------------------------------------------------- */ - -template<class DeviceType> -struct AtomVecAtomicKokkos_UnpackComm { - typedef DeviceType device_type; - - typename ArrayTypes<DeviceType>::t_x_array _x; - typename ArrayTypes<DeviceType>::t_xfloat_2d_const _buf; - int _first; - - AtomVecAtomicKokkos_UnpackComm( - const typename DAT::tdual_x_array &x, - const typename DAT::tdual_xfloat_2d &buf, - const int& first):_x(x.view<DeviceType>()),_buf(buf.view<DeviceType>()), - _first(first) {}; - - KOKKOS_INLINE_FUNCTION - void operator() (const int& i) const { - _x(i+_first,0) = _buf(i,0); - _x(i+_first,1) = _buf(i,1); - _x(i+_first,2) = _buf(i,2); - } -}; - -/* ---------------------------------------------------------------------- */ - -void AtomVecAtomicKokkos::unpack_comm_kokkos(const int &n, const int &first, - const DAT::tdual_xfloat_2d &buf ) { - if(commKK->forward_comm_on_host) { - sync(Host,X_MASK); - modified(Host,X_MASK); - 
struct AtomVecAtomicKokkos_UnpackComm<LMPHostType> f(atomKK->k_x,buf,first); - Kokkos::parallel_for(n,f); - } else { - sync(Device,X_MASK); - modified(Device,X_MASK); - struct AtomVecAtomicKokkos_UnpackComm<LMPDeviceType> f(atomKK->k_x,buf,first); - Kokkos::parallel_for(n,f); - } -} - -/* ---------------------------------------------------------------------- */ - -int AtomVecAtomicKokkos::pack_comm(int n, int *list, double *buf, - int pbc_flag, int *pbc) -{ - int i,j,m; - double dx,dy,dz; - - m = 0; - if (pbc_flag == 0) { - for (i = 0; i < n; i++) { - j = list[i]; - buf[m++] = h_x(j,0); - buf[m++] = h_x(j,1); - buf[m++] = h_x(j,2); - } - } else { - if (domain->triclinic == 0) { - dx = pbc[0]*domain->xprd; - dy = pbc[1]*domain->yprd; - dz = pbc[2]*domain->zprd; - } else { - dx = pbc[0]*domain->xprd + pbc[5]*domain->xy + pbc[4]*domain->xz; - dy = pbc[1]*domain->yprd + pbc[3]*domain->yz; - dz = pbc[2]*domain->zprd; - } - for (i = 0; i < n; i++) { - j = list[i]; - buf[m++] = h_x(j,0) + dx; - buf[m++] = h_x(j,1) + dy; - buf[m++] = h_x(j,2) + dz; - } - } - return m; -} - -/* ---------------------------------------------------------------------- */ - -int AtomVecAtomicKokkos::pack_comm_vel(int n, int *list, double *buf, - int pbc_flag, int *pbc) -{ - int i,j,m; - double dx,dy,dz,dvx,dvy,dvz; - - m = 0; - if (pbc_flag == 0) { - for (i = 0; i < n; i++) { - j = list[i]; - buf[m++] = h_x(j,0); - buf[m++] = h_x(j,1); - buf[m++] = h_x(j,2); - buf[m++] = h_v(j,0); - buf[m++] = h_v(j,1); - buf[m++] = h_v(j,2); - } - } else { - if (domain->triclinic == 0) { - dx = pbc[0]*domain->xprd; - dy = pbc[1]*domain->yprd; - dz = pbc[2]*domain->zprd; - } else { - dx = pbc[0]*domain->xprd + pbc[5]*domain->xy + pbc[4]*domain->xz; - dy = pbc[1]*domain->yprd + pbc[3]*domain->yz; - dz = pbc[2]*domain->zprd; - } - if (!deform_vremap) { - for (i = 0; i < n; i++) { - j = list[i]; - buf[m++] = h_x(j,0) + dx; - buf[m++] = h_x(j,1) + dy; - buf[m++] = h_x(j,2) + dz; - buf[m++] = h_v(j,0); - buf[m++] = 
h_v(j,1); - buf[m++] = h_v(j,2); - } - } else { - dvx = pbc[0]*h_rate[0] + pbc[5]*h_rate[5] + pbc[4]*h_rate[4]; - dvy = pbc[1]*h_rate[1] + pbc[3]*h_rate[3]; - dvz = pbc[2]*h_rate[2]; - for (i = 0; i < n; i++) { - j = list[i]; - buf[m++] = h_x(j,0) + dx; - buf[m++] = h_x(j,1) + dy; - buf[m++] = h_x(j,2) + dz; - if (mask[i] & deform_groupbit) { - buf[m++] = h_v(j,0) + dvx; - buf[m++] = h_v(j,1) + dvy; - buf[m++] = h_v(j,2) + dvz; - } else { - buf[m++] = h_v(j,0); - buf[m++] = h_v(j,1); - buf[m++] = h_v(j,2); - } - } - } - } - return m; -} - -/* ---------------------------------------------------------------------- */ - -void AtomVecAtomicKokkos::unpack_comm(int n, int first, double *buf) -{ - int i,m,last; - - m = 0; - last = first + n; - for (i = first; i < last; i++) { - h_x(i,0) = buf[m++]; - h_x(i,1) = buf[m++]; - h_x(i,2) = buf[m++]; - } -} - -/* ---------------------------------------------------------------------- */ - -void AtomVecAtomicKokkos::unpack_comm_vel(int n, int first, double *buf) -{ - int i,m,last; - - m = 0; - last = first + n; - for (i = first; i < last; i++) { - h_x(i,0) = buf[m++]; - h_x(i,1) = buf[m++]; - h_x(i,2) = buf[m++]; - h_v(i,0) = buf[m++]; - h_v(i,1) = buf[m++]; - h_v(i,2) = buf[m++]; - } -} - -/* ---------------------------------------------------------------------- */ - -int AtomVecAtomicKokkos::pack_reverse(int n, int first, double *buf) -{ - if(n > 0) - sync(Host,F_MASK); - - int m = 0; - const int last = first + n; - for (int i = first; i < last; i++) { - buf[m++] = h_f(i,0); - buf[m++] = h_f(i,1); - buf[m++] = h_f(i,2); - } - return m; -} - -/* ---------------------------------------------------------------------- */ - -void AtomVecAtomicKokkos::unpack_reverse(int n, int *list, double *buf) -{ - if(n > 0) { - sync(Host,F_MASK); - modified(Host,F_MASK); - } - - int m = 0; - for (int i = 0; i < n; i++) { - const int j = list[i]; - h_f(j,0) += buf[m++]; - h_f(j,1) += buf[m++]; - h_f(j,2) += buf[m++]; - } -} - -/* 
---------------------------------------------------------------------- */ - template<class DeviceType,int PBC_FLAG> struct AtomVecAtomicKokkos_PackBorder { typedef DeviceType device_type; diff --git a/src/KOKKOS/atom_vec_atomic_kokkos.h b/src/KOKKOS/atom_vec_atomic_kokkos.h index 5e9a72c2e377c18c9435744fda21f70844d9c4a7..e4d2654e2cb14e6bcd95e812e6204d33ed0c6304 100644 --- a/src/KOKKOS/atom_vec_atomic_kokkos.h +++ b/src/KOKKOS/atom_vec_atomic_kokkos.h @@ -33,12 +33,6 @@ class AtomVecAtomicKokkos : public AtomVecKokkos { virtual ~AtomVecAtomicKokkos() {} void grow(int); void copy(int, int, int); - int pack_comm(int, int *, double *, int, int *); - int pack_comm_vel(int, int *, double *, int, int *); - void unpack_comm(int, int, double *); - void unpack_comm_vel(int, int, double *); - int pack_reverse(int, int, double *); - void unpack_reverse(int, int *, double *); int pack_border(int, int *, double *, int, int *); int pack_border_vel(int, int *, double *, int, int *); void unpack_border(int, int, double *); @@ -55,15 +49,6 @@ class AtomVecAtomicKokkos : public AtomVecKokkos { bigint memory_usage(); void grow_reset(); - int pack_comm_kokkos(const int &n, const DAT::tdual_int_2d &k_sendlist, - const int & iswap, - const DAT::tdual_xfloat_2d &buf, - const int &pbc_flag, const int pbc[]); - void unpack_comm_kokkos(const int &n, const int &nfirst, - const DAT::tdual_xfloat_2d &buf); - int pack_comm_self(const int &n, const DAT::tdual_int_2d &list, - const int & iswap, const int nfirst, - const int &pbc_flag, const int pbc[]); int pack_border_kokkos(int n, DAT::tdual_int_2d k_sendlist, DAT::tdual_xfloat_2d buf,int iswap, int pbc_flag, int *pbc, ExecutionSpace space); @@ -99,9 +84,6 @@ class AtomVecAtomicKokkos : public AtomVecKokkos { DAT::t_x_array d_x; DAT::t_v_array d_v; DAT::t_f_array d_f; - HAT::t_x_array h_x; - HAT::t_v_array h_v; - HAT::t_f_array h_f; DAT::tdual_int_1d k_count; }; diff --git a/src/KOKKOS/atom_vec_bond_kokkos.cpp 
b/src/KOKKOS/atom_vec_bond_kokkos.cpp index e0f29a27bb7f251aa4816e4bd1a19f43f6f2a39b..076144420c1442c5db69008905263313b4757be2 100644 --- a/src/KOKKOS/atom_vec_bond_kokkos.cpp +++ b/src/KOKKOS/atom_vec_bond_kokkos.cpp @@ -178,448 +178,6 @@ void AtomVecBondKokkos::copy(int i, int j, int delflag) /* ---------------------------------------------------------------------- */ -template<class DeviceType,int PBC_FLAG,int TRICLINIC> -struct AtomVecBondKokkos_PackComm { - typedef DeviceType device_type; - - typename ArrayTypes<DeviceType>::t_x_array_randomread _x; - typename ArrayTypes<DeviceType>::t_xfloat_2d_um _buf; - typename ArrayTypes<DeviceType>::t_int_2d_const _list; - const int _iswap; - X_FLOAT _xprd,_yprd,_zprd,_xy,_xz,_yz; - X_FLOAT _pbc[6]; - - AtomVecBondKokkos_PackComm( - const typename DAT::tdual_x_array &x, - const typename DAT::tdual_xfloat_2d &buf, - const typename DAT::tdual_int_2d &list, - const int & iswap, - const X_FLOAT &xprd, const X_FLOAT &yprd, const X_FLOAT &zprd, - const X_FLOAT &xy, const X_FLOAT &xz, const X_FLOAT &yz, const int* const pbc): - _x(x.view<DeviceType>()),_list(list.view<DeviceType>()),_iswap(iswap), - _xprd(xprd),_yprd(yprd),_zprd(zprd), - _xy(xy),_xz(xz),_yz(yz) { - const size_t maxsend = (buf.view<DeviceType>().dimension_0()*buf.view<DeviceType>().dimension_1())/3; - const size_t elements = 3; - buffer_view<DeviceType>(_buf,buf,maxsend,elements); - _pbc[0] = pbc[0]; _pbc[1] = pbc[1]; _pbc[2] = pbc[2]; - _pbc[3] = pbc[3]; _pbc[4] = pbc[4]; _pbc[5] = pbc[5]; - }; - - KOKKOS_INLINE_FUNCTION - void operator() (const int& i) const { - const int j = _list(_iswap,i); - if (PBC_FLAG == 0) { - _buf(i,0) = _x(j,0); - _buf(i,1) = _x(j,1); - _buf(i,2) = _x(j,2); - } else { - if (TRICLINIC == 0) { - _buf(i,0) = _x(j,0) + _pbc[0]*_xprd; - _buf(i,1) = _x(j,1) + _pbc[1]*_yprd; - _buf(i,2) = _x(j,2) + _pbc[2]*_zprd; - } else { - _buf(i,0) = _x(j,0) + _pbc[0]*_xprd + _pbc[5]*_xy + _pbc[4]*_xz; - _buf(i,1) = _x(j,1) + _pbc[1]*_yprd + _pbc[3]*_yz; 
- _buf(i,2) = _x(j,2) + _pbc[2]*_zprd; - } - } - } -}; - -/* ---------------------------------------------------------------------- */ - -int AtomVecBondKokkos::pack_comm_kokkos(const int &n, - const DAT::tdual_int_2d &list, - const int & iswap, - const DAT::tdual_xfloat_2d &buf, - const int &pbc_flag, - const int* const pbc) -{ - // Check whether to always run forward communication on the host - // Choose correct forward PackComm kernel - - if(commKK->forward_comm_on_host) { - sync(Host,X_MASK); - if(pbc_flag) { - if(domain->triclinic) { - struct AtomVecBondKokkos_PackComm<LMPHostType,1,1> f(atomKK->k_x,buf,list,iswap, - domain->xprd,domain->yprd,domain->zprd, - domain->xy,domain->xz,domain->yz,pbc); - Kokkos::parallel_for(n,f); - } else { - struct AtomVecBondKokkos_PackComm<LMPHostType,1,0> f(atomKK->k_x,buf,list,iswap, - domain->xprd,domain->yprd,domain->zprd, - domain->xy,domain->xz,domain->yz,pbc); - Kokkos::parallel_for(n,f); - } - } else { - if(domain->triclinic) { - struct AtomVecBondKokkos_PackComm<LMPHostType,0,1> f(atomKK->k_x,buf,list,iswap, - domain->xprd,domain->yprd,domain->zprd, - domain->xy,domain->xz,domain->yz,pbc); - Kokkos::parallel_for(n,f); - } else { - struct AtomVecBondKokkos_PackComm<LMPHostType,0,0> f(atomKK->k_x,buf,list,iswap, - domain->xprd,domain->yprd,domain->zprd, - domain->xy,domain->xz,domain->yz,pbc); - Kokkos::parallel_for(n,f); - } - } - } else { - sync(Device,X_MASK); - if(pbc_flag) { - if(domain->triclinic) { - struct AtomVecBondKokkos_PackComm<LMPDeviceType,1,1> f(atomKK->k_x,buf,list,iswap, - domain->xprd,domain->yprd,domain->zprd, - domain->xy,domain->xz,domain->yz,pbc); - Kokkos::parallel_for(n,f); - } else { - struct AtomVecBondKokkos_PackComm<LMPDeviceType,1,0> f(atomKK->k_x,buf,list,iswap, - domain->xprd,domain->yprd,domain->zprd, - domain->xy,domain->xz,domain->yz,pbc); - Kokkos::parallel_for(n,f); - } - } else { - if(domain->triclinic) { - struct AtomVecBondKokkos_PackComm<LMPDeviceType,0,1> 
f(atomKK->k_x,buf,list,iswap, - domain->xprd,domain->yprd,domain->zprd, - domain->xy,domain->xz,domain->yz,pbc); - Kokkos::parallel_for(n,f); - } else { - struct AtomVecBondKokkos_PackComm<LMPDeviceType,0,0> f(atomKK->k_x,buf,list,iswap, - domain->xprd,domain->yprd,domain->zprd, - domain->xy,domain->xz,domain->yz,pbc); - Kokkos::parallel_for(n,f); - } - } - } - - return n*size_forward; -} - -/* ---------------------------------------------------------------------- */ - -template<class DeviceType,int PBC_FLAG,int TRICLINIC> -struct AtomVecBondKokkos_PackCommSelf { - typedef DeviceType device_type; - - typename ArrayTypes<DeviceType>::t_x_array_randomread _x; - typename ArrayTypes<DeviceType>::t_x_array _xw; - int _nfirst; - typename ArrayTypes<DeviceType>::t_int_2d_const _list; - const int _iswap; - X_FLOAT _xprd,_yprd,_zprd,_xy,_xz,_yz; - X_FLOAT _pbc[6]; - - AtomVecBondKokkos_PackCommSelf( - const typename DAT::tdual_x_array &x, - const int &nfirst, - const typename DAT::tdual_int_2d &list, - const int & iswap, - const X_FLOAT &xprd, const X_FLOAT &yprd, const X_FLOAT &zprd, - const X_FLOAT &xy, const X_FLOAT &xz, const X_FLOAT &yz, const int* const pbc): - _x(x.view<DeviceType>()),_xw(x.view<DeviceType>()),_nfirst(nfirst),_list(list.view<DeviceType>()),_iswap(iswap), - _xprd(xprd),_yprd(yprd),_zprd(zprd), - _xy(xy),_xz(xz),_yz(yz) { - _pbc[0] = pbc[0]; _pbc[1] = pbc[1]; _pbc[2] = pbc[2]; - _pbc[3] = pbc[3]; _pbc[4] = pbc[4]; _pbc[5] = pbc[5]; - }; - - KOKKOS_INLINE_FUNCTION - void operator() (const int& i) const { - const int j = _list(_iswap,i); - if (PBC_FLAG == 0) { - _xw(i+_nfirst,0) = _x(j,0); - _xw(i+_nfirst,1) = _x(j,1); - _xw(i+_nfirst,2) = _x(j,2); - } else { - if (TRICLINIC == 0) { - _xw(i+_nfirst,0) = _x(j,0) + _pbc[0]*_xprd; - _xw(i+_nfirst,1) = _x(j,1) + _pbc[1]*_yprd; - _xw(i+_nfirst,2) = _x(j,2) + _pbc[2]*_zprd; - } else { - _xw(i+_nfirst,0) = _x(j,0) + _pbc[0]*_xprd + _pbc[5]*_xy + _pbc[4]*_xz; - _xw(i+_nfirst,1) = _x(j,1) + _pbc[1]*_yprd + 
_pbc[3]*_yz; - _xw(i+_nfirst,2) = _x(j,2) + _pbc[2]*_zprd; - } - } - - } -}; - -/* ---------------------------------------------------------------------- */ - -int AtomVecBondKokkos::pack_comm_self(const int &n, const DAT::tdual_int_2d &list, const int & iswap, - const int nfirst, const int &pbc_flag, const int* const pbc) { - if(commKK->forward_comm_on_host) { - sync(Host,X_MASK); - modified(Host,X_MASK); - if(pbc_flag) { - if(domain->triclinic) { - struct AtomVecBondKokkos_PackCommSelf<LMPHostType,1,1> f(atomKK->k_x,nfirst,list,iswap, - domain->xprd,domain->yprd,domain->zprd, - domain->xy,domain->xz,domain->yz,pbc); - Kokkos::parallel_for(n,f); - } else { - struct AtomVecBondKokkos_PackCommSelf<LMPHostType,1,0> f(atomKK->k_x,nfirst,list,iswap, - domain->xprd,domain->yprd,domain->zprd, - domain->xy,domain->xz,domain->yz,pbc); - Kokkos::parallel_for(n,f); - } - } else { - if(domain->triclinic) { - struct AtomVecBondKokkos_PackCommSelf<LMPHostType,0,1> f(atomKK->k_x,nfirst,list,iswap, - domain->xprd,domain->yprd,domain->zprd, - domain->xy,domain->xz,domain->yz,pbc); - Kokkos::parallel_for(n,f); - } else { - struct AtomVecBondKokkos_PackCommSelf<LMPHostType,0,0> f(atomKK->k_x,nfirst,list,iswap, - domain->xprd,domain->yprd,domain->zprd, - domain->xy,domain->xz,domain->yz,pbc); - Kokkos::parallel_for(n,f); - } - } - } else { - sync(Device,X_MASK); - modified(Device,X_MASK); - if(pbc_flag) { - if(domain->triclinic) { - struct AtomVecBondKokkos_PackCommSelf<LMPDeviceType,1,1> f(atomKK->k_x,nfirst,list,iswap, - domain->xprd,domain->yprd,domain->zprd, - domain->xy,domain->xz,domain->yz,pbc); - Kokkos::parallel_for(n,f); - } else { - struct AtomVecBondKokkos_PackCommSelf<LMPDeviceType,1,0> f(atomKK->k_x,nfirst,list,iswap, - domain->xprd,domain->yprd,domain->zprd, - domain->xy,domain->xz,domain->yz,pbc); - Kokkos::parallel_for(n,f); - } - } else { - if(domain->triclinic) { - struct AtomVecBondKokkos_PackCommSelf<LMPDeviceType,0,1> f(atomKK->k_x,nfirst,list,iswap, - 
domain->xprd,domain->yprd,domain->zprd, - domain->xy,domain->xz,domain->yz,pbc); - Kokkos::parallel_for(n,f); - } else { - struct AtomVecBondKokkos_PackCommSelf<LMPDeviceType,0,0> f(atomKK->k_x,nfirst,list,iswap, - domain->xprd,domain->yprd,domain->zprd, - domain->xy,domain->xz,domain->yz,pbc); - Kokkos::parallel_for(n,f); - } - } - } - return n*3; -} - -/* ---------------------------------------------------------------------- */ - -template<class DeviceType> -struct AtomVecBondKokkos_UnpackComm { - typedef DeviceType device_type; - - typename ArrayTypes<DeviceType>::t_x_array _x; - typename ArrayTypes<DeviceType>::t_xfloat_2d_const _buf; - int _first; - - AtomVecBondKokkos_UnpackComm( - const typename DAT::tdual_x_array &x, - const typename DAT::tdual_xfloat_2d &buf, - const int& first):_x(x.view<DeviceType>()),_buf(buf.view<DeviceType>()), - _first(first) {}; - - KOKKOS_INLINE_FUNCTION - void operator() (const int& i) const { - _x(i+_first,0) = _buf(i,0); - _x(i+_first,1) = _buf(i,1); - _x(i+_first,2) = _buf(i,2); - } -}; - -/* ---------------------------------------------------------------------- */ - -void AtomVecBondKokkos::unpack_comm_kokkos(const int &n, const int &first, - const DAT::tdual_xfloat_2d &buf ) { - if(commKK->forward_comm_on_host) { - sync(Host,X_MASK); - modified(Host,X_MASK); - struct AtomVecBondKokkos_UnpackComm<LMPHostType> f(atomKK->k_x,buf,first); - Kokkos::parallel_for(n,f); - } else { - sync(Device,X_MASK); - modified(Device,X_MASK); - struct AtomVecBondKokkos_UnpackComm<LMPDeviceType> f(atomKK->k_x,buf,first); - Kokkos::parallel_for(n,f); - } -} - -/* ---------------------------------------------------------------------- */ - -int AtomVecBondKokkos::pack_comm(int n, int *list, double *buf, - int pbc_flag, int *pbc) -{ - int i,j,m; - double dx,dy,dz; - - m = 0; - if (pbc_flag == 0) { - for (i = 0; i < n; i++) { - j = list[i]; - buf[m++] = h_x(j,0); - buf[m++] = h_x(j,1); - buf[m++] = h_x(j,2); - } - } else { - if (domain->triclinic == 0) 
{ - dx = pbc[0]*domain->xprd; - dy = pbc[1]*domain->yprd; - dz = pbc[2]*domain->zprd; - } else { - dx = pbc[0]*domain->xprd + pbc[5]*domain->xy + pbc[4]*domain->xz; - dy = pbc[1]*domain->yprd + pbc[3]*domain->yz; - dz = pbc[2]*domain->zprd; - } - for (i = 0; i < n; i++) { - j = list[i]; - buf[m++] = h_x(j,0) + dx; - buf[m++] = h_x(j,1) + dy; - buf[m++] = h_x(j,2) + dz; - } - } - return m; -} - -/* ---------------------------------------------------------------------- */ - -int AtomVecBondKokkos::pack_comm_vel(int n, int *list, double *buf, - int pbc_flag, int *pbc) -{ - int i,j,m; - double dx,dy,dz,dvx,dvy,dvz; - - m = 0; - if (pbc_flag == 0) { - for (i = 0; i < n; i++) { - j = list[i]; - buf[m++] = h_x(j,0); - buf[m++] = h_x(j,1); - buf[m++] = h_x(j,2); - buf[m++] = h_v(j,0); - buf[m++] = h_v(j,1); - buf[m++] = h_v(j,2); - } - } else { - if (domain->triclinic == 0) { - dx = pbc[0]*domain->xprd; - dy = pbc[1]*domain->yprd; - dz = pbc[2]*domain->zprd; - } else { - dx = pbc[0]*domain->xprd + pbc[5]*domain->xy + pbc[4]*domain->xz; - dy = pbc[1]*domain->yprd + pbc[3]*domain->yz; - dz = pbc[2]*domain->zprd; - } - if (!deform_vremap) { - for (i = 0; i < n; i++) { - j = list[i]; - buf[m++] = h_x(j,0) + dx; - buf[m++] = h_x(j,1) + dy; - buf[m++] = h_x(j,2) + dz; - buf[m++] = h_v(j,0); - buf[m++] = h_v(j,1); - buf[m++] = h_v(j,2); - } - } else { - dvx = pbc[0]*h_rate[0] + pbc[5]*h_rate[5] + pbc[4]*h_rate[4]; - dvy = pbc[1]*h_rate[1] + pbc[3]*h_rate[3]; - dvz = pbc[2]*h_rate[2]; - for (i = 0; i < n; i++) { - j = list[i]; - buf[m++] = h_x(j,0) + dx; - buf[m++] = h_x(j,1) + dy; - buf[m++] = h_x(j,2) + dz; - if (mask[i] & deform_groupbit) { - buf[m++] = h_v(j,0) + dvx; - buf[m++] = h_v(j,1) + dvy; - buf[m++] = h_v(j,2) + dvz; - } else { - buf[m++] = h_v(j,0); - buf[m++] = h_v(j,1); - buf[m++] = h_v(j,2); - } - } - } - } - return m; -} - -/* ---------------------------------------------------------------------- */ - -void AtomVecBondKokkos::unpack_comm(int n, int first, double 
*buf) -{ - int i,m,last; - - m = 0; - last = first + n; - for (i = first; i < last; i++) { - h_x(i,0) = buf[m++]; - h_x(i,1) = buf[m++]; - h_x(i,2) = buf[m++]; - } -} - -/* ---------------------------------------------------------------------- */ - -void AtomVecBondKokkos::unpack_comm_vel(int n, int first, double *buf) -{ - int i,m,last; - - m = 0; - last = first + n; - for (i = first; i < last; i++) { - h_x(i,0) = buf[m++]; - h_x(i,1) = buf[m++]; - h_x(i,2) = buf[m++]; - h_v(i,0) = buf[m++]; - h_v(i,1) = buf[m++]; - h_v(i,2) = buf[m++]; - } -} - -/* ---------------------------------------------------------------------- */ - -int AtomVecBondKokkos::pack_reverse(int n, int first, double *buf) -{ - if(n > 0) - sync(Host,F_MASK); - - int m = 0; - const int last = first + n; - for (int i = first; i < last; i++) { - buf[m++] = h_f(i,0); - buf[m++] = h_f(i,1); - buf[m++] = h_f(i,2); - } - return m; -} - -/* ---------------------------------------------------------------------- */ - -void AtomVecBondKokkos::unpack_reverse(int n, int *list, double *buf) -{ - if(n > 0) - modified(Host,F_MASK); - - int m = 0; - for (int i = 0; i < n; i++) { - const int j = list[i]; - h_f(j,0) += buf[m++]; - h_f(j,1) += buf[m++]; - h_f(j,2) += buf[m++]; - } -} - -/* ---------------------------------------------------------------------- */ - template<class DeviceType,int PBC_FLAG> struct AtomVecBondKokkos_PackBorder { typedef DeviceType device_type; diff --git a/src/KOKKOS/atom_vec_bond_kokkos.h b/src/KOKKOS/atom_vec_bond_kokkos.h index 3dcc99fa784bb86a33ef11474e42865a04d2253f..7ec15450efc23c91d9ff897fe09a52e9046e1dd3 100644 --- a/src/KOKKOS/atom_vec_bond_kokkos.h +++ b/src/KOKKOS/atom_vec_bond_kokkos.h @@ -32,12 +32,6 @@ class AtomVecBondKokkos : public AtomVecKokkos { virtual ~AtomVecBondKokkos() {} void grow(int); void copy(int, int, int); - int pack_comm(int, int *, double *, int, int *); - int pack_comm_vel(int, int *, double *, int, int *); - void unpack_comm(int, int, double *); - void 
unpack_comm_vel(int, int, double *); - int pack_reverse(int, int, double *); - void unpack_reverse(int, int *, double *); int pack_border(int, int *, double *, int, int *); int pack_border_vel(int, int *, double *, int, int *); int pack_border_hybrid(int, int *, double *); @@ -59,15 +53,6 @@ class AtomVecBondKokkos : public AtomVecKokkos { bigint memory_usage(); void grow_reset(); - int pack_comm_kokkos(const int &n, const DAT::tdual_int_2d &k_sendlist, - const int & iswap, - const DAT::tdual_xfloat_2d &buf, - const int &pbc_flag, const int pbc[]); - void unpack_comm_kokkos(const int &n, const int &nfirst, - const DAT::tdual_xfloat_2d &buf); - int pack_comm_self(const int &n, const DAT::tdual_int_2d &list, - const int & iswap, const int nfirst, - const int &pbc_flag, const int pbc[]); int pack_border_kokkos(int n, DAT::tdual_int_2d k_sendlist, DAT::tdual_xfloat_2d buf,int iswap, int pbc_flag, int *pbc, ExecutionSpace space); @@ -112,9 +97,6 @@ class AtomVecBondKokkos : public AtomVecKokkos { DAT::t_x_array d_x; DAT::t_v_array d_v; DAT::t_f_array d_f; - HAT::t_x_array h_x; - HAT::t_v_array h_v; - HAT::t_f_array h_f; DAT::t_tagint_1d d_molecule; DAT::t_int_2d d_nspecial; diff --git a/src/KOKKOS/atom_vec_charge_kokkos.cpp b/src/KOKKOS/atom_vec_charge_kokkos.cpp index 89f7e91c2b18feabc5c5427c5b92092afdfd0ce8..7b8b74b4051b63411f673b537ca4c1b9ce731198 100644 --- a/src/KOKKOS/atom_vec_charge_kokkos.cpp +++ b/src/KOKKOS/atom_vec_charge_kokkos.cpp @@ -199,397 +199,6 @@ struct AtomVecChargeKokkos_PackComm { /* ---------------------------------------------------------------------- */ -int AtomVecChargeKokkos::pack_comm_kokkos(const int &n, - const DAT::tdual_int_2d &list, - const int & iswap, - const DAT::tdual_xfloat_2d &buf, - const int &pbc_flag, - const int* const pbc) -{ - // Check whether to always run forward communication on the host - // Choose correct forward PackComm kernel - - if(commKK->forward_comm_on_host) { - sync(Host,X_MASK); - if(pbc_flag) { - 
if(domain->triclinic) { - struct AtomVecChargeKokkos_PackComm<LMPHostType,1,1> f(atomKK->k_x,buf,list,iswap, - domain->xprd,domain->yprd,domain->zprd, - domain->xy,domain->xz,domain->yz,pbc); - Kokkos::parallel_for(n,f); - } else { - struct AtomVecChargeKokkos_PackComm<LMPHostType,1,0> f(atomKK->k_x,buf,list,iswap, - domain->xprd,domain->yprd,domain->zprd, - domain->xy,domain->xz,domain->yz,pbc); - Kokkos::parallel_for(n,f); - } - } else { - if(domain->triclinic) { - struct AtomVecChargeKokkos_PackComm<LMPHostType,0,1> f(atomKK->k_x,buf,list,iswap, - domain->xprd,domain->yprd,domain->zprd, - domain->xy,domain->xz,domain->yz,pbc); - Kokkos::parallel_for(n,f); - } else { - struct AtomVecChargeKokkos_PackComm<LMPHostType,0,0> f(atomKK->k_x,buf,list,iswap, - domain->xprd,domain->yprd,domain->zprd, - domain->xy,domain->xz,domain->yz,pbc); - Kokkos::parallel_for(n,f); - } - } - } else { - sync(Device,X_MASK); - if(pbc_flag) { - if(domain->triclinic) { - struct AtomVecChargeKokkos_PackComm<LMPDeviceType,1,1> f(atomKK->k_x,buf,list,iswap, - domain->xprd,domain->yprd,domain->zprd, - domain->xy,domain->xz,domain->yz,pbc); - Kokkos::parallel_for(n,f); - } else { - struct AtomVecChargeKokkos_PackComm<LMPDeviceType,1,0> f(atomKK->k_x,buf,list,iswap, - domain->xprd,domain->yprd,domain->zprd, - domain->xy,domain->xz,domain->yz,pbc); - Kokkos::parallel_for(n,f); - } - } else { - if(domain->triclinic) { - struct AtomVecChargeKokkos_PackComm<LMPDeviceType,0,1> f(atomKK->k_x,buf,list,iswap, - domain->xprd,domain->yprd,domain->zprd, - domain->xy,domain->xz,domain->yz,pbc); - Kokkos::parallel_for(n,f); - } else { - struct AtomVecChargeKokkos_PackComm<LMPDeviceType,0,0> f(atomKK->k_x,buf,list,iswap, - domain->xprd,domain->yprd,domain->zprd, - domain->xy,domain->xz,domain->yz,pbc); - Kokkos::parallel_for(n,f); - } - } - } - - return n*size_forward; -} - -/* ---------------------------------------------------------------------- */ - -template<class DeviceType,int PBC_FLAG,int TRICLINIC> 
-struct AtomVecChargeKokkos_PackCommSelf { - typedef DeviceType device_type; - - typename ArrayTypes<DeviceType>::t_x_array_randomread _x; - typename ArrayTypes<DeviceType>::t_x_array _xw; - int _nfirst; - typename ArrayTypes<DeviceType>::t_int_2d_const _list; - const int _iswap; - X_FLOAT _xprd,_yprd,_zprd,_xy,_xz,_yz; - X_FLOAT _pbc[6]; - - AtomVecChargeKokkos_PackCommSelf( - const typename DAT::tdual_x_array &x, - const int &nfirst, - const typename DAT::tdual_int_2d &list, - const int & iswap, - const X_FLOAT &xprd, const X_FLOAT &yprd, const X_FLOAT &zprd, - const X_FLOAT &xy, const X_FLOAT &xz, const X_FLOAT &yz, const int* const pbc): - _x(x.view<DeviceType>()),_xw(x.view<DeviceType>()),_nfirst(nfirst),_list(list.view<DeviceType>()),_iswap(iswap), - _xprd(xprd),_yprd(yprd),_zprd(zprd), - _xy(xy),_xz(xz),_yz(yz) { - _pbc[0] = pbc[0]; _pbc[1] = pbc[1]; _pbc[2] = pbc[2]; - _pbc[3] = pbc[3]; _pbc[4] = pbc[4]; _pbc[5] = pbc[5]; - }; - - KOKKOS_INLINE_FUNCTION - void operator() (const int& i) const { - const int j = _list(_iswap,i); - if (PBC_FLAG == 0) { - _xw(i+_nfirst,0) = _x(j,0); - _xw(i+_nfirst,1) = _x(j,1); - _xw(i+_nfirst,2) = _x(j,2); - } else { - if (TRICLINIC == 0) { - _xw(i+_nfirst,0) = _x(j,0) + _pbc[0]*_xprd; - _xw(i+_nfirst,1) = _x(j,1) + _pbc[1]*_yprd; - _xw(i+_nfirst,2) = _x(j,2) + _pbc[2]*_zprd; - } else { - _xw(i+_nfirst,0) = _x(j,0) + _pbc[0]*_xprd + _pbc[5]*_xy + _pbc[4]*_xz; - _xw(i+_nfirst,1) = _x(j,1) + _pbc[1]*_yprd + _pbc[3]*_yz; - _xw(i+_nfirst,2) = _x(j,2) + _pbc[2]*_zprd; - } - } - - } -}; - -/* ---------------------------------------------------------------------- */ - -int AtomVecChargeKokkos::pack_comm_self(const int &n, const DAT::tdual_int_2d &list, const int & iswap, - const int nfirst, const int &pbc_flag, const int* const pbc) { - if(commKK->forward_comm_on_host) { - sync(Host,X_MASK); - modified(Host,X_MASK); - if(pbc_flag) { - if(domain->triclinic) { - struct AtomVecChargeKokkos_PackCommSelf<LMPHostType,1,1> 
f(atomKK->k_x,nfirst,list,iswap, - domain->xprd,domain->yprd,domain->zprd, - domain->xy,domain->xz,domain->yz,pbc); - Kokkos::parallel_for(n,f); - } else { - struct AtomVecChargeKokkos_PackCommSelf<LMPHostType,1,0> f(atomKK->k_x,nfirst,list,iswap, - domain->xprd,domain->yprd,domain->zprd, - domain->xy,domain->xz,domain->yz,pbc); - Kokkos::parallel_for(n,f); - } - } else { - if(domain->triclinic) { - struct AtomVecChargeKokkos_PackCommSelf<LMPHostType,0,1> f(atomKK->k_x,nfirst,list,iswap, - domain->xprd,domain->yprd,domain->zprd, - domain->xy,domain->xz,domain->yz,pbc); - Kokkos::parallel_for(n,f); - } else { - struct AtomVecChargeKokkos_PackCommSelf<LMPHostType,0,0> f(atomKK->k_x,nfirst,list,iswap, - domain->xprd,domain->yprd,domain->zprd, - domain->xy,domain->xz,domain->yz,pbc); - Kokkos::parallel_for(n,f); - } - } - } else { - sync(Device,X_MASK); - modified(Device,X_MASK); - if(pbc_flag) { - if(domain->triclinic) { - struct AtomVecChargeKokkos_PackCommSelf<LMPDeviceType,1,1> f(atomKK->k_x,nfirst,list,iswap, - domain->xprd,domain->yprd,domain->zprd, - domain->xy,domain->xz,domain->yz,pbc); - Kokkos::parallel_for(n,f); - } else { - struct AtomVecChargeKokkos_PackCommSelf<LMPDeviceType,1,0> f(atomKK->k_x,nfirst,list,iswap, - domain->xprd,domain->yprd,domain->zprd, - domain->xy,domain->xz,domain->yz,pbc); - Kokkos::parallel_for(n,f); - } - } else { - if(domain->triclinic) { - struct AtomVecChargeKokkos_PackCommSelf<LMPDeviceType,0,1> f(atomKK->k_x,nfirst,list,iswap, - domain->xprd,domain->yprd,domain->zprd, - domain->xy,domain->xz,domain->yz,pbc); - Kokkos::parallel_for(n,f); - } else { - struct AtomVecChargeKokkos_PackCommSelf<LMPDeviceType,0,0> f(atomKK->k_x,nfirst,list,iswap, - domain->xprd,domain->yprd,domain->zprd, - domain->xy,domain->xz,domain->yz,pbc); - Kokkos::parallel_for(n,f); - } - } - } - return n*3; -} - -/* ---------------------------------------------------------------------- */ - -template<class DeviceType> -struct AtomVecChargeKokkos_UnpackComm { 
- typedef DeviceType device_type; - - typename ArrayTypes<DeviceType>::t_x_array _x; - typename ArrayTypes<DeviceType>::t_xfloat_2d_const _buf; - int _first; - - AtomVecChargeKokkos_UnpackComm( - const typename DAT::tdual_x_array &x, - const typename DAT::tdual_xfloat_2d &buf, - const int& first):_x(x.view<DeviceType>()),_buf(buf.view<DeviceType>()), - _first(first) {}; - - KOKKOS_INLINE_FUNCTION - void operator() (const int& i) const { - _x(i+_first,0) = _buf(i,0); - _x(i+_first,1) = _buf(i,1); - _x(i+_first,2) = _buf(i,2); - } -}; - -/* ---------------------------------------------------------------------- */ - -void AtomVecChargeKokkos::unpack_comm_kokkos(const int &n, const int &first, - const DAT::tdual_xfloat_2d &buf ) { - if(commKK->forward_comm_on_host) { - sync(Host,X_MASK); - modified(Host,X_MASK); - struct AtomVecChargeKokkos_UnpackComm<LMPHostType> f(atomKK->k_x,buf,first); - Kokkos::parallel_for(n,f); - } else { - sync(Device,X_MASK); - modified(Device,X_MASK); - struct AtomVecChargeKokkos_UnpackComm<LMPDeviceType> f(atomKK->k_x,buf,first); - Kokkos::parallel_for(n,f); - } -} - -/* ---------------------------------------------------------------------- */ - -int AtomVecChargeKokkos::pack_comm(int n, int *list, double *buf, - int pbc_flag, int *pbc) -{ - int i,j,m; - double dx,dy,dz; - - m = 0; - if (pbc_flag == 0) { - for (i = 0; i < n; i++) { - j = list[i]; - buf[m++] = h_x(j,0); - buf[m++] = h_x(j,1); - buf[m++] = h_x(j,2); - } - } else { - if (domain->triclinic == 0) { - dx = pbc[0]*domain->xprd; - dy = pbc[1]*domain->yprd; - dz = pbc[2]*domain->zprd; - } else { - dx = pbc[0]*domain->xprd + pbc[5]*domain->xy + pbc[4]*domain->xz; - dy = pbc[1]*domain->yprd + pbc[3]*domain->yz; - dz = pbc[2]*domain->zprd; - } - for (i = 0; i < n; i++) { - j = list[i]; - buf[m++] = h_x(j,0) + dx; - buf[m++] = h_x(j,1) + dy; - buf[m++] = h_x(j,2) + dz; - } - } - return m; -} - -/* ---------------------------------------------------------------------- */ - -int 
AtomVecChargeKokkos::pack_comm_vel(int n, int *list, double *buf, - int pbc_flag, int *pbc) -{ - int i,j,m; - double dx,dy,dz,dvx,dvy,dvz; - - m = 0; - if (pbc_flag == 0) { - for (i = 0; i < n; i++) { - j = list[i]; - buf[m++] = h_x(j,0); - buf[m++] = h_x(j,1); - buf[m++] = h_x(j,2); - buf[m++] = h_v(j,0); - buf[m++] = h_v(j,1); - buf[m++] = h_v(j,2); - } - } else { - if (domain->triclinic == 0) { - dx = pbc[0]*domain->xprd; - dy = pbc[1]*domain->yprd; - dz = pbc[2]*domain->zprd; - } else { - dx = pbc[0]*domain->xprd + pbc[5]*domain->xy + pbc[4]*domain->xz; - dy = pbc[1]*domain->yprd + pbc[3]*domain->yz; - dz = pbc[2]*domain->zprd; - } - if (!deform_vremap) { - for (i = 0; i < n; i++) { - j = list[i]; - buf[m++] = h_x(j,0) + dx; - buf[m++] = h_x(j,1) + dy; - buf[m++] = h_x(j,2) + dz; - buf[m++] = h_v(j,0); - buf[m++] = h_v(j,1); - buf[m++] = h_v(j,2); - } - } else { - dvx = pbc[0]*h_rate[0] + pbc[5]*h_rate[5] + pbc[4]*h_rate[4]; - dvy = pbc[1]*h_rate[1] + pbc[3]*h_rate[3]; - dvz = pbc[2]*h_rate[2]; - for (i = 0; i < n; i++) { - j = list[i]; - buf[m++] = h_x(j,0) + dx; - buf[m++] = h_x(j,1) + dy; - buf[m++] = h_x(j,2) + dz; - if (mask[i] & deform_groupbit) { - buf[m++] = h_v(j,0) + dvx; - buf[m++] = h_v(j,1) + dvy; - buf[m++] = h_v(j,2) + dvz; - } else { - buf[m++] = h_v(j,0); - buf[m++] = h_v(j,1); - buf[m++] = h_v(j,2); - } - } - } - } - return m; -} - -/* ---------------------------------------------------------------------- */ - -void AtomVecChargeKokkos::unpack_comm(int n, int first, double *buf) -{ - int i,m,last; - - m = 0; - last = first + n; - for (i = first; i < last; i++) { - h_x(i,0) = buf[m++]; - h_x(i,1) = buf[m++]; - h_x(i,2) = buf[m++]; - } -} - -/* ---------------------------------------------------------------------- */ - -void AtomVecChargeKokkos::unpack_comm_vel(int n, int first, double *buf) -{ - int i,m,last; - - m = 0; - last = first + n; - for (i = first; i < last; i++) { - h_x(i,0) = buf[m++]; - h_x(i,1) = buf[m++]; - h_x(i,2) = buf[m++]; - 
h_v(i,0) = buf[m++]; - h_v(i,1) = buf[m++]; - h_v(i,2) = buf[m++]; - } -} - -/* ---------------------------------------------------------------------- */ - -int AtomVecChargeKokkos::pack_reverse(int n, int first, double *buf) -{ - if(n > 0) - sync(Host,F_MASK); - - int m = 0; - const int last = first + n; - for (int i = first; i < last; i++) { - buf[m++] = h_f(i,0); - buf[m++] = h_f(i,1); - buf[m++] = h_f(i,2); - } - return m; -} - -/* ---------------------------------------------------------------------- */ - -void AtomVecChargeKokkos::unpack_reverse(int n, int *list, double *buf) -{ - if(n > 0) - modified(Host,F_MASK); - - int m = 0; - for (int i = 0; i < n; i++) { - const int j = list[i]; - h_f(j,0) += buf[m++]; - h_f(j,1) += buf[m++]; - h_f(j,2) += buf[m++]; - } -} - -/* ---------------------------------------------------------------------- */ - template<class DeviceType,int PBC_FLAG> struct AtomVecChargeKokkos_PackBorder { typedef DeviceType device_type; diff --git a/src/KOKKOS/atom_vec_charge_kokkos.h b/src/KOKKOS/atom_vec_charge_kokkos.h index f9b385e7ed87aaf7557dfb0d0c7bdbbc2febafb2..e9ff70bbe103c7c1df5d99387ee92a8530bcd79b 100644 --- a/src/KOKKOS/atom_vec_charge_kokkos.h +++ b/src/KOKKOS/atom_vec_charge_kokkos.h @@ -33,12 +33,6 @@ class AtomVecChargeKokkos : public AtomVecKokkos { virtual ~AtomVecChargeKokkos() {} void grow(int); void copy(int, int, int); - int pack_comm(int, int *, double *, int, int *); - int pack_comm_vel(int, int *, double *, int, int *); - void unpack_comm(int, int, double *); - void unpack_comm_vel(int, int, double *); - int pack_reverse(int, int, double *); - void unpack_reverse(int, int *, double *); int pack_border(int, int *, double *, int, int *); int pack_border_vel(int, int *, double *, int, int *); int pack_border_hybrid(int, int *, double *); @@ -60,15 +54,6 @@ class AtomVecChargeKokkos : public AtomVecKokkos { bigint memory_usage(); void grow_reset(); - int pack_comm_kokkos(const int &n, const DAT::tdual_int_2d &k_sendlist, 
- const int & iswap, - const DAT::tdual_xfloat_2d &buf, - const int &pbc_flag, const int pbc[]); - void unpack_comm_kokkos(const int &n, const int &nfirst, - const DAT::tdual_xfloat_2d &buf); - int pack_comm_self(const int &n, const DAT::tdual_int_2d &list, - const int & iswap, const int nfirst, - const int &pbc_flag, const int pbc[]); int pack_border_kokkos(int n, DAT::tdual_int_2d k_sendlist, DAT::tdual_xfloat_2d buf,int iswap, int pbc_flag, int *pbc, ExecutionSpace space); @@ -108,9 +93,6 @@ class AtomVecChargeKokkos : public AtomVecKokkos { DAT::t_x_array d_x; DAT::t_v_array d_v; DAT::t_f_array d_f; - HAT::t_x_array h_x; - HAT::t_v_array h_v; - HAT::t_f_array h_f; DAT::t_float_1d d_q; diff --git a/src/KOKKOS/atom_vec_dpd_kokkos.h b/src/KOKKOS/atom_vec_dpd_kokkos.h index 372404cc7d9f239e8b23516f091a67011e7e59bd..cec1b82357eab95cf9a34595b5f398793cb69f4d 100644 --- a/src/KOKKOS/atom_vec_dpd_kokkos.h +++ b/src/KOKKOS/atom_vec_dpd_kokkos.h @@ -111,9 +111,6 @@ class AtomVecDPDKokkos : public AtomVecKokkos { DAT::t_x_array d_x; DAT::t_v_array d_v; DAT::t_f_array d_f; - HAT::t_x_array h_x; - HAT::t_v_array h_v; - HAT::t_f_array h_f; DAT::tdual_int_1d k_count; }; diff --git a/src/KOKKOS/atom_vec_full_kokkos.cpp b/src/KOKKOS/atom_vec_full_kokkos.cpp index fd7eaf7c81c9c6814b606976fc4f06def58f9339..8e9abe40675f7816ca969a23c0ab55855f71c5d1 100644 --- a/src/KOKKOS/atom_vec_full_kokkos.cpp +++ b/src/KOKKOS/atom_vec_full_kokkos.cpp @@ -307,452 +307,6 @@ void AtomVecFullKokkos::copy(int i, int j, int delflag) /* ---------------------------------------------------------------------- */ -template<class DeviceType,int PBC_FLAG,int TRICLINIC> -struct AtomVecFullKokkos_PackComm { - typedef DeviceType device_type; - - typename ArrayTypes<DeviceType>::t_x_array_randomread _x; - typename ArrayTypes<DeviceType>::t_xfloat_2d_um _buf; - typename ArrayTypes<DeviceType>::t_int_2d_const _list; - const int _iswap; - X_FLOAT _xprd,_yprd,_zprd,_xy,_xz,_yz; - X_FLOAT _pbc[6]; - - 
AtomVecFullKokkos_PackComm( - const typename DAT::tdual_x_array &x, - const typename DAT::tdual_xfloat_2d &buf, - const typename DAT::tdual_int_2d &list, - const int & iswap, - const X_FLOAT &xprd, const X_FLOAT &yprd, const X_FLOAT &zprd, - const X_FLOAT &xy, const X_FLOAT &xz, const X_FLOAT &yz, const int* const pbc): - _x(x.view<DeviceType>()),_list(list.view<DeviceType>()),_iswap(iswap), - _xprd(xprd),_yprd(yprd),_zprd(zprd), - _xy(xy),_xz(xz),_yz(yz) { - const size_t maxsend = (buf.view<DeviceType>().dimension_0() - *buf.view<DeviceType>().dimension_1())/3; - const size_t elements = 3; - buffer_view<DeviceType>(_buf,buf,maxsend,elements); - _pbc[0] = pbc[0]; _pbc[1] = pbc[1]; _pbc[2] = pbc[2]; - _pbc[3] = pbc[3]; _pbc[4] = pbc[4]; _pbc[5] = pbc[5]; - }; - - KOKKOS_INLINE_FUNCTION - void operator() (const int& i) const { - const int j = _list(_iswap,i); - if (PBC_FLAG == 0) { - _buf(i,0) = _x(j,0); - _buf(i,1) = _x(j,1); - _buf(i,2) = _x(j,2); - } else { - if (TRICLINIC == 0) { - _buf(i,0) = _x(j,0) + _pbc[0]*_xprd; - _buf(i,1) = _x(j,1) + _pbc[1]*_yprd; - _buf(i,2) = _x(j,2) + _pbc[2]*_zprd; - } else { - _buf(i,0) = _x(j,0) + _pbc[0]*_xprd + _pbc[5]*_xy + _pbc[4]*_xz; - _buf(i,1) = _x(j,1) + _pbc[1]*_yprd + _pbc[3]*_yz; - _buf(i,2) = _x(j,2) + _pbc[2]*_zprd; - } - } - } -}; - -/* ---------------------------------------------------------------------- */ - -int AtomVecFullKokkos::pack_comm_kokkos(const int &n, - const DAT::tdual_int_2d &list, - const int & iswap, - const DAT::tdual_xfloat_2d &buf, - const int &pbc_flag, - const int* const pbc) -{ - // Check whether to always run forward communication on the host - // Choose correct forward PackComm kernel - - if(commKK->forward_comm_on_host) { - sync(Host,X_MASK); - if(pbc_flag) { - if(domain->triclinic) { - struct AtomVecFullKokkos_PackComm<LMPHostType,1,1> - f(atomKK->k_x,buf,list,iswap,domain->xprd,domain->yprd,domain->zprd, - domain->xy,domain->xz,domain->yz,pbc); - Kokkos::parallel_for(n,f); - } else { - 
struct AtomVecFullKokkos_PackComm<LMPHostType,1,0> - f(atomKK->k_x,buf,list,iswap,domain->xprd,domain->yprd,domain->zprd, - domain->xy,domain->xz,domain->yz,pbc); - Kokkos::parallel_for(n,f); - } - } else { - if(domain->triclinic) { - struct AtomVecFullKokkos_PackComm<LMPHostType,0,1> - f(atomKK->k_x,buf,list,iswap,domain->xprd,domain->yprd,domain->zprd, - domain->xy,domain->xz,domain->yz,pbc); - Kokkos::parallel_for(n,f); - } else { - struct AtomVecFullKokkos_PackComm<LMPHostType,0,0> - f(atomKK->k_x,buf,list,iswap,domain->xprd,domain->yprd,domain->zprd, - domain->xy,domain->xz,domain->yz,pbc); - Kokkos::parallel_for(n,f); - } - } - } else { - sync(Device,X_MASK); - if(pbc_flag) { - if(domain->triclinic) { - struct AtomVecFullKokkos_PackComm<LMPDeviceType,1,1> - f(atomKK->k_x,buf,list,iswap,domain->xprd,domain->yprd,domain->zprd, - domain->xy,domain->xz,domain->yz,pbc); - Kokkos::parallel_for(n,f); - } else { - struct AtomVecFullKokkos_PackComm<LMPDeviceType,1,0> - f(atomKK->k_x,buf,list,iswap,domain->xprd,domain->yprd,domain->zprd, - domain->xy,domain->xz,domain->yz,pbc); - Kokkos::parallel_for(n,f); - } - } else { - if(domain->triclinic) { - struct AtomVecFullKokkos_PackComm<LMPDeviceType,0,1> - f(atomKK->k_x,buf,list,iswap,domain->xprd,domain->yprd,domain->zprd, - domain->xy,domain->xz,domain->yz,pbc); - Kokkos::parallel_for(n,f); - } else { - struct AtomVecFullKokkos_PackComm<LMPDeviceType,0,0> - f(atomKK->k_x,buf,list,iswap,domain->xprd,domain->yprd,domain->zprd, - domain->xy,domain->xz,domain->yz,pbc); - Kokkos::parallel_for(n,f); - } - } - } - - return n*size_forward; -} - -/* ---------------------------------------------------------------------- */ - -template<class DeviceType,int PBC_FLAG,int TRICLINIC> -struct AtomVecFullKokkos_PackCommSelf { - typedef DeviceType device_type; - - typename ArrayTypes<DeviceType>::t_x_array_randomread _x; - typename ArrayTypes<DeviceType>::t_x_array _xw; - int _nfirst; - typename ArrayTypes<DeviceType>::t_int_2d_const 
_list; - const int _iswap; - X_FLOAT _xprd,_yprd,_zprd,_xy,_xz,_yz; - X_FLOAT _pbc[6]; - - AtomVecFullKokkos_PackCommSelf( - const typename DAT::tdual_x_array &x, - const int &nfirst, - const typename DAT::tdual_int_2d &list, - const int & iswap, - const X_FLOAT &xprd, const X_FLOAT &yprd, const X_FLOAT &zprd, - const X_FLOAT &xy, const X_FLOAT &xz, const X_FLOAT &yz, const int* const pbc): - _x(x.view<DeviceType>()),_xw(x.view<DeviceType>()),_nfirst(nfirst), - _list(list.view<DeviceType>()),_iswap(iswap), - _xprd(xprd),_yprd(yprd),_zprd(zprd), - _xy(xy),_xz(xz),_yz(yz) { - _pbc[0] = pbc[0]; _pbc[1] = pbc[1]; _pbc[2] = pbc[2]; - _pbc[3] = pbc[3]; _pbc[4] = pbc[4]; _pbc[5] = pbc[5]; - }; - - KOKKOS_INLINE_FUNCTION - void operator() (const int& i) const { - const int j = _list(_iswap,i); - if (PBC_FLAG == 0) { - _xw(i+_nfirst,0) = _x(j,0); - _xw(i+_nfirst,1) = _x(j,1); - _xw(i+_nfirst,2) = _x(j,2); - } else { - if (TRICLINIC == 0) { - _xw(i+_nfirst,0) = _x(j,0) + _pbc[0]*_xprd; - _xw(i+_nfirst,1) = _x(j,1) + _pbc[1]*_yprd; - _xw(i+_nfirst,2) = _x(j,2) + _pbc[2]*_zprd; - } else { - _xw(i+_nfirst,0) = _x(j,0) + _pbc[0]*_xprd + _pbc[5]*_xy + _pbc[4]*_xz; - _xw(i+_nfirst,1) = _x(j,1) + _pbc[1]*_yprd + _pbc[3]*_yz; - _xw(i+_nfirst,2) = _x(j,2) + _pbc[2]*_zprd; - } - } - - } -}; - -/* ---------------------------------------------------------------------- */ - -int AtomVecFullKokkos::pack_comm_self(const int &n, const DAT::tdual_int_2d &list, - const int & iswap, - const int nfirst, const int &pbc_flag, - const int* const pbc) { - if(commKK->forward_comm_on_host) { - sync(Host,X_MASK); - modified(Host,X_MASK); - if(pbc_flag) { - if(domain->triclinic) { - struct AtomVecFullKokkos_PackCommSelf<LMPHostType,1,1> - f(atomKK->k_x,nfirst,list,iswap,domain->xprd,domain->yprd,domain->zprd, - domain->xy,domain->xz,domain->yz,pbc); - Kokkos::parallel_for(n,f); - } else { - struct AtomVecFullKokkos_PackCommSelf<LMPHostType,1,0> - 
f(atomKK->k_x,nfirst,list,iswap,domain->xprd,domain->yprd,domain->zprd, - domain->xy,domain->xz,domain->yz,pbc); - Kokkos::parallel_for(n,f); - } - } else { - if(domain->triclinic) { - struct AtomVecFullKokkos_PackCommSelf<LMPHostType,0,1> - f(atomKK->k_x,nfirst,list,iswap,domain->xprd,domain->yprd,domain->zprd, - domain->xy,domain->xz,domain->yz,pbc); - Kokkos::parallel_for(n,f); - } else { - struct AtomVecFullKokkos_PackCommSelf<LMPHostType,0,0> - f(atomKK->k_x,nfirst,list,iswap,domain->xprd,domain->yprd,domain->zprd, - domain->xy,domain->xz,domain->yz,pbc); - Kokkos::parallel_for(n,f); - } - } - } else { - sync(Device,X_MASK); - modified(Device,X_MASK); - if(pbc_flag) { - if(domain->triclinic) { - struct AtomVecFullKokkos_PackCommSelf<LMPDeviceType,1,1> - f(atomKK->k_x,nfirst,list,iswap,domain->xprd,domain->yprd,domain->zprd, - domain->xy,domain->xz,domain->yz,pbc); - Kokkos::parallel_for(n,f); - } else { - struct AtomVecFullKokkos_PackCommSelf<LMPDeviceType,1,0> - f(atomKK->k_x,nfirst,list,iswap,domain->xprd,domain->yprd,domain->zprd, - domain->xy,domain->xz,domain->yz,pbc); - Kokkos::parallel_for(n,f); - } - } else { - if(domain->triclinic) { - struct AtomVecFullKokkos_PackCommSelf<LMPDeviceType,0,1> - f(atomKK->k_x,nfirst,list,iswap,domain->xprd,domain->yprd,domain->zprd, - domain->xy,domain->xz,domain->yz,pbc); - Kokkos::parallel_for(n,f); - } else { - struct AtomVecFullKokkos_PackCommSelf<LMPDeviceType,0,0> - f(atomKK->k_x,nfirst,list,iswap,domain->xprd,domain->yprd,domain->zprd, - domain->xy,domain->xz,domain->yz,pbc); - Kokkos::parallel_for(n,f); - } - } - } - return n*3; -} - -/* ---------------------------------------------------------------------- */ - -template<class DeviceType> -struct AtomVecFullKokkos_UnpackComm { - typedef DeviceType device_type; - - typename ArrayTypes<DeviceType>::t_x_array _x; - typename ArrayTypes<DeviceType>::t_xfloat_2d_const _buf; - int _first; - - AtomVecFullKokkos_UnpackComm( - const typename DAT::tdual_x_array &x, - 
const typename DAT::tdual_xfloat_2d &buf, - const int& first):_x(x.view<DeviceType>()),_buf(buf.view<DeviceType>()), - _first(first) {}; - - KOKKOS_INLINE_FUNCTION - void operator() (const int& i) const { - _x(i+_first,0) = _buf(i,0); - _x(i+_first,1) = _buf(i,1); - _x(i+_first,2) = _buf(i,2); - } -}; - -/* ---------------------------------------------------------------------- */ - -void AtomVecFullKokkos::unpack_comm_kokkos(const int &n, const int &first, - const DAT::tdual_xfloat_2d &buf ) { - if(commKK->forward_comm_on_host) { - sync(Host,X_MASK); - modified(Host,X_MASK); - struct AtomVecFullKokkos_UnpackComm<LMPHostType> f(atomKK->k_x,buf,first); - Kokkos::parallel_for(n,f); - } else { - sync(Device,X_MASK); - modified(Device,X_MASK); - struct AtomVecFullKokkos_UnpackComm<LMPDeviceType> f(atomKK->k_x,buf,first); - Kokkos::parallel_for(n,f); - } -} - -/* ---------------------------------------------------------------------- */ - -int AtomVecFullKokkos::pack_comm(int n, int *list, double *buf, - int pbc_flag, int *pbc) -{ - int i,j,m; - double dx,dy,dz; - - m = 0; - if (pbc_flag == 0) { - for (i = 0; i < n; i++) { - j = list[i]; - buf[m++] = h_x(j,0); - buf[m++] = h_x(j,1); - buf[m++] = h_x(j,2); - } - } else { - if (domain->triclinic == 0) { - dx = pbc[0]*domain->xprd; - dy = pbc[1]*domain->yprd; - dz = pbc[2]*domain->zprd; - } else { - dx = pbc[0]*domain->xprd + pbc[5]*domain->xy + pbc[4]*domain->xz; - dy = pbc[1]*domain->yprd + pbc[3]*domain->yz; - dz = pbc[2]*domain->zprd; - } - for (i = 0; i < n; i++) { - j = list[i]; - buf[m++] = h_x(j,0) + dx; - buf[m++] = h_x(j,1) + dy; - buf[m++] = h_x(j,2) + dz; - } - } - return m; -} - -/* ---------------------------------------------------------------------- */ - -int AtomVecFullKokkos::pack_comm_vel(int n, int *list, double *buf, - int pbc_flag, int *pbc) -{ - int i,j,m; - double dx,dy,dz,dvx,dvy,dvz; - - m = 0; - if (pbc_flag == 0) { - for (i = 0; i < n; i++) { - j = list[i]; - buf[m++] = h_x(j,0); - buf[m++] = 
h_x(j,1); - buf[m++] = h_x(j,2); - buf[m++] = h_v(j,0); - buf[m++] = h_v(j,1); - buf[m++] = h_v(j,2); - } - } else { - if (domain->triclinic == 0) { - dx = pbc[0]*domain->xprd; - dy = pbc[1]*domain->yprd; - dz = pbc[2]*domain->zprd; - } else { - dx = pbc[0]*domain->xprd + pbc[5]*domain->xy + pbc[4]*domain->xz; - dy = pbc[1]*domain->yprd + pbc[3]*domain->yz; - dz = pbc[2]*domain->zprd; - } - if (!deform_vremap) { - for (i = 0; i < n; i++) { - j = list[i]; - buf[m++] = h_x(j,0) + dx; - buf[m++] = h_x(j,1) + dy; - buf[m++] = h_x(j,2) + dz; - buf[m++] = h_v(j,0); - buf[m++] = h_v(j,1); - buf[m++] = h_v(j,2); - } - } else { - dvx = pbc[0]*h_rate[0] + pbc[5]*h_rate[5] + pbc[4]*h_rate[4]; - dvy = pbc[1]*h_rate[1] + pbc[3]*h_rate[3]; - dvz = pbc[2]*h_rate[2]; - for (i = 0; i < n; i++) { - j = list[i]; - buf[m++] = h_x(j,0) + dx; - buf[m++] = h_x(j,1) + dy; - buf[m++] = h_x(j,2) + dz; - if (mask[i] & deform_groupbit) { - buf[m++] = h_v(j,0) + dvx; - buf[m++] = h_v(j,1) + dvy; - buf[m++] = h_v(j,2) + dvz; - } else { - buf[m++] = h_v(j,0); - buf[m++] = h_v(j,1); - buf[m++] = h_v(j,2); - } - } - } - } - return m; -} - -/* ---------------------------------------------------------------------- */ - -void AtomVecFullKokkos::unpack_comm(int n, int first, double *buf) -{ - int i,m,last; - - m = 0; - last = first + n; - for (i = first; i < last; i++) { - h_x(i,0) = buf[m++]; - h_x(i,1) = buf[m++]; - h_x(i,2) = buf[m++]; - } -} - -/* ---------------------------------------------------------------------- */ - -void AtomVecFullKokkos::unpack_comm_vel(int n, int first, double *buf) -{ - int i,m,last; - - m = 0; - last = first + n; - for (i = first; i < last; i++) { - h_x(i,0) = buf[m++]; - h_x(i,1) = buf[m++]; - h_x(i,2) = buf[m++]; - h_v(i,0) = buf[m++]; - h_v(i,1) = buf[m++]; - h_v(i,2) = buf[m++]; - } -} - -/* ---------------------------------------------------------------------- */ - -int AtomVecFullKokkos::pack_reverse(int n, int first, double *buf) -{ - if(n > 0) - 
sync(Host,F_MASK); - - int m = 0; - const int last = first + n; - for (int i = first; i < last; i++) { - buf[m++] = h_f(i,0); - buf[m++] = h_f(i,1); - buf[m++] = h_f(i,2); - } - return m; -} - -/* ---------------------------------------------------------------------- */ - -void AtomVecFullKokkos::unpack_reverse(int n, int *list, double *buf) -{ - if(n > 0) - modified(Host,F_MASK); - - int m = 0; - for (int i = 0; i < n; i++) { - const int j = list[i]; - h_f(j,0) += buf[m++]; - h_f(j,1) += buf[m++]; - h_f(j,2) += buf[m++]; - } -} - -/* ---------------------------------------------------------------------- */ - template<class DeviceType,int PBC_FLAG> struct AtomVecFullKokkos_PackBorder { typedef DeviceType device_type; diff --git a/src/KOKKOS/atom_vec_full_kokkos.h b/src/KOKKOS/atom_vec_full_kokkos.h index 760df087e14105a3330b4a5301a1ddc09f9900af..33760a8b5fc88fcfab56e892f9c2848cd228043b 100644 --- a/src/KOKKOS/atom_vec_full_kokkos.h +++ b/src/KOKKOS/atom_vec_full_kokkos.h @@ -32,12 +32,6 @@ class AtomVecFullKokkos : public AtomVecKokkos { virtual ~AtomVecFullKokkos() {} void grow(int); void copy(int, int, int); - int pack_comm(int, int *, double *, int, int *); - int pack_comm_vel(int, int *, double *, int, int *); - void unpack_comm(int, int, double *); - void unpack_comm_vel(int, int, double *); - int pack_reverse(int, int, double *); - void unpack_reverse(int, int *, double *); int pack_border(int, int *, double *, int, int *); int pack_border_vel(int, int *, double *, int, int *); int pack_border_hybrid(int, int *, double *); @@ -59,15 +53,6 @@ class AtomVecFullKokkos : public AtomVecKokkos { bigint memory_usage(); void grow_reset(); - int pack_comm_kokkos(const int &n, const DAT::tdual_int_2d &k_sendlist, - const int & iswap, - const DAT::tdual_xfloat_2d &buf, - const int &pbc_flag, const int pbc[]); - void unpack_comm_kokkos(const int &n, const int &nfirst, - const DAT::tdual_xfloat_2d &buf); - int pack_comm_self(const int &n, const DAT::tdual_int_2d &list, - 
const int & iswap, const int nfirst, - const int &pbc_flag, const int pbc[]); int pack_border_kokkos(int n, DAT::tdual_int_2d k_sendlist, DAT::tdual_xfloat_2d buf,int iswap, int pbc_flag, int *pbc, ExecutionSpace space); @@ -125,9 +110,6 @@ class AtomVecFullKokkos : public AtomVecKokkos { DAT::t_x_array d_x; DAT::t_v_array d_v; DAT::t_f_array d_f; - HAT::t_x_array h_x; - HAT::t_v_array h_v; - HAT::t_f_array h_f; DAT::t_float_1d d_q; HAT::t_float_1d h_q; diff --git a/src/KOKKOS/atom_vec_kokkos.cpp b/src/KOKKOS/atom_vec_kokkos.cpp index 5542991395157470204cf82b701c92c3908368d0..03fb2a4ead549b09096705cefb7f929b69bc2cce 100644 --- a/src/KOKKOS/atom_vec_kokkos.cpp +++ b/src/KOKKOS/atom_vec_kokkos.cpp @@ -12,6 +12,10 @@ ------------------------------------------------------------------------- */ #include "atom_vec_kokkos.h" +#include "atom_kokkos.h" +#include "comm_kokkos.h" +#include "domain.h" +#include "atom_masks.h" using namespace LAMMPS_NS; @@ -24,3 +28,585 @@ AtomVecKokkos::AtomVecKokkos(LAMMPS *lmp) : AtomVec(lmp) buffer_size = 0; } +/* ---------------------------------------------------------------------- */ + +template<class DeviceType,int PBC_FLAG,int TRICLINIC> +struct AtomVecKokkos_PackComm { + typedef DeviceType device_type; + + typename ArrayTypes<DeviceType>::t_x_array_randomread _x; + typename ArrayTypes<DeviceType>::t_xfloat_2d_um _buf; + typename ArrayTypes<DeviceType>::t_int_2d_const _list; + const int _iswap; + X_FLOAT _xprd,_yprd,_zprd,_xy,_xz,_yz; + X_FLOAT _pbc[6]; + + AtomVecKokkos_PackComm( + const typename DAT::tdual_x_array &x, + const typename DAT::tdual_xfloat_2d &buf, + const typename DAT::tdual_int_2d &list, + const int & iswap, + const X_FLOAT &xprd, const X_FLOAT &yprd, const X_FLOAT &zprd, + const X_FLOAT &xy, const X_FLOAT &xz, const X_FLOAT &yz, const int* const pbc): + _x(x.view<DeviceType>()),_list(list.view<DeviceType>()),_iswap(iswap), + _xprd(xprd),_yprd(yprd),_zprd(zprd), + _xy(xy),_xz(xz),_yz(yz) { + const size_t maxsend = 
(buf.view<DeviceType>().dimension_0()*buf.view<DeviceType>().dimension_1())/3; + const size_t elements = 3; + buffer_view<DeviceType>(_buf,buf,maxsend,elements); + _pbc[0] = pbc[0]; _pbc[1] = pbc[1]; _pbc[2] = pbc[2]; + _pbc[3] = pbc[3]; _pbc[4] = pbc[4]; _pbc[5] = pbc[5]; + }; + + KOKKOS_INLINE_FUNCTION + void operator() (const int& i) const { + const int j = _list(_iswap,i); + if (PBC_FLAG == 0) { + _buf(i,0) = _x(j,0); + _buf(i,1) = _x(j,1); + _buf(i,2) = _x(j,2); + } else { + if (TRICLINIC == 0) { + _buf(i,0) = _x(j,0) + _pbc[0]*_xprd; + _buf(i,1) = _x(j,1) + _pbc[1]*_yprd; + _buf(i,2) = _x(j,2) + _pbc[2]*_zprd; + } else { + _buf(i,0) = _x(j,0) + _pbc[0]*_xprd + _pbc[5]*_xy + _pbc[4]*_xz; + _buf(i,1) = _x(j,1) + _pbc[1]*_yprd + _pbc[3]*_yz; + _buf(i,2) = _x(j,2) + _pbc[2]*_zprd; + } + } + } +}; + +/* ---------------------------------------------------------------------- */ + +int AtomVecKokkos::pack_comm_kokkos(const int &n, + const DAT::tdual_int_2d &list, + const int & iswap, + const DAT::tdual_xfloat_2d &buf, + const int &pbc_flag, + const int* const pbc) +{ + // Check whether to always run forward communication on the host + // Choose correct forward PackComm kernel + + if(commKK->forward_comm_on_host) { + sync(Host,X_MASK); + if(pbc_flag) { + if(domain->triclinic) { + struct AtomVecKokkos_PackComm<LMPHostType,1,1> f(atomKK->k_x,buf,list,iswap, + domain->xprd,domain->yprd,domain->zprd, + domain->xy,domain->xz,domain->yz,pbc); + Kokkos::parallel_for(n,f); + } else { + struct AtomVecKokkos_PackComm<LMPHostType,1,0> f(atomKK->k_x,buf,list,iswap, + domain->xprd,domain->yprd,domain->zprd, + domain->xy,domain->xz,domain->yz,pbc); + Kokkos::parallel_for(n,f); + } + } else { + if(domain->triclinic) { + struct AtomVecKokkos_PackComm<LMPHostType,0,1> f(atomKK->k_x,buf,list,iswap, + domain->xprd,domain->yprd,domain->zprd, + domain->xy,domain->xz,domain->yz,pbc); + Kokkos::parallel_for(n,f); + } else { + struct AtomVecKokkos_PackComm<LMPHostType,0,0> 
f(atomKK->k_x,buf,list,iswap, + domain->xprd,domain->yprd,domain->zprd, + domain->xy,domain->xz,domain->yz,pbc); + Kokkos::parallel_for(n,f); + } + } + } else { + sync(Device,X_MASK); + if(pbc_flag) { + if(domain->triclinic) { + struct AtomVecKokkos_PackComm<LMPDeviceType,1,1> f(atomKK->k_x,buf,list,iswap, + domain->xprd,domain->yprd,domain->zprd, + domain->xy,domain->xz,domain->yz,pbc); + Kokkos::parallel_for(n,f); + } else { + struct AtomVecKokkos_PackComm<LMPDeviceType,1,0> f(atomKK->k_x,buf,list,iswap, + domain->xprd,domain->yprd,domain->zprd, + domain->xy,domain->xz,domain->yz,pbc); + Kokkos::parallel_for(n,f); + } + } else { + if(domain->triclinic) { + struct AtomVecKokkos_PackComm<LMPDeviceType,0,1> f(atomKK->k_x,buf,list,iswap, + domain->xprd,domain->yprd,domain->zprd, + domain->xy,domain->xz,domain->yz,pbc); + Kokkos::parallel_for(n,f); + } else { + struct AtomVecKokkos_PackComm<LMPDeviceType,0,0> f(atomKK->k_x,buf,list,iswap, + domain->xprd,domain->yprd,domain->zprd, + domain->xy,domain->xz,domain->yz,pbc); + Kokkos::parallel_for(n,f); + } + } + } + + return n*size_forward; +} + +/* ---------------------------------------------------------------------- */ + +template<class DeviceType,int PBC_FLAG,int TRICLINIC> +struct AtomVecKokkos_PackCommSelf { + typedef DeviceType device_type; + + typename ArrayTypes<DeviceType>::t_x_array_randomread _x; + typename ArrayTypes<DeviceType>::t_x_array _xw; + int _nfirst; + typename ArrayTypes<DeviceType>::t_int_2d_const _list; + const int _iswap; + X_FLOAT _xprd,_yprd,_zprd,_xy,_xz,_yz; + X_FLOAT _pbc[6]; + + AtomVecKokkos_PackCommSelf( + const typename DAT::tdual_x_array &x, + const int &nfirst, + const typename DAT::tdual_int_2d &list, + const int & iswap, + const X_FLOAT &xprd, const X_FLOAT &yprd, const X_FLOAT &zprd, + const X_FLOAT &xy, const X_FLOAT &xz, const X_FLOAT &yz, const int* const pbc): + _x(x.view<DeviceType>()),_xw(x.view<DeviceType>()),_nfirst(nfirst),_list(list.view<DeviceType>()),_iswap(iswap), + 
_xprd(xprd),_yprd(yprd),_zprd(zprd), + _xy(xy),_xz(xz),_yz(yz) { + _pbc[0] = pbc[0]; _pbc[1] = pbc[1]; _pbc[2] = pbc[2]; + _pbc[3] = pbc[3]; _pbc[4] = pbc[4]; _pbc[5] = pbc[5]; + }; + + KOKKOS_INLINE_FUNCTION + void operator() (const int& i) const { + const int j = _list(_iswap,i); + if (PBC_FLAG == 0) { + _xw(i+_nfirst,0) = _x(j,0); + _xw(i+_nfirst,1) = _x(j,1); + _xw(i+_nfirst,2) = _x(j,2); + } else { + if (TRICLINIC == 0) { + _xw(i+_nfirst,0) = _x(j,0) + _pbc[0]*_xprd; + _xw(i+_nfirst,1) = _x(j,1) + _pbc[1]*_yprd; + _xw(i+_nfirst,2) = _x(j,2) + _pbc[2]*_zprd; + } else { + _xw(i+_nfirst,0) = _x(j,0) + _pbc[0]*_xprd + _pbc[5]*_xy + _pbc[4]*_xz; + _xw(i+_nfirst,1) = _x(j,1) + _pbc[1]*_yprd + _pbc[3]*_yz; + _xw(i+_nfirst,2) = _x(j,2) + _pbc[2]*_zprd; + } + } + + } +}; + +/* ---------------------------------------------------------------------- */ + +int AtomVecKokkos::pack_comm_self(const int &n, const DAT::tdual_int_2d &list, const int & iswap, + const int nfirst, const int &pbc_flag, const int* const pbc) { + if(commKK->forward_comm_on_host) { + sync(Host,X_MASK); + modified(Host,X_MASK); + if(pbc_flag) { + if(domain->triclinic) { + struct AtomVecKokkos_PackCommSelf<LMPHostType,1,1> f(atomKK->k_x,nfirst,list,iswap, + domain->xprd,domain->yprd,domain->zprd, + domain->xy,domain->xz,domain->yz,pbc); + Kokkos::parallel_for(n,f); + } else { + struct AtomVecKokkos_PackCommSelf<LMPHostType,1,0> f(atomKK->k_x,nfirst,list,iswap, + domain->xprd,domain->yprd,domain->zprd, + domain->xy,domain->xz,domain->yz,pbc); + Kokkos::parallel_for(n,f); + } + } else { + if(domain->triclinic) { + struct AtomVecKokkos_PackCommSelf<LMPHostType,0,1> f(atomKK->k_x,nfirst,list,iswap, + domain->xprd,domain->yprd,domain->zprd, + domain->xy,domain->xz,domain->yz,pbc); + Kokkos::parallel_for(n,f); + } else { + struct AtomVecKokkos_PackCommSelf<LMPHostType,0,0> f(atomKK->k_x,nfirst,list,iswap, + domain->xprd,domain->yprd,domain->zprd, + domain->xy,domain->xz,domain->yz,pbc); + 
Kokkos::parallel_for(n,f); + } + } + } else { + sync(Device,X_MASK); + modified(Device,X_MASK); + if(pbc_flag) { + if(domain->triclinic) { + struct AtomVecKokkos_PackCommSelf<LMPDeviceType,1,1> f(atomKK->k_x,nfirst,list,iswap, + domain->xprd,domain->yprd,domain->zprd, + domain->xy,domain->xz,domain->yz,pbc); + Kokkos::parallel_for(n,f); + } else { + struct AtomVecKokkos_PackCommSelf<LMPDeviceType,1,0> f(atomKK->k_x,nfirst,list,iswap, + domain->xprd,domain->yprd,domain->zprd, + domain->xy,domain->xz,domain->yz,pbc); + Kokkos::parallel_for(n,f); + } + } else { + if(domain->triclinic) { + struct AtomVecKokkos_PackCommSelf<LMPDeviceType,0,1> f(atomKK->k_x,nfirst,list,iswap, + domain->xprd,domain->yprd,domain->zprd, + domain->xy,domain->xz,domain->yz,pbc); + Kokkos::parallel_for(n,f); + } else { + struct AtomVecKokkos_PackCommSelf<LMPDeviceType,0,0> f(atomKK->k_x,nfirst,list,iswap, + domain->xprd,domain->yprd,domain->zprd, + domain->xy,domain->xz,domain->yz,pbc); + Kokkos::parallel_for(n,f); + } + } + } + return n*3; +} + +/* ---------------------------------------------------------------------- */ + +template<class DeviceType> +struct AtomVecKokkos_UnpackComm { + typedef DeviceType device_type; + + typename ArrayTypes<DeviceType>::t_x_array _x; + typename ArrayTypes<DeviceType>::t_xfloat_2d_const _buf; + int _first; + + AtomVecKokkos_UnpackComm( + const typename DAT::tdual_x_array &x, + const typename DAT::tdual_xfloat_2d &buf, + const int& first):_x(x.view<DeviceType>()),_buf(buf.view<DeviceType>()), + _first(first) {}; + + KOKKOS_INLINE_FUNCTION + void operator() (const int& i) const { + _x(i+_first,0) = _buf(i,0); + _x(i+_first,1) = _buf(i,1); + _x(i+_first,2) = _buf(i,2); + } +}; + +/* ---------------------------------------------------------------------- */ + +void AtomVecKokkos::unpack_comm_kokkos(const int &n, const int &first, + const DAT::tdual_xfloat_2d &buf ) { + if(commKK->forward_comm_on_host) { + sync(Host,X_MASK); + modified(Host,X_MASK); + struct 
AtomVecKokkos_UnpackComm<LMPHostType> f(atomKK->k_x,buf,first); + Kokkos::parallel_for(n,f); + } else { + sync(Device,X_MASK); + modified(Device,X_MASK); + struct AtomVecKokkos_UnpackComm<LMPDeviceType> f(atomKK->k_x,buf,first); + Kokkos::parallel_for(n,f); + } +} + +/* ---------------------------------------------------------------------- */ + +int AtomVecKokkos::pack_comm(int n, int *list, double *buf, + int pbc_flag, int *pbc) +{ + int i,j,m; + double dx,dy,dz; + + m = 0; + if (pbc_flag == 0) { + for (i = 0; i < n; i++) { + j = list[i]; + buf[m++] = h_x(j,0); + buf[m++] = h_x(j,1); + buf[m++] = h_x(j,2); + } + } else { + if (domain->triclinic == 0) { + dx = pbc[0]*domain->xprd; + dy = pbc[1]*domain->yprd; + dz = pbc[2]*domain->zprd; + } else { + dx = pbc[0]*domain->xprd + pbc[5]*domain->xy + pbc[4]*domain->xz; + dy = pbc[1]*domain->yprd + pbc[3]*domain->yz; + dz = pbc[2]*domain->zprd; + } + for (i = 0; i < n; i++) { + j = list[i]; + buf[m++] = h_x(j,0) + dx; + buf[m++] = h_x(j,1) + dy; + buf[m++] = h_x(j,2) + dz; + } + } + return m; +} + +/* ---------------------------------------------------------------------- */ + +int AtomVecKokkos::pack_comm_vel(int n, int *list, double *buf, + int pbc_flag, int *pbc) +{ + int i,j,m; + double dx,dy,dz,dvx,dvy,dvz; + + m = 0; + if (pbc_flag == 0) { + for (i = 0; i < n; i++) { + j = list[i]; + buf[m++] = h_x(j,0); + buf[m++] = h_x(j,1); + buf[m++] = h_x(j,2); + buf[m++] = h_v(j,0); + buf[m++] = h_v(j,1); + buf[m++] = h_v(j,2); + } + } else { + if (domain->triclinic == 0) { + dx = pbc[0]*domain->xprd; + dy = pbc[1]*domain->yprd; + dz = pbc[2]*domain->zprd; + } else { + dx = pbc[0]*domain->xprd + pbc[5]*domain->xy + pbc[4]*domain->xz; + dy = pbc[1]*domain->yprd + pbc[3]*domain->yz; + dz = pbc[2]*domain->zprd; + } + if (!deform_vremap) { + for (i = 0; i < n; i++) { + j = list[i]; + buf[m++] = h_x(j,0) + dx; + buf[m++] = h_x(j,1) + dy; + buf[m++] = h_x(j,2) + dz; + buf[m++] = h_v(j,0); + buf[m++] = h_v(j,1); + buf[m++] = h_v(j,2); 
+ } + } else { + dvx = pbc[0]*h_rate[0] + pbc[5]*h_rate[5] + pbc[4]*h_rate[4]; + dvy = pbc[1]*h_rate[1] + pbc[3]*h_rate[3]; + dvz = pbc[2]*h_rate[2]; + for (i = 0; i < n; i++) { + j = list[i]; + buf[m++] = h_x(j,0) + dx; + buf[m++] = h_x(j,1) + dy; + buf[m++] = h_x(j,2) + dz; + if (atom->mask[i] & deform_groupbit) { + buf[m++] = h_v(j,0) + dvx; + buf[m++] = h_v(j,1) + dvy; + buf[m++] = h_v(j,2) + dvz; + } else { + buf[m++] = h_v(j,0); + buf[m++] = h_v(j,1); + buf[m++] = h_v(j,2); + } + } + } + } + return m; +} + +/* ---------------------------------------------------------------------- */ + +void AtomVecKokkos::unpack_comm(int n, int first, double *buf) +{ + int i,m,last; + + m = 0; + last = first + n; + for (i = first; i < last; i++) { + h_x(i,0) = buf[m++]; + h_x(i,1) = buf[m++]; + h_x(i,2) = buf[m++]; + } +} + +/* ---------------------------------------------------------------------- */ + +void AtomVecKokkos::unpack_comm_vel(int n, int first, double *buf) +{ + int i,m,last; + + m = 0; + last = first + n; + for (i = first; i < last; i++) { + h_x(i,0) = buf[m++]; + h_x(i,1) = buf[m++]; + h_x(i,2) = buf[m++]; + h_v(i,0) = buf[m++]; + h_v(i,1) = buf[m++]; + h_v(i,2) = buf[m++]; + } +} + +/* ---------------------------------------------------------------------- */ + +template<class DeviceType> +struct AtomVecKokkos_PackReverse { + typedef DeviceType device_type; + + typename ArrayTypes<DeviceType>::t_f_array_randomread _f; + typename ArrayTypes<DeviceType>::t_ffloat_2d _buf; + int _first; + + AtomVecKokkos_PackReverse( + const typename DAT::tdual_f_array &f, + const typename DAT::tdual_ffloat_2d &buf, + const int& first):_f(f.view<DeviceType>()),_buf(buf.view<DeviceType>()), + _first(first) {}; + + KOKKOS_INLINE_FUNCTION + void operator() (const int& i) const { + _buf(i,0) = _f(i+_first,0); + _buf(i,1) = _f(i+_first,1); + _buf(i,2) = _f(i+_first,2); + } +}; + +/* ---------------------------------------------------------------------- */ + +int 
AtomVecKokkos::pack_reverse_kokkos(const int &n, const int &first, + const DAT::tdual_ffloat_2d &buf ) { + if(commKK->reverse_comm_on_host) { + sync(Host,F_MASK); + struct AtomVecKokkos_PackReverse<LMPHostType> f(atomKK->k_f,buf,first); + Kokkos::parallel_for(n,f); + } else { + sync(Device,F_MASK); + struct AtomVecKokkos_PackReverse<LMPDeviceType> f(atomKK->k_f,buf,first); + Kokkos::parallel_for(n,f); + } + + return n*size_reverse; +} + +/* ---------------------------------------------------------------------- */ + +template<class DeviceType> +struct AtomVecKokkos_UnPackReverseSelf { + typedef DeviceType device_type; + + typename ArrayTypes<DeviceType>::t_f_array_randomread _f; + typename ArrayTypes<DeviceType>::t_f_array _fw; + int _nfirst; + typename ArrayTypes<DeviceType>::t_int_2d_const _list; + const int _iswap; + + AtomVecKokkos_UnPackReverseSelf( + const typename DAT::tdual_f_array &f, + const int &nfirst, + const typename DAT::tdual_int_2d &list, + const int & iswap): + _f(f.view<DeviceType>()),_fw(f.view<DeviceType>()),_nfirst(nfirst),_list(list.view<DeviceType>()),_iswap(iswap) { + }; + + KOKKOS_INLINE_FUNCTION + void operator() (const int& i) const { + const int j = _list(_iswap,i); + _fw(j,0) += _f(i+_nfirst,0); + _fw(j,1) += _f(i+_nfirst,1); + _fw(j,2) += _f(i+_nfirst,2); + } +}; + +/* ---------------------------------------------------------------------- */ + +int AtomVecKokkos::unpack_reverse_self(const int &n, const DAT::tdual_int_2d &list, const int & iswap, + const int nfirst) { + if(commKK->reverse_comm_on_host) { + sync(Host,F_MASK); + struct AtomVecKokkos_UnPackReverseSelf<LMPHostType> f(atomKK->k_f,nfirst,list,iswap); + Kokkos::parallel_for(n,f); + modified(Host,F_MASK); + } else { + sync(Device,F_MASK); + struct AtomVecKokkos_UnPackReverseSelf<LMPDeviceType> f(atomKK->k_f,nfirst,list,iswap); + Kokkos::parallel_for(n,f); + modified(Device,F_MASK); + } + return n*3; +} + +/* ---------------------------------------------------------------------- 
*/ + +template<class DeviceType> +struct AtomVecKokkos_UnPackReverse { + typedef DeviceType device_type; + + typename ArrayTypes<DeviceType>::t_f_array _f; + typename ArrayTypes<DeviceType>::t_ffloat_2d_const _buf; + typename ArrayTypes<DeviceType>::t_int_2d_const _list; + const int _iswap; + + AtomVecKokkos_UnPackReverse( + const typename DAT::tdual_f_array &f, + const typename DAT::tdual_ffloat_2d &buf, + const typename DAT::tdual_int_2d &list, + const int & iswap): + _f(f.view<DeviceType>()),_list(list.view<DeviceType>()),_iswap(iswap) { + const size_t maxsend = (buf.view<DeviceType>().dimension_0()*buf.view<DeviceType>().dimension_1())/3; + const size_t elements = 3; + buffer_view<DeviceType>(_buf,buf,maxsend,elements); + }; + + KOKKOS_INLINE_FUNCTION + void operator() (const int& i) const { + const int j = _list(_iswap,i); + _f(j,0) += _buf(i,0); + _f(j,1) += _buf(i,1); + _f(j,2) += _buf(i,2); + } +}; + +/* ---------------------------------------------------------------------- */ + +void AtomVecKokkos::unpack_reverse_kokkos(const int &n, + const DAT::tdual_int_2d &list, + const int & iswap, + const DAT::tdual_ffloat_2d &buf) +{ + // Check whether to always run reverse communication on the host + // Choose correct reverse UnPackReverse kernel + + if(commKK->reverse_comm_on_host) { + struct AtomVecKokkos_UnPackReverse<LMPHostType> f(atomKK->k_f,buf,list,iswap); + Kokkos::parallel_for(n,f); + modified(Host,F_MASK); + } else { + struct AtomVecKokkos_UnPackReverse<LMPDeviceType> f(atomKK->k_f,buf,list,iswap); + Kokkos::parallel_for(n,f); + modified(Device,F_MASK); + } +} + +/* ---------------------------------------------------------------------- */ + +int AtomVecKokkos::pack_reverse(int n, int first, double *buf) +{ + if(n > 0) + sync(Host,F_MASK); + + int m = 0; + const int last = first + n; + for (int i = first; i < last; i++) { + buf[m++] = h_f(i,0); + buf[m++] = h_f(i,1); + buf[m++] = h_f(i,2); + } + + return m; +} + +/* 
---------------------------------------------------------------------- */ + +void AtomVecKokkos::unpack_reverse(int n, int *list, double *buf) +{ + int m = 0; + for (int i = 0; i < n; i++) { + const int j = list[i]; + h_f(j,0) += buf[m++]; + h_f(j,1) += buf[m++]; + h_f(j,2) += buf[m++]; + } + + if(n > 0) + modified(Host,F_MASK); +} diff --git a/src/KOKKOS/atom_vec_kokkos.h b/src/KOKKOS/atom_vec_kokkos.h index 7f593f235f6736d0dd966c6989feb40632705b8a..20a07ec443690451a963ce14c51ed0f71873ae93 100644 --- a/src/KOKKOS/atom_vec_kokkos.h +++ b/src/KOKKOS/atom_vec_kokkos.h @@ -35,29 +35,48 @@ class AtomVecKokkos : public AtomVec { public: AtomVecKokkos(class LAMMPS *); virtual ~AtomVecKokkos() {} + virtual int pack_comm(int, int *, double *, int, int *); + virtual int pack_comm_vel(int, int *, double *, int, int *); + virtual void unpack_comm(int, int, double *); + virtual void unpack_comm_vel(int, int, double *); + virtual int pack_reverse(int, int, double *); + virtual void unpack_reverse(int, int *, double *); virtual void sync(ExecutionSpace space, unsigned int mask) = 0; virtual void modified(ExecutionSpace space, unsigned int mask) = 0; - virtual void sync_overlapping_device(ExecutionSpace space, unsigned int mask) {}; + virtual void sync_overlapping_device(ExecutionSpace space, unsigned int mask) = 0; virtual int pack_comm_self(const int &n, const DAT::tdual_int_2d &list, const int & iswap, const int nfirst, - const int &pbc_flag, const int pbc[]) = 0; - //{return 0;} + const int &pbc_flag, const int pbc[]); + virtual int pack_comm_kokkos(const int &n, const DAT::tdual_int_2d &list, const int & iswap, const DAT::tdual_xfloat_2d &buf, - const int &pbc_flag, const int pbc[]) = 0; - //{return 0;} + const int &pbc_flag, const int pbc[]); + virtual void unpack_comm_kokkos(const int &n, const int &nfirst, - const DAT::tdual_xfloat_2d &buf) = 0; + const DAT::tdual_xfloat_2d &buf); + + virtual int + unpack_reverse_self(const int &n, const DAT::tdual_int_2d &list, + const 
int & iswap, const int nfirst); + + virtual int + pack_reverse_kokkos(const int &n, const int &nfirst, + const DAT::tdual_ffloat_2d &buf); + + virtual void + unpack_reverse_kokkos(const int &n, const DAT::tdual_int_2d &list, + const int & iswap, const DAT::tdual_ffloat_2d &buf); + virtual int pack_border_kokkos(int n, DAT::tdual_int_2d k_sendlist, DAT::tdual_xfloat_2d buf,int iswap, int pbc_flag, int *pbc, ExecutionSpace space) = 0; - //{return 0;}; + virtual void unpack_border_kokkos(const int &n, const int &nfirst, const DAT::tdual_xfloat_2d &buf, @@ -68,15 +87,19 @@ class AtomVecKokkos : public AtomVec { DAT::tdual_int_1d k_sendlist, DAT::tdual_int_1d k_copylist, ExecutionSpace space, int dim, X_FLOAT lo, X_FLOAT hi) = 0; - //{return 0;}; + virtual int unpack_exchange_kokkos(DAT::tdual_xfloat_2d &k_buf, int nrecv, int nlocal, int dim, X_FLOAT lo, X_FLOAT hi, ExecutionSpace space) = 0; - //{return 0;}; + protected: + HAT::t_x_array h_x; + HAT::t_v_array h_v; + HAT::t_f_array h_f; + class CommKokkos *commKK; size_t buffer_size; void* buffer; diff --git a/src/KOKKOS/comm_kokkos.cpp b/src/KOKKOS/comm_kokkos.cpp index f5ed0f525fee6aeba2642e93c32b5fb5917cbc80..5534341342d5017378169609dce1a0c5964d8338 100644 --- a/src/KOKKOS/comm_kokkos.cpp +++ b/src/KOKKOS/comm_kokkos.cpp @@ -46,7 +46,8 @@ CommKokkos::CommKokkos(LAMMPS *lmp) : CommBrick(lmp) if (sendlist) for (int i = 0; i < maxswap; i++) memory->destroy(sendlist[i]); memory->sfree(sendlist); sendlist = NULL; - k_sendlist = ArrayTypes<LMPDeviceType>::tdual_int_2d(); + k_sendlist = DAT::tdual_int_2d(); + k_total_send = DAT::tdual_int_scalar("comm::k_total_send"); // error check for disallow of OpenMP threads? 
@@ -57,12 +58,12 @@ CommKokkos::CommKokkos(LAMMPS *lmp) : CommBrick(lmp) memory->destroy(buf_recv); buf_recv = NULL; - k_exchange_sendlist = ArrayTypes<LMPDeviceType>:: + k_exchange_sendlist = DAT:: tdual_int_1d("comm:k_exchange_sendlist",100); - k_exchange_copylist = ArrayTypes<LMPDeviceType>:: + k_exchange_copylist = DAT:: tdual_int_1d("comm:k_exchange_copylist",100); - k_count = ArrayTypes<LMPDeviceType>::tdual_int_1d("comm:k_count",1); - k_sendflag = ArrayTypes<LMPDeviceType>::tdual_int_1d("comm:k_sendflag",100); + k_count = DAT::tdual_int_scalar("comm:k_count"); + k_sendflag = DAT::tdual_int_1d("comm:k_sendflag",100); memory->destroy(maxsendlist); maxsendlist = NULL; @@ -102,8 +103,10 @@ void CommKokkos::init() atomKK = (AtomKokkos *) atom; exchange_comm_classic = lmp->kokkos->exchange_comm_classic; forward_comm_classic = lmp->kokkos->forward_comm_classic; + reverse_comm_classic = lmp->kokkos->reverse_comm_classic; exchange_comm_on_host = lmp->kokkos->exchange_comm_on_host; forward_comm_on_host = lmp->kokkos->forward_comm_on_host; + reverse_comm_on_host = lmp->kokkos->reverse_comm_on_host; CommBrick::init(); @@ -132,8 +135,11 @@ void CommKokkos::init() if (force->newton == 0) check_reverse = 0; if (force->pair) check_reverse += force->pair->comm_reverse_off; - if(check_reverse || check_forward) + if (ghost_velocity) forward_comm_classic = true; + + if (!comm_f_only) // not all Kokkos atom_vec styles have reverse pack/unpack routines yet + reverse_comm_classic = true; } /* ---------------------------------------------------------------------- @@ -173,7 +179,6 @@ void CommKokkos::forward_comm_device(int dummy) int n; MPI_Request request; AtomVecKokkos *avec = (AtomVecKokkos *) atom->avec; - double **x = atom->x; double *buf; // exchange data with another proc @@ -181,32 +186,29 @@ void CommKokkos::forward_comm_device(int dummy) // if comm_x_only set, exchange or copy directly to x, don't unpack k_sendlist.sync<DeviceType>(); + 
atomKK->sync(ExecutionSpaceFromDevice<DeviceType>::space,X_MASK); for (int iswap = 0; iswap < nswap; iswap++) { - if (sendproc[iswap] != me) { if (comm_x_only) { - atomKK->sync(ExecutionSpaceFromDevice<DeviceType>::space,X_MASK); - if (size_forward_recv[iswap]) buf = x[firstrecv[iswap]]; - else buf = NULL; - if (size_forward_recv[iswap]) { buf = atomKK->k_x.view<DeviceType>().ptr_on_device() + firstrecv[iswap]*atomKK->k_x.view<DeviceType>().dimension_1(); MPI_Irecv(buf,size_forward_recv[iswap],MPI_DOUBLE, - recvproc[iswap],0,world,&request); + recvproc[iswap],0,world,&request); } n = avec->pack_comm_kokkos(sendnum[iswap],k_sendlist, iswap,k_buf_send,pbc_flag[iswap],pbc[iswap]); - if (n) { MPI_Send(k_buf_send.view<DeviceType>().ptr_on_device(), n,MPI_DOUBLE,sendproc[iswap],0,world); } - if (size_forward_recv[iswap]) MPI_Wait(&request,MPI_STATUS_IGNORE); - atomKK->modified(ExecutionSpaceFromDevice<DeviceType>:: - space,X_MASK); + if (size_forward_recv[iswap]) { + MPI_Wait(&request,MPI_STATUS_IGNORE); + atomKK->modified(ExecutionSpaceFromDevice<DeviceType>:: + space,X_MASK); + } } else if (ghost_velocity) { error->all(FLERR,"Ghost velocity forward comm not yet " "implemented with Kokkos"); @@ -248,21 +250,93 @@ void CommKokkos::forward_comm_device(int dummy) } } } + +/* ---------------------------------------------------------------------- + reverse communication of forces on atoms every timestep + other per-atom attributes may also be sent via pack/unpack routines +------------------------------------------------------------------------- */ + void CommKokkos::reverse_comm() { + if (!reverse_comm_classic) { + if (reverse_comm_on_host) reverse_comm_device<LMPHostType>(); + else reverse_comm_device<LMPDeviceType>(); + return; + } + k_sendlist.sync<LMPHostType>(); + if (comm_f_only) atomKK->sync(Host,F_MASK); else atomKK->sync(Host,ALL_MASK); + CommBrick::reverse_comm(); + if (comm_f_only) atomKK->modified(Host,F_MASK); else atomKK->modified(Host,ALL_MASK); - 
atomKK->sync(Device,ALL_MASK); + + //atomKK->sync(Device,ALL_MASK); // is this needed? +} + +template<class DeviceType> +void CommKokkos::reverse_comm_device() +{ + int n; + MPI_Request request; + AtomVecKokkos *avec = (AtomVecKokkos *) atom->avec; + double *buf; + + // exchange data with another proc + // if other proc is self, just copy + // if comm_f_only set, exchange or copy directly from f, don't pack + + k_sendlist.sync<DeviceType>(); + atomKK->sync(ExecutionSpaceFromDevice<DeviceType>::space,F_MASK); + + for (int iswap = nswap-1; iswap >= 0; iswap--) { + if (sendproc[iswap] != me) { + if (comm_f_only) { + if (size_reverse_recv[iswap]) + MPI_Irecv(k_buf_recv.view<DeviceType>().ptr_on_device(),size_reverse_recv[iswap],MPI_DOUBLE, + sendproc[iswap],0,world,&request); + if (size_reverse_send[iswap]) { + buf = atomKK->k_f.view<DeviceType>().ptr_on_device() + + firstrecv[iswap]*atomKK->k_f.view<DeviceType>().dimension_1(); + + MPI_Send(buf,size_reverse_send[iswap],MPI_DOUBLE, + recvproc[iswap],0,world); + } + if (size_reverse_recv[iswap]) { + MPI_Wait(&request,MPI_STATUS_IGNORE); + atomKK->modified(ExecutionSpaceFromDevice<DeviceType>:: + space,F_MASK); + } + } else { + if (size_reverse_recv[iswap]) + MPI_Irecv(k_buf_recv.view<DeviceType>().ptr_on_device(), + size_reverse_recv[iswap],MPI_DOUBLE, + sendproc[iswap],0,world,&request); + n = avec->pack_reverse_kokkos(recvnum[iswap],firstrecv[iswap],k_buf_send); + if (n) + MPI_Send(k_buf_send.view<DeviceType>().ptr_on_device(),n, + MPI_DOUBLE,recvproc[iswap],0,world); + if (size_reverse_recv[iswap]) MPI_Wait(&request,MPI_STATUS_IGNORE); + } + avec->unpack_reverse_kokkos(sendnum[iswap],k_sendlist,iswap, + k_buf_recv); + } else { + if (sendnum[iswap]) + n = avec->unpack_reverse_self(sendnum[iswap],k_sendlist,iswap, + firstrecv[iswap]); + } + } } +/* ---------------------------------------------------------------------- */ + void CommKokkos::forward_comm_fix(Fix *fix, int size) { k_sendlist.sync<LMPHostType>(); @@ -408,7 
+482,7 @@ struct BuildExchangeListFunctor { typename AT::t_x_array _x; int _nlocal,_dim; - typename AT::t_int_1d _nsend; + typename AT::t_int_scalar _nsend; typename AT::t_int_1d _sendlist; typename AT::t_int_1d _sendflag; @@ -416,7 +490,7 @@ struct BuildExchangeListFunctor { BuildExchangeListFunctor( const typename AT::tdual_x_array x, const typename AT::tdual_int_1d sendlist, - typename AT::tdual_int_1d nsend, + typename AT::tdual_int_scalar nsend, typename AT::tdual_int_1d sendflag,int nlocal, int dim, X_FLOAT lo, X_FLOAT hi): _x(x.template view<DeviceType>()), @@ -430,7 +504,7 @@ struct BuildExchangeListFunctor { KOKKOS_INLINE_FUNCTION void operator() (int i) const { if (_x(i,_dim) < _lo || _x(i,_dim) >= _hi) { - const int mysend=Kokkos::atomic_fetch_add(&_nsend(0),1); + const int mysend=Kokkos::atomic_fetch_add(&_nsend(),1); if(mysend<_sendlist.dimension_0()) { _sendlist(mysend) = i; _sendflag(i) = 1; @@ -489,9 +563,9 @@ void CommKokkos::exchange_device() if (true) { if (k_sendflag.h_view.dimension_0()<nlocal) k_sendflag.resize(nlocal); k_sendflag.sync<DeviceType>(); - k_count.h_view(0) = k_exchange_sendlist.h_view.dimension_0(); - while (k_count.h_view(0)>=k_exchange_sendlist.h_view.dimension_0()) { - k_count.h_view(0) = 0; + k_count.h_view() = k_exchange_sendlist.h_view.dimension_0(); + while (k_count.h_view()>=k_exchange_sendlist.h_view.dimension_0()) { + k_count.h_view() = 0; k_count.modify<LMPHostType>(); k_count.sync<DeviceType>(); @@ -504,10 +578,10 @@ void CommKokkos::exchange_device() k_count.modify<DeviceType>(); k_count.sync<LMPHostType>(); - if (k_count.h_view(0)>=k_exchange_sendlist.h_view.dimension_0()) { - k_exchange_sendlist.resize(k_count.h_view(0)*1.1); - k_exchange_copylist.resize(k_count.h_view(0)*1.1); - k_count.h_view(0)=k_exchange_sendlist.h_view.dimension_0(); + if (k_count.h_view()>=k_exchange_sendlist.h_view.dimension_0()) { + k_exchange_sendlist.resize(k_count.h_view()*1.1); + k_exchange_copylist.resize(k_count.h_view()*1.1); + 
k_count.h_view()=k_exchange_sendlist.h_view.dimension_0(); } } k_exchange_copylist.sync<LMPHostType>(); @@ -515,22 +589,22 @@ void CommKokkos::exchange_device() k_sendflag.sync<LMPHostType>(); int sendpos = nlocal-1; - nlocal -= k_count.h_view(0); - for(int i = 0; i < k_count.h_view(0); i++) { + nlocal -= k_count.h_view(); + for(int i = 0; i < k_count.h_view(); i++) { if (k_exchange_sendlist.h_view(i)<nlocal) { while (k_sendflag.h_view(sendpos)) sendpos--; k_exchange_copylist.h_view(i) = sendpos; sendpos--; } else - k_exchange_copylist.h_view(i) = -1; + k_exchange_copylist.h_view(i) = -1; } k_exchange_copylist.modify<LMPHostType>(); k_exchange_copylist.sync<DeviceType>(); - nsend = k_count.h_view(0); + nsend = k_count.h_view(); if (nsend > maxsend) grow_send_kokkos(nsend,1); nsend = - avec->pack_exchange_kokkos(k_count.h_view(0),k_buf_send, + avec->pack_exchange_kokkos(k_count.h_view(),k_buf_send, k_exchange_sendlist,k_exchange_copylist, ExecutionSpaceFromDevice<DeviceType>:: space,dim,lo,hi); @@ -640,9 +714,7 @@ void CommKokkos::borders() } atomKK->sync(Host,ALL_MASK); - atomKK->modified(Host,ALL_MASK); k_sendlist.sync<LMPHostType>(); - k_sendlist.modify<LMPHostType>(); CommBrick::borders(); k_sendlist.modify<LMPHostType>(); atomKK->modified(Host,ALL_MASK); @@ -659,11 +731,11 @@ struct BuildBorderListFunctor { int iswap,maxsendlist; int nfirst,nlast,dim; typename AT::t_int_2d sendlist; - typename AT::t_int_1d nsend; + typename AT::t_int_scalar nsend; BuildBorderListFunctor(typename AT::tdual_x_array _x, typename AT::tdual_int_2d _sendlist, - typename AT::tdual_int_1d _nsend,int _nfirst, + typename AT::tdual_int_scalar _nsend,int _nfirst, int _nlast, int _dim, X_FLOAT _lo, X_FLOAT _hi, int _iswap, int _maxsendlist): @@ -684,7 +756,7 @@ struct BuildBorderListFunctor { for (int i=teamstart + dev.team_rank(); i<teamend; i+=dev.team_size()) { if (x(i,dim) >= lo && x(i,dim) <= hi) mysend++; } - const int my_store_pos = dev.team_scan(mysend,&nsend(0)); + const int 
my_store_pos = dev.team_scan(mysend,&nsend()); if (my_store_pos+mysend < maxsendlist) { mysend = my_store_pos; @@ -713,7 +785,7 @@ void CommKokkos::borders_device() { AtomVecKokkos *avec = (AtomVecKokkos *) atom->avec; ExecutionSpace exec_space = ExecutionSpaceFromDevice<DeviceType>::space; - k_sendlist.modify<DeviceType>(); + k_sendlist.sync<DeviceType>(); atomKK->sync(exec_space,ALL_MASK); // do swaps over all 3 dimensions @@ -763,37 +835,38 @@ void CommKokkos::borders_device() { if (sendflag) { if (!bordergroup || ineed >= 2) { if (style == SINGLE) { - typename ArrayTypes<DeviceType>::tdual_int_1d total_send("TS",1); - total_send.h_view(0) = 0; - if(exec_space == Device) { - total_send.template modify<DeviceType>(); - total_send.template sync<LMPDeviceType>(); - } + k_total_send.h_view() = 0; + k_total_send.template modify<LMPHostType>(); + k_total_send.template sync<LMPDeviceType>(); BuildBorderListFunctor<DeviceType> f(atomKK->k_x,k_sendlist, - total_send,nfirst,nlast,dim,lo,hi,iswap,maxsendlist[iswap]); + k_total_send,nfirst,nlast,dim,lo,hi,iswap,maxsendlist[iswap]); Kokkos::TeamPolicy<DeviceType> config((nlast-nfirst+127)/128,128); Kokkos::parallel_for(config,f); - total_send.template modify<DeviceType>(); - total_send.template sync<LMPHostType>(); + k_total_send.template modify<DeviceType>(); + k_total_send.template sync<LMPHostType>(); + + k_sendlist.modify<DeviceType>(); + + if(k_total_send.h_view() >= maxsendlist[iswap]) { + grow_list(iswap,k_total_send.h_view()); + + k_total_send.h_view() = 0; + k_total_send.template modify<LMPHostType>(); + k_total_send.template sync<LMPDeviceType>(); - if(total_send.h_view(0) >= maxsendlist[iswap]) { - grow_list(iswap,total_send.h_view(0)); - k_sendlist.modify<DeviceType>(); - total_send.h_view(0) = 0; - if(exec_space == Device) { - total_send.template modify<LMPHostType>(); - total_send.template sync<LMPDeviceType>(); - } BuildBorderListFunctor<DeviceType> f(atomKK->k_x,k_sendlist, - 
total_send,nfirst,nlast,dim,lo,hi,iswap,maxsendlist[iswap]); + k_total_send,nfirst,nlast,dim,lo,hi,iswap,maxsendlist[iswap]); Kokkos::TeamPolicy<DeviceType> config((nlast-nfirst+127)/128,128); Kokkos::parallel_for(config,f); - total_send.template modify<DeviceType>(); - total_send.template sync<LMPHostType>(); + + k_total_send.template modify<DeviceType>(); + k_total_send.template sync<LMPHostType>(); + + k_sendlist.modify<DeviceType>(); } - nsend = total_send.h_view(0); + nsend = k_total_send.h_view(); } else { error->all(FLERR,"Required border comm not yet " "implemented with Kokkos"); @@ -916,10 +989,11 @@ void CommKokkos::borders_device() { // reset global->local map - if (exec_space == Host) k_sendlist.sync<LMPDeviceType>(); atomKK->modified(exec_space,ALL_MASK); - atomKK->sync(Host,TAG_MASK); - if (map_style) atom->map_set(); + if (map_style) { + atomKK->sync(Host,TAG_MASK); + atom->map_set(); + } } /* ---------------------------------------------------------------------- realloc the size of the send buffer as needed with BUFFACTOR and bufextra @@ -961,7 +1035,7 @@ void CommKokkos::grow_send_kokkos(int n, int flag, ExecutionSpace space) buf_send = k_buf_send.view<LMPHostType>().ptr_on_device(); } else { - k_buf_send = ArrayTypes<LMPDeviceType>:: + k_buf_send = DAT:: tdual_xfloat_2d("comm:k_buf_send",maxsend_border,atom->avec->size_border); buf_send = k_buf_send.view<LMPHostType>().ptr_on_device(); } @@ -975,7 +1049,7 @@ void CommKokkos::grow_recv_kokkos(int n, ExecutionSpace space) { maxrecv = static_cast<int> (BUFFACTOR * n); int maxrecv_border = (maxrecv+BUFEXTRA+5)/atom->avec->size_border + 2; - k_buf_recv = ArrayTypes<LMPDeviceType>:: + k_buf_recv = DAT:: tdual_xfloat_2d("comm:k_buf_recv",maxrecv_border,atom->avec->size_border); buf_recv = k_buf_recv.view<LMPHostType>().ptr_on_device(); } @@ -988,6 +1062,11 @@ void CommKokkos::grow_list(int iswap, int n) { int size = static_cast<int> (BUFFACTOR * n); + if (exchange_comm_classic) { // force realloc on Host 
+ k_sendlist.sync<LMPHostType>(); + k_sendlist.modify<LMPHostType>(); + } + memory->grow_kokkos(k_sendlist,sendlist,maxswap,size,"comm:sendlist"); for(int i=0;i<maxswap;i++) { @@ -1011,6 +1090,11 @@ void CommKokkos::grow_swap(int n) maxswap = n; int size = MAX(k_sendlist.d_view.dimension_1(),BUFMIN); + if (exchange_comm_classic) { // force realloc on Host + k_sendlist.sync<LMPHostType>(); + k_sendlist.modify<LMPHostType>(); + } + memory->grow_kokkos(k_sendlist,sendlist,maxswap,size,"comm:sendlist"); memory->grow(maxsendlist,n,"comm:maxsendlist"); diff --git a/src/KOKKOS/comm_kokkos.h b/src/KOKKOS/comm_kokkos.h index a8ae973124f552167b19812ece2975b4d5072656..f137655cb8047fb1340d55283acc5e03ef59841f 100644 --- a/src/KOKKOS/comm_kokkos.h +++ b/src/KOKKOS/comm_kokkos.h @@ -25,15 +25,17 @@ class CommKokkos : public CommBrick { bool exchange_comm_classic; bool forward_comm_classic; + bool reverse_comm_classic; bool exchange_comm_on_host; bool forward_comm_on_host; + bool reverse_comm_on_host; CommKokkos(class LAMMPS *); ~CommKokkos(); void init(); void forward_comm(int dummy = 0); // forward comm of atom coords - void reverse_comm(); // reverse comm of atom coords + void reverse_comm(); // reverse comm of atom coords void exchange(); // move atoms to new procs void borders(); // setup list of atoms to comm @@ -47,15 +49,17 @@ class CommKokkos : public CommBrick { void reverse_comm_dump(class Dump *); // reverse comm from a Dump template<class DeviceType> void forward_comm_device(int dummy); + template<class DeviceType> void reverse_comm_device(); template<class DeviceType> void forward_comm_pair_device(Pair *pair); template<class DeviceType> void exchange_device(); template<class DeviceType> void borders_device(); protected: DAT::tdual_int_2d k_sendlist; + DAT::tdual_int_scalar k_total_send; DAT::tdual_xfloat_2d k_buf_send,k_buf_recv; DAT::tdual_int_1d k_exchange_sendlist,k_exchange_copylist,k_sendflag; - DAT::tdual_int_1d k_count; + DAT::tdual_int_scalar k_count; 
//double *buf_send; // send buffer for all comm //double *buf_recv; // recv buffer for all comm diff --git a/src/KOKKOS/fix_qeq_reax_kokkos.cpp b/src/KOKKOS/fix_qeq_reax_kokkos.cpp index e54b53ae89dfae19de258c12234728625f6650fe..5d2f6a0438a400302785c70975beee07d4d4f32b 100644 --- a/src/KOKKOS/fix_qeq_reax_kokkos.cpp +++ b/src/KOKKOS/fix_qeq_reax_kokkos.cpp @@ -63,6 +63,7 @@ FixQEqReaxKokkos(LAMMPS *lmp, int narg, char **arg) : nmax = nmax = m_cap = 0; allocated_flag = 0; + nprev = 4; } /* ---------------------------------------------------------------------- */ @@ -158,15 +159,15 @@ void FixQEqReaxKokkos<DeviceType>::init_hist() { int i,j; - k_s_hist = DAT::tdual_ffloat_2d("qeq/kk:s_hist",atom->nmax,5); + k_s_hist = DAT::tdual_ffloat_2d("qeq/kk:s_hist",atom->nmax,nprev); d_s_hist = k_s_hist.template view<DeviceType>(); h_s_hist = k_s_hist.h_view; - k_t_hist = DAT::tdual_ffloat_2d("qeq/kk:t_hist",atom->nmax,5); + k_t_hist = DAT::tdual_ffloat_2d("qeq/kk:t_hist",atom->nmax,nprev); d_t_hist = k_t_hist.template view<DeviceType>(); h_t_hist = k_t_hist.h_view; for( i = 0; i < atom->nmax; i++ ) - for( j = 0; j < 5; j++ ) + for( j = 0; j < nprev; j++ ) k_s_hist.h_view(i,j) = k_t_hist.h_view(i,j) = 0.0; k_s_hist.template modify<LMPHostType>(); @@ -334,11 +335,11 @@ void FixQEqReaxKokkos<DeviceType>::allocate_array() d_d = k_d.template view<DeviceType>(); h_d = k_d.h_view; - k_s_hist = DAT::tdual_ffloat_2d("qeq/kk:s_hist",nmax,5); + k_s_hist = DAT::tdual_ffloat_2d("qeq/kk:s_hist",nmax,nprev); d_s_hist = k_s_hist.template view<DeviceType>(); h_s_hist = k_s_hist.h_view; - k_t_hist = DAT::tdual_ffloat_2d("qeq/kk:t_hist",nmax,5); + k_t_hist = DAT::tdual_ffloat_2d("qeq/kk:t_hist",nmax,nprev); d_t_hist = k_t_hist.template view<DeviceType>(); h_t_hist = k_t_hist.h_view; } @@ -368,7 +369,7 @@ void FixQEqReaxKokkos<DeviceType>::zero_item(int ii) const d_o[i] = 0.0; d_r[i] = 0.0; d_d[i] = 0.0; - //for( int j = 0; j < 5; j++ ) + //for( int j = 0; j < nprev; j++ ) //d_s_hist(i,j) = 
d_t_hist(i,j) = 0.0; } @@ -1087,7 +1088,7 @@ void FixQEqReaxKokkos<DeviceType>::calculate_q_item(int ii) const if (mask[i] & groupbit) { q(i) = d_s[i] - delta * d_t[i]; - for (int k = 4; k > 0; --k) { + for (int k = nprev-1; k > 0; --k) { d_s_hist(i,k) = d_s_hist(i,k-1); d_t_hist(i,k) = d_t_hist(i,k-1); } @@ -1173,7 +1174,7 @@ double FixQEqReaxKokkos<DeviceType>::memory_usage() { double bytes; - bytes = atom->nmax*5*2 * sizeof(F_FLOAT); // s_hist & t_hist + bytes = atom->nmax*nprev*2 * sizeof(F_FLOAT); // s_hist & t_hist bytes += atom->nmax*8 * sizeof(F_FLOAT); // storage bytes += n_cap*2 * sizeof(int); // matrix... bytes += m_cap * sizeof(int); diff --git a/src/KOKKOS/kokkos.cpp b/src/KOKKOS/kokkos.cpp index 072a802b54e7534a8139e6f9092c65354b2c1ce9..2b02624dcef30f29947183cb4f2a5acb68916779 100644 --- a/src/KOKKOS/kokkos.cpp +++ b/src/KOKKOS/kokkos.cpp @@ -123,8 +123,10 @@ KokkosLMP::KokkosLMP(LAMMPS *lmp, int narg, char **arg) : Pointers(lmp) neighflag_qeq_set = 0; exchange_comm_classic = 0; forward_comm_classic = 0; + reverse_comm_classic = 0; exchange_comm_on_host = 0; forward_comm_on_host = 0; + reverse_comm_on_host = 0; #ifdef KILL_KOKKOS_ON_SIGSEGV signal(SIGSEGV, my_signal_handler); @@ -158,8 +160,8 @@ void KokkosLMP::accelerator(int narg, char **arg) neighflag_qeq_set = 0; int newtonflag = 0; double binsize = 0.0; - exchange_comm_classic = forward_comm_classic = 0; - exchange_comm_on_host = forward_comm_on_host = 0; + exchange_comm_classic = forward_comm_classic = reverse_comm_classic = 0; + exchange_comm_on_host = forward_comm_on_host = reverse_comm_on_host = 0; int iarg = 0; while (iarg < narg) { @@ -200,13 +202,13 @@ void KokkosLMP::accelerator(int narg, char **arg) } else if (strcmp(arg[iarg],"comm") == 0) { if (iarg+2 > narg) error->all(FLERR,"Illegal package kokkos command"); if (strcmp(arg[iarg+1],"no") == 0) { - exchange_comm_classic = forward_comm_classic = 1; + exchange_comm_classic = forward_comm_classic = reverse_comm_classic = 1; } else if 
(strcmp(arg[iarg+1],"host") == 0) { - exchange_comm_classic = forward_comm_classic = 0; - exchange_comm_on_host = forward_comm_on_host = 1; + exchange_comm_classic = forward_comm_classic = reverse_comm_classic = 0; + exchange_comm_on_host = forward_comm_on_host = reverse_comm_on_host = 1; } else if (strcmp(arg[iarg+1],"device") == 0) { - exchange_comm_classic = forward_comm_classic = 0; - exchange_comm_on_host = forward_comm_on_host = 0; + exchange_comm_classic = forward_comm_classic = reverse_comm_classic = 0; + exchange_comm_on_host = forward_comm_on_host = reverse_comm_on_host = 0; } else error->all(FLERR,"Illegal package kokkos command"); iarg += 2; } else if (strcmp(arg[iarg],"comm/exchange") == 0) { @@ -231,6 +233,17 @@ void KokkosLMP::accelerator(int narg, char **arg) forward_comm_on_host = 0; } else error->all(FLERR,"Illegal package kokkos command"); iarg += 2; + } else if (strcmp(arg[iarg],"comm/reverse") == 0) { + if (iarg+2 > narg) error->all(FLERR,"Illegal package kokkos command"); + if (strcmp(arg[iarg+1],"no") == 0) reverse_comm_classic = 1; + else if (strcmp(arg[iarg+1],"host") == 0) { + reverse_comm_classic = 0; + reverse_comm_on_host = 1; + } else if (strcmp(arg[iarg+1],"device") == 0) { + reverse_comm_classic = 0; + reverse_comm_on_host = 0; + } else error->all(FLERR,"Illegal package kokkos command"); + iarg += 2; } else error->all(FLERR,"Illegal package kokkos command"); } diff --git a/src/KOKKOS/kokkos.h b/src/KOKKOS/kokkos.h index 8e28b38cbf45b1e14c2a6e6da7fc59ff0f7b0920..7b7848f1f08ddfe3dbdbfcadfc13874e53218988 100644 --- a/src/KOKKOS/kokkos.h +++ b/src/KOKKOS/kokkos.h @@ -27,8 +27,10 @@ class KokkosLMP : protected Pointers { int neighflag_qeq_set; int exchange_comm_classic; int forward_comm_classic; + int reverse_comm_classic; int exchange_comm_on_host; int forward_comm_on_host; + int reverse_comm_on_host; int num_threads,ngpu; int numa; int auto_sync; diff --git a/src/KOKKOS/nbin_kokkos.cpp b/src/KOKKOS/nbin_kokkos.cpp index 
c7e815928a185f7255d36749121d17615d546661..95ea105ad9df6d95f0d2ddba64154f1af64d0579 100644 --- a/src/KOKKOS/nbin_kokkos.cpp +++ b/src/KOKKOS/nbin_kokkos.cpp @@ -75,6 +75,10 @@ void NBinKokkos<DeviceType>::bin_atoms_setup(int nall) k_bincount = DAT::tdual_int_1d("Neighbor::d_bincount",mbins); bincount = k_bincount.view<DeviceType>(); } + if (nall > k_atom2bin.d_view.dimension_0()) { + k_atom2bin = DAT::tdual_int_1d("Neighbor::d_atom2bin",nall); + atom2bin = k_atom2bin.view<DeviceType>(); + } } /* ---------------------------------------------------------------------- @@ -86,6 +90,10 @@ void NBinKokkos<DeviceType>::bin_atoms() { last_bin = update->ntimestep; + k_bins.template sync<DeviceType>(); + k_bincount.template sync<DeviceType>(); + k_atom2bin.template sync<DeviceType>(); + h_resize() = 1; while(h_resize() > 0) { @@ -115,6 +123,10 @@ void NBinKokkos<DeviceType>::bin_atoms() c_bins = bins; } } + + k_bins.template modify<DeviceType>(); + k_bincount.template modify<DeviceType>(); + k_atom2bin.template modify<DeviceType>(); } /* ---------------------------------------------------------------------- */ @@ -125,6 +137,7 @@ void NBinKokkos<DeviceType>::binatomsItem(const int &i) const { const int ibin = coord2bin(x(i, 0), x(i, 1), x(i, 2)); + atom2bin(i) = ibin; const int ac = Kokkos::atomic_fetch_add(&bincount[ibin], (int)1); if(ac < bins.dimension_1()) { bins(ibin, ac) = i; diff --git a/src/KOKKOS/nbin_kokkos.h b/src/KOKKOS/nbin_kokkos.h index de3cf41d19c816952c6e4b1540b536d1baa42a77..bf2ccc59086ed0ed1a27dfd545bab85dbd327911 100644 --- a/src/KOKKOS/nbin_kokkos.h +++ b/src/KOKKOS/nbin_kokkos.h @@ -44,11 +44,13 @@ class NBinKokkos : public NBinStandard { int atoms_per_bin; DAT::tdual_int_1d k_bincount; DAT::tdual_int_2d k_bins; + DAT::tdual_int_1d k_atom2bin; typename AT::t_int_1d bincount; const typename AT::t_int_1d_const c_bincount; typename AT::t_int_2d bins; typename AT::t_int_2d_const c_bins; + typename AT::t_int_1d atom2bin; typename AT::t_int_scalar d_resize; 
typename ArrayTypes<LMPHostType>::t_int_scalar h_resize; typename AT::t_x_array_randomread x; diff --git a/src/KOKKOS/neigh_list_kokkos.cpp b/src/KOKKOS/neigh_list_kokkos.cpp index caf2dfee561c49b3482750871157d52e36a75d99..04454e53cb302e2ad696809c034e1925a8aaa83d 100644 --- a/src/KOKKOS/neigh_list_kokkos.cpp +++ b/src/KOKKOS/neigh_list_kokkos.cpp @@ -49,15 +49,6 @@ void NeighListKokkos<Device>::grow(int nmax) d_neighbors = typename ArrayTypes<Device>::t_neighbors_2d("neighlist:neighbors", maxatoms,maxneighs); - - memory->sfree(firstneigh); - memory->sfree(firstdouble); - - firstneigh = (int **) memory->smalloc(maxatoms*sizeof(int *), - "neighlist:firstneigh"); - if (dnum) - firstdouble = (double **) memory->smalloc(maxatoms*sizeof(double *), - "neighlist:firstdouble"); } /* ---------------------------------------------------------------------- */ diff --git a/src/KOKKOS/neighbor_kokkos.cpp b/src/KOKKOS/neighbor_kokkos.cpp index 9a40808052f138bfaaf4bfbef49515f5a08386df..f34b149864d1c24179fdf2f41be36143ed5f9807 100644 --- a/src/KOKKOS/neighbor_kokkos.cpp +++ b/src/KOKKOS/neighbor_kokkos.cpp @@ -310,9 +310,9 @@ void NeighborKokkos::build_kokkos(int topoflag) // build pairwise lists for all perpetual NPair/NeighList // grow() with nlocal/nall args so that only realloc if have to - atomKK->sync(Host,ALL_MASK); for (i = 0; i < npair_perpetual; i++) { m = plist[i]; + if (!lists[m]->kokkos) atomKK->sync(Host,ALL_MASK); if (!lists[m]->copy) lists[m]->grow(nlocal,nall); neigh_pair[m]->build_setup(); neigh_pair[m]->build(lists[m]); diff --git a/src/KOKKOS/npair_copy_kokkos.cpp b/src/KOKKOS/npair_copy_kokkos.cpp index 6835d8c1b54ee37d5be17f7d60cfc1dda1370cbd..8702816033799d00aa871d47012404326f831ce8 100644 --- a/src/KOKKOS/npair_copy_kokkos.cpp +++ b/src/KOKKOS/npair_copy_kokkos.cpp @@ -41,10 +41,7 @@ void NPairCopyKokkos<DeviceType>::build(NeighList *list) list->gnum = listcopy->gnum; list->ilist = listcopy->ilist; list->numneigh = listcopy->numneigh; - list->firstneigh = 
listcopy->firstneigh; - list->firstdouble = listcopy->firstdouble; list->ipage = listcopy->ipage; - list->dpage = listcopy->dpage; NeighListKokkos<DeviceType>* list_kk = (NeighListKokkos<DeviceType>*) list; NeighListKokkos<DeviceType>* listcopy_kk = (NeighListKokkos<DeviceType>*) list->listcopy; diff --git a/src/KOKKOS/npair_kokkos.cpp b/src/KOKKOS/npair_kokkos.cpp index b568bd5c93923e0e25fe6fa32fb913b9f602c651..d3cdcb0680efef3420d0e1a44f966a265759defc 100644 --- a/src/KOKKOS/npair_kokkos.cpp +++ b/src/KOKKOS/npair_kokkos.cpp @@ -73,6 +73,7 @@ void NPairKokkos<DeviceType,HALF_NEIGH,GHOST,TRI>::copy_bin_info() atoms_per_bin = nbKK->atoms_per_bin; k_bincount = nbKK->k_bincount; k_bins = nbKK->k_bins; + k_atom2bin = nbKK->k_atom2bin; } /* ---------------------------------------------------------------------- @@ -88,13 +89,15 @@ void NPairKokkos<DeviceType,HALF_NEIGH,GHOST,TRI>::copy_stencil_info() int maxstencil = ns->get_maxstencil(); - k_stencil = DAT::tdual_int_1d("neighlist:stencil",maxstencil); + if (maxstencil > k_stencil.dimension_0()) + k_stencil = DAT::tdual_int_1d("neighlist:stencil",maxstencil); for (int k = 0; k < maxstencil; k++) k_stencil.h_view(k) = ns->stencil[k]; k_stencil.modify<LMPHostType>(); k_stencil.sync<DeviceType>(); if (GHOST) { - k_stencilxyz = DAT::tdual_int_1d_3("neighlist:stencilxyz",maxstencil); + if (maxstencil > k_stencilxyz.dimension_0()) + k_stencilxyz = DAT::tdual_int_1d_3("neighlist:stencilxyz",maxstencil); for (int k = 0; k < maxstencil; k++) { k_stencilxyz.h_view(k,0) = ns->stencilxyz[k][0]; k_stencilxyz.h_view(k,1) = ns->stencilxyz[k][1]; @@ -122,6 +125,7 @@ void NPairKokkos<DeviceType,HALF_NEIGH,GHOST,TRI>::build(NeighList *list_) k_cutneighsq.view<DeviceType>(), k_bincount.view<DeviceType>(), k_bins.view<DeviceType>(), + k_atom2bin.view<DeviceType>(), nstencil, k_stencil.view<DeviceType>(), k_stencilxyz.view<DeviceType>(), @@ -164,8 +168,9 @@ void NPairKokkos<DeviceType,HALF_NEIGH,GHOST,TRI>::build(NeighList *list_) 
k_ex_mol_group.sync<DeviceType>(); k_ex_mol_bit.sync<DeviceType>(); k_ex_mol_intra.sync<DeviceType>(); - k_bincount.sync<DeviceType>(), - k_bins.sync<DeviceType>(), + k_bincount.sync<DeviceType>(); + k_bins.sync<DeviceType>(); + k_atom2bin.sync<DeviceType>(); atomKK->sync(Device,X_MASK|TYPE_MASK|MASK_MASK|MOLECULE_MASK|TAG_MASK|SPECIAL_MASK); data.special_flag[0] = special_flag[0]; @@ -317,7 +322,7 @@ void NeighborKokkosExecute<DeviceType>:: const X_FLOAT ztmp = x(i, 2); const int itype = type(i); - const int ibin = coord2bin(xtmp, ytmp, ztmp); + const int ibin = c_atom2bin(i); const typename ArrayTypes<DeviceType>::t_int_1d_const_um stencil = d_stencil; @@ -431,7 +436,7 @@ void NeighborKokkosExecute<DeviceType>:: if(n > neigh_list.maxneighs) { resize() = 1; - if(n > new_maxneighs()) Kokkos::atomic_fetch_max(&new_maxneighs(),n); + if(n > new_maxneighs()) new_maxneighs() = n; // avoid atomics, safe because in while loop } neigh_list.d_ilist(i) = i; @@ -641,7 +646,7 @@ void NeighborKokkosExecute<DeviceType>::build_ItemCuda(typename Kokkos::TeamPoli if(n > neigh_list.maxneighs) { resize() = 1; - if(n > new_maxneighs()) Kokkos::atomic_fetch_max(&new_maxneighs(),n); + if(n > new_maxneighs()) new_maxneighs() = n; // avoid atomics, safe because in while loop } } } @@ -678,7 +683,7 @@ void NeighborKokkosExecute<DeviceType>:: // no molecular test when i = ghost atom if (i < nlocal) { - const int ibin = coord2bin(xtmp, ytmp, ztmp); + const int ibin = c_atom2bin(i); for (int k = 0; k < nstencil; k++) { const int jbin = ibin + stencil[k]; for(int m = 0; m < c_bincount(jbin); m++) { @@ -764,7 +769,7 @@ void NeighborKokkosExecute<DeviceType>:: if(n > neigh_list.maxneighs) { resize() = 1; - if(n > new_maxneighs()) Kokkos::atomic_fetch_max(&new_maxneighs(),n); + if(n > new_maxneighs()) new_maxneighs() = n; // avoid atomics, safe because in while loop } neigh_list.d_ilist(i) = i; } diff --git a/src/KOKKOS/npair_kokkos.h b/src/KOKKOS/npair_kokkos.h index 
517ea546fa1cff1e053fe84105a615316f8687f6..6c1c0e958b49dcd09ff7bb6b32efca67b2d3f6a6 100644 --- a/src/KOKKOS/npair_kokkos.h +++ b/src/KOKKOS/npair_kokkos.h @@ -105,6 +105,7 @@ class NPairKokkos : public NPair { int atoms_per_bin; DAT::tdual_int_1d k_bincount; DAT::tdual_int_2d k_bins; + DAT::tdual_int_1d k_atom2bin; // data from NStencil class @@ -148,6 +149,8 @@ class NeighborKokkosExecute const typename AT::t_int_1d_const c_bincount; typename AT::t_int_2d bins; typename AT::t_int_2d_const c_bins; + const typename AT::t_int_1d atom2bin; + const typename AT::t_int_1d_const c_atom2bin; // data from NStencil class @@ -190,6 +193,7 @@ class NeighborKokkosExecute const typename AT::t_xfloat_2d_randomread &_cutneighsq, const typename AT::t_int_1d &_bincount, const typename AT::t_int_2d &_bins, + const typename AT::t_int_1d &_atom2bin, const int _nstencil, const typename AT::t_int_1d &_d_stencil, const typename AT::t_int_1d_3 &_d_stencilxyz, @@ -224,6 +228,7 @@ class NeighborKokkosExecute const int & _xprd_half, const int & _yprd_half, const int & _zprd_half): neigh_list(_neigh_list), cutneighsq(_cutneighsq), bincount(_bincount),c_bincount(_bincount),bins(_bins),c_bins(_bins), + atom2bin(_atom2bin),c_atom2bin(_atom2bin), nstencil(_nstencil),d_stencil(_d_stencil),d_stencilxyz(_d_stencilxyz), nlocal(_nlocal), x(_x),type(_type),mask(_mask),molecule(_molecule), @@ -281,38 +286,6 @@ class NeighborKokkosExecute void build_ItemCuda(typename Kokkos::TeamPolicy<DeviceType>::member_type dev) const; #endif - KOKKOS_INLINE_FUNCTION - int coord2bin(const X_FLOAT & x,const X_FLOAT & y,const X_FLOAT & z) const - { - int ix,iy,iz; - - if (x >= bboxhi[0]) - ix = static_cast<int> ((x-bboxhi[0])*bininvx) + nbinx; - else if (x >= bboxlo[0]) { - ix = static_cast<int> ((x-bboxlo[0])*bininvx); - ix = MIN(ix,nbinx-1); - } else - ix = static_cast<int> ((x-bboxlo[0])*bininvx) - 1; - - if (y >= bboxhi[1]) - iy = static_cast<int> ((y-bboxhi[1])*bininvy) + nbiny; - else if (y >= bboxlo[1]) { - iy = 
static_cast<int> ((y-bboxlo[1])*bininvy); - iy = MIN(iy,nbiny-1); - } else - iy = static_cast<int> ((y-bboxlo[1])*bininvy) - 1; - - if (z >= bboxhi[2]) - iz = static_cast<int> ((z-bboxhi[2])*bininvz) + nbinz; - else if (z >= bboxlo[2]) { - iz = static_cast<int> ((z-bboxlo[2])*bininvz); - iz = MIN(iz,nbinz-1); - } else - iz = static_cast<int> ((z-bboxlo[2])*bininvz) - 1; - - return (iz-mbinzlo)*mbiny*mbinx + (iy-mbinylo)*mbinx + (ix-mbinxlo); - } - KOKKOS_INLINE_FUNCTION int coord2bin(const X_FLOAT & x,const X_FLOAT & y,const X_FLOAT & z, int* i) const { diff --git a/src/KOKKOS/npair_ssa_kokkos.cpp b/src/KOKKOS/npair_ssa_kokkos.cpp index b73e54e33fb469dd4ffbaefa8314532eadbe2559..9f447bda1a8038874064a54035e0c9de569c0a0c 100644 --- a/src/KOKKOS/npair_ssa_kokkos.cpp +++ b/src/KOKKOS/npair_ssa_kokkos.cpp @@ -70,6 +70,7 @@ void NPairSSAKokkos<DeviceType>::copy_neighbor_info() k_ex2_bit = neighborKK->k_ex2_bit; k_ex_mol_group = neighborKK->k_ex_mol_group; k_ex_mol_bit = neighborKK->k_ex_mol_bit; + k_ex_mol_intra = neighborKK->k_ex_mol_intra; } /* ---------------------------------------------------------------------- @@ -217,8 +218,12 @@ int NPairSSAKokkosExecute<DeviceType>::exclusion(const int &i,const int &j, if (nex_mol) { for (m = 0; m < nex_mol; m++) - if (mask(i) & ex_mol_bit(m) && mask(j) & ex_mol_bit(m) && - molecule(i) == molecule(j)) return 1; + if (ex_mol_intra[m]) { // intra-chain: exclude i-j pair if on same molecule + if (mask[i] & ex_mol_bit[m] && mask[j] & ex_mol_bit[m] && + molecule[i] == molecule[j]) return 1; + } else // exclude i-j pair if on different molecules + if (mask[i] & ex_mol_bit[m] && mask[j] & ex_mol_bit[m] && + molecule[i] != molecule[j]) return 1; } return 0; @@ -418,6 +423,7 @@ fprintf(stdout, "tota%03d total %3d could use %6d inums, expected %6d inums. 
inu nex_mol, k_ex_mol_group.view<DeviceType>(), k_ex_mol_bit.view<DeviceType>(), + k_ex_mol_intra.view<DeviceType>(), bboxhi,bboxlo, domain->xperiodic,domain->yperiodic,domain->zperiodic, domain->xprd_half,domain->yprd_half,domain->zprd_half); @@ -432,6 +438,7 @@ fprintf(stdout, "tota%03d total %3d could use %6d inums, expected %6d inums. inu k_ex2_bit.sync<DeviceType>(); k_ex_mol_group.sync<DeviceType>(); k_ex_mol_bit.sync<DeviceType>(); + k_ex_mol_intra.sync<DeviceType>(); k_bincount.sync<DeviceType>(); k_bins.sync<DeviceType>(); k_gbincount.sync<DeviceType>(); diff --git a/src/KOKKOS/npair_ssa_kokkos.h b/src/KOKKOS/npair_ssa_kokkos.h index 98046feba88b4fb239004228cfa940f74946ca2b..17a23b2811b221db561949e7313635b48f4f0a90 100644 --- a/src/KOKKOS/npair_ssa_kokkos.h +++ b/src/KOKKOS/npair_ssa_kokkos.h @@ -76,6 +76,7 @@ class NPairSSAKokkos : public NPair { DAT::tdual_int_1d k_ex1_bit,k_ex2_bit; DAT::tdual_int_1d k_ex_mol_group; DAT::tdual_int_1d k_ex_mol_bit; + DAT::tdual_int_1d k_ex_mol_intra; // data from NBinSSA class @@ -123,6 +124,7 @@ class NPairSSAKokkosExecute const int nex_mol; const typename AT::t_int_1d_const ex_mol_group; const typename AT::t_int_1d_const ex_mol_bit; + const typename AT::t_int_1d_const ex_mol_intra; // data from NBinSSA class @@ -233,6 +235,7 @@ class NPairSSAKokkosExecute const int & _nex_mol, const typename AT::t_int_1d_const & _ex_mol_group, const typename AT::t_int_1d_const & _ex_mol_bit, + const typename AT::t_int_1d_const & _ex_mol_intra, const X_FLOAT *_bboxhi, const X_FLOAT* _bboxlo, const int & _xperiodic, const int & _yperiodic, const int & _zperiodic, const int & _xprd_half, const int & _yprd_half, const int & _zprd_half): @@ -266,6 +269,7 @@ class NPairSSAKokkosExecute ex1_group(_ex1_group),ex2_group(_ex2_group), ex1_bit(_ex1_bit),ex2_bit(_ex2_bit),nex_mol(_nex_mol), ex_mol_group(_ex_mol_group),ex_mol_bit(_ex_mol_bit), + ex_mol_intra(_ex_mol_intra), xperiodic(_xperiodic),yperiodic(_yperiodic),zperiodic(_zperiodic), 
xprd_half(_xprd_half),yprd_half(_yprd_half),zprd_half(_zprd_half) { diff --git a/src/KOKKOS/pair_reaxc_kokkos.cpp b/src/KOKKOS/pair_reaxc_kokkos.cpp index d95cd8f8aefd68b5699ee89046352a30771b3db9..d5f83f45373d9e4d32d6032f0fe343489077e885 100644 --- a/src/KOKKOS/pair_reaxc_kokkos.cpp +++ b/src/KOKKOS/pair_reaxc_kokkos.cpp @@ -131,6 +131,8 @@ template<class DeviceType> void PairReaxCKokkos<DeviceType>::init_style() { PairReaxC::init_style(); + if (fix_reax) modify->delete_fix("REAXC"); // not needed in the Kokkos version + fix_reax = NULL; // irequest = neigh request made by parent class @@ -555,8 +557,8 @@ void PairReaxCKokkos<DeviceType>::Deallocate_Lookup_Tables() ntypes = atom->ntypes; - for( i = 0; i < ntypes; ++i ) { - for( j = i; j < ntypes; ++j ) + for( i = 0; i <= ntypes; ++i ) { + for( j = i; j <= ntypes; ++j ) if( LR[i][j].n ) { sfree( LR[i][j].y, "LR[i,j].y" ); sfree( LR[i][j].H, "LR[i,j].H" ); diff --git a/src/KOKKOS/verlet_kokkos.cpp b/src/KOKKOS/verlet_kokkos.cpp index e4a3f857d3d24829055c6ce5dc6cfd1ff1a02d52..adec5ff1bd9d7520c2c469638d497759375c362a 100644 --- a/src/KOKKOS/verlet_kokkos.cpp +++ b/src/KOKKOS/verlet_kokkos.cpp @@ -294,6 +294,7 @@ void VerletKokkos::run(int n) int n_pre_exchange = modify->n_pre_exchange; int n_pre_neighbor = modify->n_pre_neighbor; int n_pre_force = modify->n_pre_force; + int n_pre_reverse = modify->n_pre_reverse; int n_post_force = modify->n_post_force; int n_end_of_step = modify->n_end_of_step; @@ -304,9 +305,9 @@ void VerletKokkos::run(int n) f_merge_copy = DAT::t_f_array("VerletKokkos::f_merge_copy",atomKK->k_f.dimension_0()); - static double time = 0.0; atomKK->sync(Device,ALL_MASK); - Kokkos::Impl::Timer ktimer; + //static double time = 0.0; + //Kokkos::Impl::Timer ktimer; timer->init_timeout(); for (int i = 0; i < n; i++) { @@ -320,10 +321,10 @@ void VerletKokkos::run(int n) // initial time integration - ktimer.reset(); + //ktimer.reset(); timer->stamp(); modify->initial_integrate(vflag); - time += 
ktimer.seconds(); + //time += ktimer.seconds(); if (n_post_integrate) modify->post_integrate(); timer->stamp(Timer::MODIFY); @@ -523,11 +524,18 @@ void VerletKokkos::run(int n) atomKK->k_f.modify<LMPDeviceType>(); } + if (n_pre_reverse) { + modify->pre_reverse(eflag,vflag); + timer->stamp(Timer::MODIFY); + } // reverse communication of forces - if (force->newton) comm->reverse_comm(); - timer->stamp(Timer::COMM); + if (force->newton) { + Kokkos::fence(); + comm->reverse_comm(); + timer->stamp(Timer::COMM); + } // force modifications, final time integration, diagnostics diff --git a/src/KSPACE/pair_buck_long_coul_long.cpp b/src/KSPACE/pair_buck_long_coul_long.cpp index 4cfb9b72671639a133cc3ddc05b356f7cb9616d9..7df8ebac6874b9ca02a4171b8e2e14666ad0d9b4 100644 --- a/src/KSPACE/pair_buck_long_coul_long.cpp +++ b/src/KSPACE/pair_buck_long_coul_long.cpp @@ -233,7 +233,8 @@ void PairBuckLongCoulLong::init_style() if (!atom->q_flag && (ewald_order&(1<<1))) error->all(FLERR, - "Invoking coulombic in pair style buck/long/coul/long requires atom attribute q"); + "Invoking coulombic in pair style buck/long/coul/long " + "requires atom attribute q"); // ensure use of KSpace long-range solver, set two g_ewalds @@ -258,51 +259,25 @@ void PairBuckLongCoulLong::init_style() if (force->kspace->neighrequest_flag) { int irequest; + int respa = 0; if (update->whichflag == 1 && strstr(update->integrate_style,"respa")) { - int respa = 0; if (((Respa *) update->integrate)->level_inner >= 0) respa = 1; if (((Respa *) update->integrate)->level_middle >= 0) respa = 2; + } - if (respa == 0) irequest = neighbor->request(this,instance_me); - else if (respa == 1) { - irequest = neighbor->request(this,instance_me); - neighbor->requests[irequest]->id = 1; - neighbor->requests[irequest]->respainner = 1; - irequest = neighbor->request(this,instance_me); - neighbor->requests[irequest]->id = 3; - neighbor->requests[irequest]->respaouter = 1; - } else { - irequest = neighbor->request(this,instance_me); 
- neighbor->requests[irequest]->id = 1; - neighbor->requests[irequest]->respainner = 1; - irequest = neighbor->request(this,instance_me); - neighbor->requests[irequest]->id = 2; - neighbor->requests[irequest]->respamiddle = 1; - irequest = neighbor->request(this,instance_me); - neighbor->requests[irequest]->id = 3; - neighbor->requests[irequest]->respaouter = 1; - } + irequest = neighbor->request(this,instance_me); - } else irequest = neighbor->request(this,instance_me); + if (respa >= 1) { + neighbor->requests[irequest]->respaouter = 1; + neighbor->requests[irequest]->respainner = 1; + } + if (respa == 2) neighbor->requests[irequest]->respamiddle = 1; } cut_coulsq = cut_coul * cut_coul; } -/* ---------------------------------------------------------------------- - neighbor callback to inform pair style of neighbor list to use - regular or rRESPA -------------------------------------------------------------------------- */ - -void PairBuckLongCoulLong::init_list(int id, NeighList *ptr) -{ - if (id == 0) list = ptr; - else if (id == 1) listinner = ptr; - else if (id == 2) listmiddle = ptr; - else if (id == 3) listouter = ptr; -} - /* ---------------------------------------------------------------------- init for one type pair i,j and corresponding j,i ------------------------------------------------------------------------- */ @@ -651,14 +626,14 @@ void PairBuckLongCoulLong::compute_inner() double qri, *cut_bucksqi, *buck1i, *buck2i, *rhoinvi; vector xi, d; - ineighn = (ineigh = listinner->ilist) + listinner->inum; + ineighn = (ineigh = list->ilist_inner) + list->inum_inner; for (; ineigh<ineighn; ++ineigh) { // loop over my atoms i = *ineigh; fi = f0+3*i; if (order1) qri = qqrd2e*q[i]; memcpy(xi, x0+(i+(i<<1)), sizeof(vector)); cut_bucksqi = cut_bucksq[typei = type[i]]; buck1i = buck1[typei]; buck2i = buck2[typei]; rhoinvi = rhoinv[typei]; - jneighn = (jneigh = listinner->firstneigh[i])+listinner->numneigh[i]; + jneighn = (jneigh = 
list->firstneigh_inner[i])+list->numneigh_inner[i]; for (; jneigh<jneighn; ++jneigh) { // loop over neighbors j = *jneigh; @@ -740,7 +715,7 @@ void PairBuckLongCoulLong::compute_middle() double qri, *cut_bucksqi, *buck1i, *buck2i, *rhoinvi; vector xi, d; - ineighn = (ineigh = listmiddle->ilist)+listmiddle->inum; + ineighn = (ineigh = list->ilist_middle)+list->inum_middle; for (; ineigh<ineighn; ++ineigh) { // loop over my atoms i = *ineigh; fi = f0+3*i; @@ -748,7 +723,7 @@ void PairBuckLongCoulLong::compute_middle() memcpy(xi, x0+(i+(i<<1)), sizeof(vector)); cut_bucksqi = cut_bucksq[typei = type[i]]; buck1i = buck1[typei]; buck2i = buck2[typei]; rhoinvi = rhoinv[typei]; - jneighn = (jneigh = listmiddle->firstneigh[i])+listmiddle->numneigh[i]; + jneighn = (jneigh = list->firstneigh_middle[i])+list->numneigh_middle[i]; for (; jneigh<jneighn; ++jneigh) { // loop over neighbors j = *jneigh; @@ -839,7 +814,7 @@ void PairBuckLongCoulLong::compute_outer(int eflag, int vflag) double cut_in_off_sq = cut_in_off*cut_in_off; double cut_in_on_sq = cut_in_on*cut_in_on; - ineighn = (ineigh = listouter->ilist)+listouter->inum; + ineighn = (ineigh = list->ilist)+list->inum; for (; ineigh<ineighn; ++ineigh) { // loop over my atoms i = *ineigh; fi = f0+3*i; @@ -849,7 +824,7 @@ void PairBuckLongCoulLong::compute_outer(int eflag, int vflag) buckai = buck_a[typei]; buckci = buck_c[typei]; rhoinvi = rhoinv[typei]; cutsqi = cutsq[typei]; cut_bucksqi = cut_bucksq[typei]; memcpy(xi, x0+(i+(i<<1)), sizeof(vector)); - jneighn = (jneigh = listouter->firstneigh[i])+listouter->numneigh[i]; + jneighn = (jneigh = list->firstneigh[i])+list->numneigh[i]; for (; jneigh<jneighn; ++jneigh) { // loop over neighbors j = *jneigh; diff --git a/src/KSPACE/pair_buck_long_coul_long.h b/src/KSPACE/pair_buck_long_coul_long.h index cf752a09b0604c4c676b6d309b7ed3c0a248e089..40fe7c417fa614f3db4133d00e6bacd380145bfe 100644 --- a/src/KSPACE/pair_buck_long_coul_long.h +++ b/src/KSPACE/pair_buck_long_coul_long.h @@ 
-35,7 +35,6 @@ class PairBuckLongCoulLong : public Pair { virtual void settings(int, char **); void coeff(int, char **); void init_style(); - void init_list(int, class NeighList *); double init_one(int, int); void write_restart(FILE *); void read_restart(FILE *); diff --git a/src/KSPACE/pair_lj_charmm_coul_long.cpp b/src/KSPACE/pair_lj_charmm_coul_long.cpp index 7b3fdd4b5f6f3c7987c16a5687ec1c274aba5c74..daa493cef06e6327966c9dfb490a33c071ff8a22 100644 --- a/src/KSPACE/pair_lj_charmm_coul_long.cpp +++ b/src/KSPACE/pair_lj_charmm_coul_long.cpp @@ -242,10 +242,10 @@ void PairLJCharmmCoulLong::compute_inner() int newton_pair = force->newton_pair; double qqrd2e = force->qqrd2e; - inum = listinner->inum; - ilist = listinner->ilist; - numneigh = listinner->numneigh; - firstneigh = listinner->firstneigh; + inum = list->inum_inner; + ilist = list->ilist_inner; + numneigh = list->numneigh_inner; + firstneigh = list->firstneigh_inner; // loop over neighbors of my atoms @@ -320,10 +320,10 @@ void PairLJCharmmCoulLong::compute_middle() int newton_pair = force->newton_pair; double qqrd2e = force->qqrd2e; - inum = listmiddle->inum; - ilist = listmiddle->ilist; - numneigh = listmiddle->numneigh; - firstneigh = listmiddle->firstneigh; + inum = list->inum_middle; + ilist = list->ilist_middle; + numneigh = list->numneigh_middle; + firstneigh = list->firstneigh_middle; // loop over neighbors of my atoms @@ -417,10 +417,10 @@ void PairLJCharmmCoulLong::compute_outer(int eflag, int vflag) int newton_pair = force->newton_pair; double qqrd2e = force->qqrd2e; - inum = listouter->inum; - ilist = listouter->ilist; - numneigh = listouter->numneigh; - firstneigh = listouter->firstneigh; + inum = list->inum; + ilist = list->ilist; + numneigh = list->numneigh; + firstneigh = list->firstneigh; // loop over neighbors of my atoms @@ -687,36 +687,23 @@ void PairLJCharmmCoulLong::init_style() error->all(FLERR, "Pair style lj/charmm/coul/long requires atom attribute q"); - // request regular or rRESPA 
neighbor lists + // request regular or rRESPA neighbor list int irequest; + int respa = 0; if (update->whichflag == 1 && strstr(update->integrate_style,"respa")) { - int respa = 0; if (((Respa *) update->integrate)->level_inner >= 0) respa = 1; if (((Respa *) update->integrate)->level_middle >= 0) respa = 2; + } - if (respa == 0) irequest = neighbor->request(this,instance_me); - else if (respa == 1) { - irequest = neighbor->request(this,instance_me); - neighbor->requests[irequest]->id = 1; - neighbor->requests[irequest]->respainner = 1; - irequest = neighbor->request(this,instance_me); - neighbor->requests[irequest]->id = 3; - neighbor->requests[irequest]->respaouter = 1; - } else { - irequest = neighbor->request(this,instance_me); - neighbor->requests[irequest]->id = 1; - neighbor->requests[irequest]->respainner = 1; - irequest = neighbor->request(this,instance_me); - neighbor->requests[irequest]->id = 2; - neighbor->requests[irequest]->respamiddle = 1; - irequest = neighbor->request(this,instance_me); - neighbor->requests[irequest]->id = 3; - neighbor->requests[irequest]->respaouter = 1; - } + irequest = neighbor->request(this,instance_me); - } else irequest = neighbor->request(this,instance_me); + if (respa >= 1) { + neighbor->requests[irequest]->respaouter = 1; + neighbor->requests[irequest]->respainner = 1; + } + if (respa == 2) neighbor->requests[irequest]->respamiddle = 1; // require cut_lj_inner < cut_lj @@ -767,19 +754,6 @@ void PairLJCharmmCoulLong::init_style() if (ncoultablebits) init_tables(cut_coul,cut_respa); } -/* ---------------------------------------------------------------------- - neighbor callback to inform pair style of neighbor list to use - regular or rRESPA -------------------------------------------------------------------------- */ - -void PairLJCharmmCoulLong::init_list(int id, NeighList *ptr) -{ - if (id == 0) list = ptr; - else if (id == 1) listinner = ptr; - else if (id == 2) listmiddle = ptr; - else if (id == 3) listouter = ptr; -} 
- /* ---------------------------------------------------------------------- init for one type pair i,j and corresponding j,i ------------------------------------------------------------------------- */ diff --git a/src/KSPACE/pair_lj_charmm_coul_long.h b/src/KSPACE/pair_lj_charmm_coul_long.h index 1544f3bc1420e100be265960debd22e157e5d84e..95c6d0d1c72aa179cf4d58001cb39f79fcf08322 100644 --- a/src/KSPACE/pair_lj_charmm_coul_long.h +++ b/src/KSPACE/pair_lj_charmm_coul_long.h @@ -33,7 +33,6 @@ class PairLJCharmmCoulLong : public Pair { virtual void settings(int, char **); void coeff(int, char **); virtual void init_style(); - void init_list(int, class NeighList *); virtual double init_one(int, int); void write_restart(FILE *); void read_restart(FILE *); diff --git a/src/KSPACE/pair_lj_charmm_coul_msm.cpp b/src/KSPACE/pair_lj_charmm_coul_msm.cpp index 76c9ef0cc7ba953b995cc5141e21e9f931ba79b0..00617c0bf2331a51ba721cc9a8ba05ba5209f2ec 100644 --- a/src/KSPACE/pair_lj_charmm_coul_msm.cpp +++ b/src/KSPACE/pair_lj_charmm_coul_msm.cpp @@ -278,10 +278,10 @@ void PairLJCharmmCoulMSM::compute_outer(int eflag, int vflag) int newton_pair = force->newton_pair; double qqrd2e = force->qqrd2e; - inum = listouter->inum; - ilist = listouter->ilist; - numneigh = listouter->numneigh; - firstneigh = listouter->firstneigh; + inum = list->inum; + ilist = list->ilist; + numneigh = list->numneigh; + firstneigh = list->firstneigh; double cut_in_off = cut_respa[2]; double cut_in_on = cut_respa[3]; diff --git a/src/KSPACE/pair_lj_charmmfsw_coul_long.cpp b/src/KSPACE/pair_lj_charmmfsw_coul_long.cpp index 30d8ab64b60f086c89d328402aa0b2ba8c45c282..859f4217631c7fe17b5fb29b636da2724e9e0cf8 100644 --- a/src/KSPACE/pair_lj_charmmfsw_coul_long.cpp +++ b/src/KSPACE/pair_lj_charmmfsw_coul_long.cpp @@ -274,10 +274,10 @@ void PairLJCharmmfswCoulLong::compute_inner() int newton_pair = force->newton_pair; double qqrd2e = force->qqrd2e; - inum = listinner->inum; - ilist = listinner->ilist; - numneigh = 
listinner->numneigh; - firstneigh = listinner->firstneigh; + inum = list->inum_inner; + ilist = list->ilist_inner; + numneigh = list->numneigh_inner; + firstneigh = list->firstneigh_inner; double cut_out_on = cut_respa[0]; double cut_out_off = cut_respa[1]; @@ -359,10 +359,10 @@ void PairLJCharmmfswCoulLong::compute_middle() int newton_pair = force->newton_pair; double qqrd2e = force->qqrd2e; - inum = listmiddle->inum; - ilist = listmiddle->ilist; - numneigh = listmiddle->numneigh; - firstneigh = listmiddle->firstneigh; + inum = list->inum_middle; + ilist = list->ilist_middle; + numneigh = list->numneigh_middle; + firstneigh = list->firstneigh_middle; double cut_in_off = cut_respa[0]; double cut_in_on = cut_respa[1]; @@ -465,10 +465,10 @@ void PairLJCharmmfswCoulLong::compute_outer(int eflag, int vflag) int newton_pair = force->newton_pair; double qqrd2e = force->qqrd2e; - inum = listouter->inum; - ilist = listouter->ilist; - numneigh = listouter->numneigh; - firstneigh = listouter->firstneigh; + inum = list->inum; + ilist = list->ilist; + numneigh = list->numneigh; + firstneigh = list->firstneigh; double cut_in_off = cut_respa[2]; double cut_in_on = cut_respa[3]; @@ -824,19 +824,6 @@ void PairLJCharmmfswCoulLong::init_style() if (ncoultablebits) init_tables(cut_coul,cut_respa); } -/* ---------------------------------------------------------------------- - neighbor callback to inform pair style of neighbor list to use - regular or rRESPA -------------------------------------------------------------------------- */ - -void PairLJCharmmfswCoulLong::init_list(int id, NeighList *ptr) -{ - if (id == 0) list = ptr; - else if (id == 1) listinner = ptr; - else if (id == 2) listmiddle = ptr; - else if (id == 3) listouter = ptr; -} - /* ---------------------------------------------------------------------- init for one type pair i,j and corresponding j,i ------------------------------------------------------------------------- */ diff --git 
a/src/KSPACE/pair_lj_charmmfsw_coul_long.h b/src/KSPACE/pair_lj_charmmfsw_coul_long.h index 650a908e4851dfe7ddb2102fa6b69961cdc727dc..135b82ea72160224c01baf9213ed97659a02be39 100644 --- a/src/KSPACE/pair_lj_charmmfsw_coul_long.h +++ b/src/KSPACE/pair_lj_charmmfsw_coul_long.h @@ -33,7 +33,6 @@ class PairLJCharmmfswCoulLong : public Pair { virtual void settings(int, char **); void coeff(int, char **); virtual void init_style(); - void init_list(int, class NeighList *); virtual double init_one(int, int); void write_restart(FILE *); void read_restart(FILE *); diff --git a/src/KSPACE/pair_lj_cut_coul_long.cpp b/src/KSPACE/pair_lj_cut_coul_long.cpp index f8be9fdb79957699414f0d2fe9fbf44368f48807..3096df2b01626e1fbfd255c176ed0803bf07817c 100644 --- a/src/KSPACE/pair_lj_cut_coul_long.cpp +++ b/src/KSPACE/pair_lj_cut_coul_long.cpp @@ -224,10 +224,10 @@ void PairLJCutCoulLong::compute_inner() int newton_pair = force->newton_pair; double qqrd2e = force->qqrd2e; - inum = listinner->inum; - ilist = listinner->ilist; - numneigh = listinner->numneigh; - firstneigh = listinner->firstneigh; + inum = list->inum_inner; + ilist = list->ilist_inner; + numneigh = list->numneigh_inner; + firstneigh = list->firstneigh_inner; double cut_out_on = cut_respa[0]; double cut_out_off = cut_respa[1]; @@ -309,10 +309,10 @@ void PairLJCutCoulLong::compute_middle() int newton_pair = force->newton_pair; double qqrd2e = force->qqrd2e; - inum = listmiddle->inum; - ilist = listmiddle->ilist; - numneigh = listmiddle->numneigh; - firstneigh = listmiddle->firstneigh; + inum = list->inum_middle; + ilist = list->ilist_middle; + numneigh = list->numneigh_middle; + firstneigh = list->firstneigh_middle; double cut_in_off = cut_respa[0]; double cut_in_on = cut_respa[1]; @@ -410,10 +410,10 @@ void PairLJCutCoulLong::compute_outer(int eflag, int vflag) int newton_pair = force->newton_pair; double qqrd2e = force->qqrd2e; - inum = listouter->inum; - ilist = listouter->ilist; - numneigh = listouter->numneigh; - 
firstneigh = listouter->firstneigh; + inum = list->inum; + ilist = list->ilist; + numneigh = list->numneigh; + firstneigh = list->firstneigh; double cut_in_off = cut_respa[2]; double cut_in_on = cut_respa[3]; @@ -656,36 +656,23 @@ void PairLJCutCoulLong::init_style() if (!atom->q_flag) error->all(FLERR,"Pair style lj/cut/coul/long requires atom attribute q"); - // request regular or rRESPA neighbor lists + // request regular or rRESPA neighbor list int irequest; + int respa = 0; if (update->whichflag == 1 && strstr(update->integrate_style,"respa")) { - int respa = 0; if (((Respa *) update->integrate)->level_inner >= 0) respa = 1; if (((Respa *) update->integrate)->level_middle >= 0) respa = 2; + } - if (respa == 0) irequest = neighbor->request(this,instance_me); - else if (respa == 1) { - irequest = neighbor->request(this,instance_me); - neighbor->requests[irequest]->id = 1; - neighbor->requests[irequest]->respainner = 1; - irequest = neighbor->request(this,instance_me); - neighbor->requests[irequest]->id = 3; - neighbor->requests[irequest]->respaouter = 1; - } else { - irequest = neighbor->request(this,instance_me); - neighbor->requests[irequest]->id = 1; - neighbor->requests[irequest]->respainner = 1; - irequest = neighbor->request(this,instance_me); - neighbor->requests[irequest]->id = 2; - neighbor->requests[irequest]->respamiddle = 1; - irequest = neighbor->request(this,instance_me); - neighbor->requests[irequest]->id = 3; - neighbor->requests[irequest]->respaouter = 1; - } + irequest = neighbor->request(this,instance_me); - } else irequest = neighbor->request(this,instance_me); + if (respa >= 1) { + neighbor->requests[irequest]->respaouter = 1; + neighbor->requests[irequest]->respainner = 1; + } + if (respa == 2) neighbor->requests[irequest]->respamiddle = 1; cut_coulsq = cut_coul * cut_coul; @@ -707,19 +694,6 @@ void PairLJCutCoulLong::init_style() if (ncoultablebits) init_tables(cut_coul,cut_respa); } -/* 
---------------------------------------------------------------------- - neighbor callback to inform pair style of neighbor list to use - regular or rRESPA -------------------------------------------------------------------------- */ - -void PairLJCutCoulLong::init_list(int id, NeighList *ptr) -{ - if (id == 0) list = ptr; - else if (id == 1) listinner = ptr; - else if (id == 2) listmiddle = ptr; - else if (id == 3) listouter = ptr; -} - /* ---------------------------------------------------------------------- init for one type pair i,j and corresponding j,i ------------------------------------------------------------------------- */ diff --git a/src/KSPACE/pair_lj_cut_coul_long.h b/src/KSPACE/pair_lj_cut_coul_long.h index 886542d07550d15d6fdf9b28b8b135feae4f04ce..e6f97c088dab552386ebaa7ec74d7917fb865c60 100644 --- a/src/KSPACE/pair_lj_cut_coul_long.h +++ b/src/KSPACE/pair_lj_cut_coul_long.h @@ -33,7 +33,6 @@ class PairLJCutCoulLong : public Pair { virtual void settings(int, char **); void coeff(int, char **); virtual void init_style(); - void init_list(int, class NeighList *); virtual double init_one(int, int); void write_restart(FILE *); void read_restart(FILE *); diff --git a/src/KSPACE/pair_lj_cut_coul_msm.cpp b/src/KSPACE/pair_lj_cut_coul_msm.cpp index e3b3f58fcbf83355b8a16d9c4c711022ca881684..9f901db9fcfbb2375734877b0a918368cd49d5cb 100644 --- a/src/KSPACE/pair_lj_cut_coul_msm.cpp +++ b/src/KSPACE/pair_lj_cut_coul_msm.cpp @@ -265,10 +265,10 @@ void PairLJCutCoulMSM::compute_outer(int eflag, int vflag) int newton_pair = force->newton_pair; double qqrd2e = force->qqrd2e; - inum = listouter->inum; - ilist = listouter->ilist; - numneigh = listouter->numneigh; - firstneigh = listouter->firstneigh; + inum = list->inum; + ilist = list->ilist; + numneigh = list->numneigh; + firstneigh = list->firstneigh; double cut_in_off = cut_respa[2]; double cut_in_on = cut_respa[3]; diff --git a/src/KSPACE/pair_lj_long_coul_long.cpp b/src/KSPACE/pair_lj_long_coul_long.cpp index 
7c6adfcb41609748e41e026769f5a38de178cab4..61b69011f1ac5ae04ca191bd0423f5a4296d7e27 100644 --- a/src/KSPACE/pair_lj_long_coul_long.cpp +++ b/src/KSPACE/pair_lj_long_coul_long.cpp @@ -253,51 +253,25 @@ void PairLJLongCoulLong::init_style() if (force->kspace->neighrequest_flag) { int irequest; - + int respa = 0; + if (update->whichflag == 1 && strstr(update->integrate_style,"respa")) { - int respa = 0; if (((Respa *) update->integrate)->level_inner >= 0) respa = 1; if (((Respa *) update->integrate)->level_middle >= 0) respa = 2; - - if (respa == 0) irequest = neighbor->request(this,instance_me); - else if (respa == 1) { - irequest = neighbor->request(this,instance_me); - neighbor->requests[irequest]->id = 1; - neighbor->requests[irequest]->respainner = 1; - irequest = neighbor->request(this,instance_me); - neighbor->requests[irequest]->id = 3; - neighbor->requests[irequest]->respaouter = 1; - } else { - irequest = neighbor->request(this,instance_me); - neighbor->requests[irequest]->id = 1; - neighbor->requests[irequest]->respainner = 1; - irequest = neighbor->request(this,instance_me); - neighbor->requests[irequest]->id = 2; - neighbor->requests[irequest]->respamiddle = 1; - irequest = neighbor->request(this,instance_me); - neighbor->requests[irequest]->id = 3; - neighbor->requests[irequest]->respaouter = 1; - } - - } else irequest = neighbor->request(this,instance_me); + } + + irequest = neighbor->request(this,instance_me); + + if (respa >= 1) { + neighbor->requests[irequest]->respaouter = 1; + neighbor->requests[irequest]->respainner = 1; + } + if (respa == 2) neighbor->requests[irequest]->respamiddle = 1; } cut_coulsq = cut_coul * cut_coul; } -/* ---------------------------------------------------------------------- - neighbor callback to inform pair style of neighbor list to use - regular or rRESPA -------------------------------------------------------------------------- */ - -void PairLJLongCoulLong::init_list(int id, NeighList *ptr) -{ - if (id == 0) list = 
ptr; - else if (id == 1) listinner = ptr; - else if (id == 2) listmiddle = ptr; - else if (id == 3) listouter = ptr; -} - /* ---------------------------------------------------------------------- init for one type pair i,j and corresponding j,i ------------------------------------------------------------------------- */ @@ -649,13 +623,13 @@ void PairLJLongCoulLong::compute_inner() double qri, *cut_ljsqi, *lj1i, *lj2i; vector xi, d; - ineighn = (ineigh = listinner->ilist)+listinner->inum; + ineighn = (ineigh = list->ilist_inner)+list->inum_inner; for (; ineigh<ineighn; ++ineigh) { // loop over my atoms i = *ineigh; fi = f0+3*i; memcpy(xi, x0+(i+(i<<1)), sizeof(vector)); cut_ljsqi = cut_ljsq[typei = type[i]]; lj1i = lj1[typei]; lj2i = lj2[typei]; - jneighn = (jneigh = listinner->firstneigh[i])+listinner->numneigh[i]; + jneighn = (jneigh = list->firstneigh_inner[i])+list->numneigh_inner[i]; for (; jneigh<jneighn; ++jneigh) { // loop over neighbors j = *jneigh; ni = sbmask(j); @@ -736,7 +710,7 @@ void PairLJLongCoulLong::compute_middle() double qri, *cut_ljsqi, *lj1i, *lj2i; vector xi, d; - ineighn = (ineigh = listmiddle->ilist)+listmiddle->inum; + ineighn = (ineigh = list->ilist_middle)+list->inum_middle; for (; ineigh<ineighn; ++ineigh) { // loop over my atoms i = *ineigh; fi = f0+3*i; @@ -744,7 +718,7 @@ void PairLJLongCoulLong::compute_middle() memcpy(xi, x0+(i+(i<<1)), sizeof(vector)); cut_ljsqi = cut_ljsq[typei = type[i]]; lj1i = lj1[typei]; lj2i = lj2[typei]; - jneighn = (jneigh = listmiddle->firstneigh[i])+listmiddle->numneigh[i]; + jneighn = (jneigh = list->firstneigh_middle[i])+list->numneigh_middle[i]; for (; jneigh<jneighn; ++jneigh) { j = *jneigh; @@ -833,7 +807,7 @@ void PairLJLongCoulLong::compute_outer(int eflag, int vflag) double cut_in_off_sq = cut_in_off*cut_in_off; double cut_in_on_sq = cut_in_on*cut_in_on; - ineighn = (ineigh = listouter->ilist)+listouter->inum; + ineighn = (ineigh = list->ilist)+list->inum; for (; ineigh<ineighn; ++ineigh) { // 
loop over my atoms i = *ineigh; fi = f0+3*i; @@ -842,7 +816,7 @@ void PairLJLongCoulLong::compute_outer(int eflag, int vflag) lj1i = lj1[typei]; lj2i = lj2[typei]; lj3i = lj3[typei]; lj4i = lj4[typei]; cutsqi = cutsq[typei]; cut_ljsqi = cut_ljsq[typei]; memcpy(xi, x0+(i+(i<<1)), sizeof(vector)); - jneighn = (jneigh = listouter->firstneigh[i])+listouter->numneigh[i]; + jneighn = (jneigh = list->firstneigh[i])+list->numneigh[i]; for (; jneigh<jneighn; ++jneigh) { // loop over neighbors j = *jneigh; diff --git a/src/KSPACE/pair_lj_long_coul_long.h b/src/KSPACE/pair_lj_long_coul_long.h index 22704c79fae39f32039a9d58b053dbf02c78274f..f11c81e28915fe19adc7c6cf13391493a5d54c03 100644 --- a/src/KSPACE/pair_lj_long_coul_long.h +++ b/src/KSPACE/pair_lj_long_coul_long.h @@ -34,7 +34,6 @@ class PairLJLongCoulLong : public Pair { virtual void settings(int, char **); void coeff(int, char **); void init_style(); - void init_list(int, class NeighList *); double init_one(int, int); void write_restart(FILE *); void read_restart(FILE *); diff --git a/src/KSPACE/pair_lj_long_tip4p_long.cpp b/src/KSPACE/pair_lj_long_tip4p_long.cpp index 1dc1ca1cb451f0a09fa30e7925efae444bacd98f..1b0eb12e2c256423568c1984cdfb2742dfcd9fd1 100644 --- a/src/KSPACE/pair_lj_long_tip4p_long.cpp +++ b/src/KSPACE/pair_lj_long_tip4p_long.cpp @@ -516,10 +516,10 @@ void PairLJLongTIP4PLong::compute_inner() int ni; double *lj1i, *lj2i; - inum = listinner->inum; - ilist = listinner->ilist; - numneigh = listinner->numneigh; - firstneigh = listinner->firstneigh; + inum = list->inum_inner; + ilist = list->ilist_inner; + numneigh = list->numneigh_inner; + firstneigh = list->firstneigh_inner; // loop over neighbors of my atoms @@ -769,10 +769,10 @@ void PairLJLongTIP4PLong::compute_middle() int ni; double *lj1i, *lj2i; - inum = listmiddle->inum; - ilist = listmiddle->ilist; - numneigh = listmiddle->numneigh; - firstneigh = listmiddle->firstneigh; + inum = list->inum_middle; + ilist = list->ilist_middle; + numneigh = 
list->numneigh_middle; + firstneigh = list->firstneigh_middle; // loop over neighbors of my atoms @@ -1049,10 +1049,10 @@ void PairLJLongTIP4PLong::compute_outer(int eflag, int vflag) double cut_in_off_sq = cut_in_off*cut_in_off; double cut_in_on_sq = cut_in_on*cut_in_on; - inum = listouter->inum; - ilist = listouter->ilist; - numneigh = listouter->numneigh; - firstneigh = listouter->firstneigh; + inum = list->inum; + ilist = list->ilist; + numneigh = list->numneigh; + firstneigh = list->firstneigh; // loop over neighbors of my atoms diff --git a/src/MAKE/MACHINES/Makefile.cori2 b/src/MAKE/MACHINES/Makefile.cori2 index a367d540808add4aeb036aa6a2cabd0745fc98a0..45e1ab1f8a3aed152bd96ce567b858d0148eaee8 100755 --- a/src/MAKE/MACHINES/Makefile.cori2 +++ b/src/MAKE/MACHINES/Makefile.cori2 @@ -15,13 +15,14 @@ SHELL = /bin/sh CC = CC OPTFLAGS = -xMIC-AVX512 -O2 -fp-model fast=2 -no-prec-div -qoverride-limits -CCFLAGS = -g -qopenmp -DLAMMPS_MEMALIGN=64 -qno-offload \ - -fno-alias -ansi-alias -restrict $(OPTFLAGS) -DLMP_INTEL_NO_TBB +CCFLAGS = -qopenmp -qno-offload -fno-alias -ansi-alias -restrict \ + -DLMP_INTEL_USELRT -DLMP_USE_MKL_RNG -DLMP_INTEL_NO_TBB \ + $(OPTFLAGS) SHFLAGS = -fPIC DEPFLAGS = -M LINK = CC -LINKFLAGS = -g -qopenmp $(OPTFLAGS) +LINKFLAGS = -qopenmp $(OPTFLAGS) LIB = SIZE = size diff --git a/src/MAKE/MACHINES/Makefile.icex b/src/MAKE/MACHINES/Makefile.icex deleted file mode 100644 index e9e0c30857f4b2be1767d3a8706fc4e73786c649..0000000000000000000000000000000000000000 --- a/src/MAKE/MACHINES/Makefile.icex +++ /dev/null @@ -1,119 +0,0 @@ -# mpi = MPI with its default compiler - -SHELL = /bin/sh - -# --------------------------------------------------------------------- -# compiler/linker settings -# specify flags and libraries needed for your compiler - -KOKKOS_PATH=/home/stefan/projects/lammps-mine/lib/kokkos -CC = mpicxx -CCFLAGS = -g -O3 -Wall -Wextra -frounding-math -fsignaling-nans -march=native -SHFLAGS = -shared -MD -mcmodel=medium -fpic -fPIC 
-DEPFLAGS = -M - -LINK = mpicxx -LINKFLAGS = -g -O -LIB = -SIZE = size - -ARCHIVE = ar -ARFLAGS = -rc -SHLIBFLAGS = -shared -KOKKOS_DEVICES = Cuda,OpenMP -KOKKOS_ARCH = Pascal61 - -# --------------------------------------------------------------------- -# LAMMPS-specific settings, all OPTIONAL -# specify settings for LAMMPS features you will use -# if you change any -D setting, do full re-compile after "make clean" - -# LAMMPS ifdef settings -# see possible settings in Section 2.2 (step 4) of manual - -LMP_INC = -DLAMMPS_GZIP -#LMP_INC += -DLAMMPS_JPEG -LMP_INC += -DLAMMPS_MEMALIGN=64 - -# MPI library -# see discussion in Section 2.2 (step 5) of manual -# MPI wrapper compiler/linker can provide this info -# can point to dummy MPI library in src/STUBS as in Makefile.serial -# use -D MPICH and OMPI settings in INC to avoid C++ lib conflicts -# INC = path for mpi.h, MPI compiler settings -# PATH = path for MPI library -# LIB = name of MPI library - -MPI_INC = -DMPICH_SKIP_MPICXX -DOMPI_SKIP_MPICXX=1 -MPI_PATH = -MPI_LIB = - -# FFT library -# see discussion in Section 2.2 (step 6) of manual -# can be left blank to use provided KISS FFT library -# INC = -DFFT setting, e.g. 
-DFFT_FFTW, FFT compiler settings -# PATH = path for FFT library -# LIB = name of FFT library - -FFT_INC = -FFT_PATH = -FFT_LIB = - -# JPEG and/or PNG library -# see discussion in Section 2.2 (step 7) of manual -# only needed if -DLAMMPS_JPEG or -DLAMMPS_PNG listed with LMP_INC -# INC = path(s) for jpeglib.h and/or png.h -# PATH = path(s) for JPEG library and/or PNG library -# LIB = name(s) of JPEG library and/or PNG library - -JPG_INC = -JPG_PATH = -JPG_LIB = - -# --------------------------------------------------------------------- -# build rules and dependencies -# do not edit this section - -include Makefile.package.settings -include Makefile.package - -EXTRA_INC = $(LMP_INC) $(PKG_INC) $(MPI_INC) $(FFT_INC) $(JPG_INC) $(PKG_SYSINC) -EXTRA_PATH = $(PKG_PATH) $(MPI_PATH) $(FFT_PATH) $(JPG_PATH) $(PKG_SYSPATH) -EXTRA_LIB = $(PKG_LIB) $(MPI_LIB) $(FFT_LIB) $(JPG_LIB) $(PKG_SYSLIB) -EXTRA_CPP_DEPENDS = $(PKG_CPP_DEPENDS) -EXTRA_LINK_DEPENDS = $(PKG_LINK_DEPENDS) - -# Path to src files - -vpath %.cpp .. -vpath %.h .. 
- -# Link target - -$(EXE): $(OBJ) $(EXTRA_LINK_DEPENDS) - $(LINK) $(LINKFLAGS) $(EXTRA_PATH) $(OBJ) $(EXTRA_LIB) $(LIB) -o $(EXE) - $(SIZE) $(EXE) - -# Library targets - -lib: $(OBJ) $(EXTRA_LINK_DEPENDS) - $(ARCHIVE) $(ARFLAGS) $(EXE) $(OBJ) - -shlib: $(OBJ) $(EXTRA_LINK_DEPENDS) - $(CC) $(CCFLAGS) $(SHFLAGS) $(SHLIBFLAGS) $(EXTRA_PATH) -o $(EXE) \ - $(OBJ) $(EXTRA_LIB) $(LIB) - -# Compilation rules - -%.o:%.cpp - $(CC) $(CCFLAGS) $(SHFLAGS) $(EXTRA_INC) -c $< - -# Individual dependencies - -depend : fastdep.exe $(SRC) - @./fastdep.exe $(EXTRA_INC) -- $^ > .depend || exit 1 - -fastdep.exe: ../DEPEND/fastdep.c - cc -O -o $@ $< - -sinclude .depend diff --git a/src/MAKE/OPTIONS/Makefile.intel_coprocessor b/src/MAKE/OPTIONS/Makefile.intel_coprocessor index a717be93ff54387708aa8f1080ddab40d5ec9a1f..75e4d89170c5f2d2431dfe1151bd1b23b3a3bd14 100644 --- a/src/MAKE/OPTIONS/Makefile.intel_coprocessor +++ b/src/MAKE/OPTIONS/Makefile.intel_coprocessor @@ -10,7 +10,7 @@ CC = mpiicpc MIC_OPT = -qoffload-option,mic,compiler,"-fp-model fast=2 -mGLOB_default_function_attrs=\"gather_scatter_loop_unroll=4\"" CCFLAGS = -g -O3 -qopenmp -DLMP_INTEL_OFFLOAD -DLAMMPS_MEMALIGN=64 \ -xHost -fno-alias -ansi-alias -restrict -DLMP_INTEL_USELRT \ - -qoverride-limits $(MIC_OPT) + -qoverride-limits $(MIC_OPT) -DLMP_USE_MKL_RNG SHFLAGS = -fPIC DEPFLAGS = -M diff --git a/src/MAKE/OPTIONS/Makefile.intel_cpu b/src/MAKE/OPTIONS/Makefile.intel_cpu old mode 100755 new mode 100644 index b7db0645740a1fa62db80bc4e3637500af7f9fce..41d0f959feeba082640a3da8aa883421ee786cdd --- a/src/MAKE/OPTIONS/Makefile.intel_cpu +++ b/src/MAKE/OPTIONS/Makefile.intel_cpu @@ -8,14 +8,14 @@ SHELL = /bin/sh CC = mpiicpc OPTFLAGS = -xHost -O2 -fp-model fast=2 -no-prec-div -qoverride-limits -CCFLAGS = -g -qopenmp -DLAMMPS_MEMALIGN=64 -no-offload \ - -fno-alias -ansi-alias -restrict $(OPTFLAGS) +CCFLAGS = -qopenmp -qno-offload -fno-alias -ansi-alias -restrict \ + -DLMP_INTEL_USELRT -DLMP_USE_MKL_RNG $(OPTFLAGS) SHFLAGS = -fPIC 
DEPFLAGS = -M LINK = mpiicpc -LINKFLAGS = -g -qopenmp $(OPTFLAGS) -LIB = -ltbbmalloc -ltbbmalloc_proxy +LINKFLAGS = -qopenmp $(OPTFLAGS) +LIB = -ltbbmalloc SIZE = size ARCHIVE = ar diff --git a/src/MAKE/OPTIONS/Makefile.intel_cpu_intelmpi b/src/MAKE/OPTIONS/Makefile.intel_cpu_intelmpi index 8a45b781f87d64c9ce36ee0ae9024650c17d9ccd..ef514f43c68f4401e62113cfc4c5ff34a7565175 100644 --- a/src/MAKE/OPTIONS/Makefile.intel_cpu_intelmpi +++ b/src/MAKE/OPTIONS/Makefile.intel_cpu_intelmpi @@ -8,8 +8,8 @@ SHELL = /bin/sh CC = mpiicpc OPTFLAGS = -xHost -O2 -fp-model fast=2 -no-prec-div -qoverride-limits -CCFLAGS = -qopenmp -DLAMMPS_MEMALIGN=64 -qno-offload \ - -fno-alias -ansi-alias -restrict $(OPTFLAGS) -DLMP_INTEL_USELRT +CCFLAGS = -qopenmp -qno-offload -fno-alias -ansi-alias -restrict \ + -DLMP_INTEL_USELRT -DLMP_USE_MKL_RNG $(OPTFLAGS) SHFLAGS = -fPIC DEPFLAGS = -M diff --git a/src/MAKE/OPTIONS/Makefile.intel_cpu_mpich b/src/MAKE/OPTIONS/Makefile.intel_cpu_mpich index 40d517bce41bfb5a720f8badfe06b54993bfbe01..e4dc74d79b353d6984b7af74892a9455cbb79b37 100644 --- a/src/MAKE/OPTIONS/Makefile.intel_cpu_mpich +++ b/src/MAKE/OPTIONS/Makefile.intel_cpu_mpich @@ -8,14 +8,14 @@ SHELL = /bin/sh CC = mpicxx -cxx=icc OPTFLAGS = -xHost -O2 -fp-model fast=2 -no-prec-div -qoverride-limits -CCFLAGS = -g -qopenmp -DLAMMPS_MEMALIGN=64 -no-offload \ - -fno-alias -ansi-alias -restrict $(OPTFLAGS) -DLMP_INTEL_USELRT +CCFLAGS = -qopenmp -qno-offload -fno-alias -ansi-alias -restrict \ + -DLMP_INTEL_USELRT -DLMP_USE_MKL_RNG $(OPTFLAGS) SHFLAGS = -fPIC DEPFLAGS = -M LINK = mpicxx -cxx=icc -LINKFLAGS = -g -qopenmp $(OPTFLAGS) -LIB = +LINKFLAGS = -qopenmp $(OPTFLAGS) +LIB = -ltbbmalloc SIZE = size ARCHIVE = ar diff --git a/src/MAKE/OPTIONS/Makefile.intel_cpu_openmpi b/src/MAKE/OPTIONS/Makefile.intel_cpu_openmpi index fe1be99e5881390a7b919a505ba75dc352685835..457a64b223a1fbd0c4859720ca939d3e99e96d5c 100644 --- a/src/MAKE/OPTIONS/Makefile.intel_cpu_openmpi +++ 
b/src/MAKE/OPTIONS/Makefile.intel_cpu_openmpi @@ -9,14 +9,14 @@ SHELL = /bin/sh export OMPI_CXX = icc CC = mpicxx OPTFLAGS = -xHost -O2 -fp-model fast=2 -no-prec-div -qoverride-limits -CCFLAGS = -g -qopenmp -DLAMMPS_MEMALIGN=64 -no-offload \ - -fno-alias -ansi-alias -restrict $(OPTFLAGS) -DLMP_INTEL_USELRT +CCFLAGS = -qopenmp -qno-offload -fno-alias -ansi-alias -restrict \ + -DLMP_INTEL_USELRT -DLMP_USE_MKL_RNG $(OPTFLAGS) SHFLAGS = -fPIC DEPFLAGS = -M LINK = mpicxx -LINKFLAGS = -g -qopenmp $(OPTFLAGS) -LIB = -ltbbmalloc -ltbbmalloc_proxy +LINKFLAGS = -qopenmp $(OPTFLAGS) +LIB = -ltbbmalloc SIZE = size ARCHIVE = ar diff --git a/src/MAKE/OPTIONS/Makefile.intel_knl_coprocessor b/src/MAKE/OPTIONS/Makefile.intel_knl_coprocessor deleted file mode 100644 index 406e98b36d542c23e6d1d79ab5bc6d83d45f009e..0000000000000000000000000000000000000000 --- a/src/MAKE/OPTIONS/Makefile.intel_knl_coprocessor +++ /dev/null @@ -1,123 +0,0 @@ -# intel_phi = USER-INTEL with Phi x200 (KNL) offload support,Intel MPI,MKL FFT - -SHELL = /bin/sh - -# --------------------------------------------------------------------- -# compiler/linker settings -# specify flags and libraries needed for your compiler - -CC = mpiicpc -MIC_OPT = -qoffload-arch=mic-avx512 -fp-model fast=2 -CCFLAGS = -O3 -qopenmp -DLMP_INTEL_OFFLOAD -DLAMMPS_MEMALIGN=64 \ - -xHost -fno-alias -ansi-alias -restrict \ - -qoverride-limits $(MIC_OPT) -DLMP_INTEL_USELRT -SHFLAGS = -fPIC -DEPFLAGS = -M - -LINK = mpiicpc -LINKFLAGS = -g -O3 -xHost -qopenmp -qoffload $(MIC_OPT) -LIB = -ltbbmalloc -SIZE = size - -ARCHIVE = ar -ARFLAGS = -rc -SHLIBFLAGS = -shared - -# --------------------------------------------------------------------- -# LAMMPS-specific settings, all OPTIONAL -# specify settings for LAMMPS features you will use -# if you change any -D setting, do full re-compile after "make clean" - -# LAMMPS ifdef settings -# see possible settings in Section 2.2 (step 4) of manual - -LMP_INC = -DLAMMPS_GZIP -DLAMMPS_JPEG - -# MPI library 
-# see discussion in Section 2.2 (step 5) of manual -# MPI wrapper compiler/linker can provide this info -# can point to dummy MPI library in src/STUBS as in Makefile.serial -# use -D MPICH and OMPI settings in INC to avoid C++ lib conflicts -# INC = path for mpi.h, MPI compiler settings -# PATH = path for MPI library -# LIB = name of MPI library - -MPI_INC = -DMPICH_SKIP_MPICXX -DOMPI_SKIP_MPICXX=1 -MPI_PATH = -MPI_LIB = - -# FFT library -# see discussion in Section 2.2 (step 6) of manaul -# can be left blank to use provided KISS FFT library -# INC = -DFFT setting, e.g. -DFFT_FFTW, FFT compiler settings -# PATH = path for FFT library -# LIB = name of FFT library - -FFT_INC = -DFFT_MKL -DFFT_SINGLE -FFT_PATH = -FFT_LIB = -L$(MKLROOT)/lib/intel64/ -lmkl_intel_ilp64 -lmkl_sequential -lmkl_core - -# JPEG and/or PNG library -# see discussion in Section 2.2 (step 7) of manual -# only needed if -DLAMMPS_JPEG or -DLAMMPS_PNG listed with LMP_INC -# INC = path(s) for jpeglib.h and/or png.h -# PATH = path(s) for JPEG library and/or PNG library -# LIB = name(s) of JPEG library and/or PNG library - -JPG_INC = -JPG_PATH = -JPG_LIB = -ljpeg - -# --------------------------------------------------------------------- -# build rules and dependencies -# do not edit this section - -include Makefile.package.settings -include Makefile.package - -EXTRA_INC = $(LMP_INC) $(PKG_INC) $(MPI_INC) $(FFT_INC) $(JPG_INC) $(PKG_SYSINC) -EXTRA_PATH = $(PKG_PATH) $(MPI_PATH) $(FFT_PATH) $(JPG_PATH) $(PKG_SYSPATH) -EXTRA_LIB = $(PKG_LIB) $(MPI_LIB) $(FFT_LIB) $(JPG_LIB) $(PKG_SYSLIB) -EXTRA_CPP_DEPENDS = $(PKG_CPP_DEPENDS) -EXTRA_LINK_DEPENDS = $(PKG_LINK_DEPENDS) - -# Path to src files - -vpath %.cpp .. -vpath %.h .. 
- -# Link target - -$(EXE): $(OBJ) $(EXTRA_LINK_DEPENDS) - $(LINK) $(LINKFLAGS) $(EXTRA_PATH) $(OBJ) $(EXTRA_LIB) $(LIB) -o $(EXE) - $(SIZE) $(EXE) - -# Library targets - -lib: $(OBJ) $(EXTRA_LINK_DEPENDS) - $(ARCHIVE) $(ARFLAGS) $(EXE) $(OBJ) - -shlib: $(OBJ) $(EXTRA_LINK_DEPENDS) - $(CC) $(CCFLAGS) $(SHFLAGS) $(SHLIBFLAGS) $(EXTRA_PATH) -o $(EXE) \ - $(OBJ) $(EXTRA_LIB) $(LIB) - -# Compilation rules - -%.o:%.cpp $(EXTRA_CPP_DEPENDS) - $(CC) $(CCFLAGS) $(SHFLAGS) $(EXTRA_INC) -c $< - -%.d:%.cpp $(EXTRA_CPP_DEPENDS) - $(CC) $(CCFLAGS) $(EXTRA_INC) $(DEPFLAGS) $< > $@ - -%.o:%.cu $(EXTRA_CPP_DEPENDS) - $(CC) $(CCFLAGS) $(SHFLAGS) $(EXTRA_INC) -c $< - -# Individual dependencies - -depend : fastdep.exe $(SRC) - @./fastdep.exe $(EXTRA_INC) -- $^ > .depend || exit 1 - -fastdep.exe: ../DEPEND/fastdep.c - cc -O -o $@ $< - -sinclude .depend diff --git a/src/MAKE/OPTIONS/Makefile.knl b/src/MAKE/OPTIONS/Makefile.knl index 881c51f0e4de91654743c024f0af146459206e4a..8e266a4fce859e58e661080e6bd446489a7dec1d 100644 --- a/src/MAKE/OPTIONS/Makefile.knl +++ b/src/MAKE/OPTIONS/Makefile.knl @@ -8,13 +8,13 @@ SHELL = /bin/sh CC = mpiicpc OPTFLAGS = -xMIC-AVX512 -O2 -fp-model fast=2 -no-prec-div -qoverride-limits -CCFLAGS = -qopenmp -DLAMMPS_MEMALIGN=64 -qno-offload \ - -fno-alias -ansi-alias -restrict $(OPTFLAGS) +CCFLAGS = -qopenmp -qno-offload -fno-alias -ansi-alias -restrict \ + -DLMP_INTEL_USELRT -DLMP_USE_MKL_RNG $(OPTFLAGS) SHFLAGS = -fPIC DEPFLAGS = -M LINK = mpiicpc -LINKFLAGS = -g -qopenmp $(OPTFLAGS) +LINKFLAGS = -qopenmp $(OPTFLAGS) LIB = -ltbbmalloc SIZE = size diff --git a/src/MOLECULE/angle_table.cpp b/src/MOLECULE/angle_table.cpp index 4d9007adb7263014f9ed1c795e7bf40afa274683..6e145efa10850909b288d8e0941ecc27c641a979 100644 --- a/src/MOLECULE/angle_table.cpp +++ b/src/MOLECULE/angle_table.cpp @@ -609,18 +609,22 @@ double AngleTable::splint(double *xa, double *ya, double *y2a, int n, double x) void AngleTable::uf_lookup(int type, double x, double &u, double &f) { - int 
itable; + if (!ISFINITE(x)) { + error->one(FLERR,"Illegal angle in angle style table"); + } + double fraction,a,b; + const Table *tb = &tables[tabindex[type]]; + int itable = static_cast<int> (x * tb->invdelta); - Table *tb = &tables[tabindex[type]]; + if (itable < 0) itable = 0; + if (itable >= tablength) itable = tablength-1; if (tabstyle == LINEAR) { - itable = static_cast<int> ( x * tb->invdelta); fraction = (x - tb->ang[itable]) * tb->invdelta; u = tb->e[itable] + fraction*tb->de[itable]; f = tb->f[itable] + fraction*tb->df[itable]; } else if (tabstyle == SPLINE) { - itable = static_cast<int> ( x * tb->invdelta); fraction = (x - tb->ang[itable]) * tb->invdelta; b = (x - tb->ang[itable]) * tb->invdelta; @@ -640,17 +644,21 @@ void AngleTable::uf_lookup(int type, double x, double &u, double &f) void AngleTable::u_lookup(int type, double x, double &u) { - int itable; + if (!ISFINITE(x)) { + error->one(FLERR,"Illegal angle in angle style table"); + } + double fraction,a,b; + const Table *tb = &tables[tabindex[type]]; + int itable = static_cast<int> ( x * tb->invdelta); - Table *tb = &tables[tabindex[type]]; + if (itable < 0) itable = 0; + if (itable >= tablength) itable = tablength-1; if (tabstyle == LINEAR) { - itable = static_cast<int> ( x * tb->invdelta); fraction = (x - tb->ang[itable]) * tb->invdelta; u = tb->e[itable] + fraction*tb->de[itable]; } else if (tabstyle == SPLINE) { - itable = static_cast<int> ( x * tb->invdelta); fraction = (x - tb->ang[itable]) * tb->invdelta; b = (x - tb->ang[itable]) * tb->invdelta; diff --git a/src/MOLECULE/bond_gromos.cpp b/src/MOLECULE/bond_gromos.cpp new file mode 100644 index 0000000000000000000000000000000000000000..4e409b7a72d686a7c573801a7bdaba608548ffd7 --- /dev/null +++ b/src/MOLECULE/bond_gromos.cpp @@ -0,0 +1,210 @@ +/* ---------------------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + 
Steve Plimpton, sjplimp@sandia.gov + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +/* ---------------------------------------------------------------------- + Contributing author: Axel Kohlmeyer (Temple U) +------------------------------------------------------------------------- */ + +#include <math.h> +#include <stdlib.h> +#include <string.h> +#include "bond_gromos.h" +#include "atom.h" +#include "neighbor.h" +#include "domain.h" +#include "comm.h" +#include "force.h" +#include "memory.h" +#include "error.h" + +using namespace LAMMPS_NS; + +/* ---------------------------------------------------------------------- */ + +BondGromos::BondGromos(LAMMPS *lmp) : Bond(lmp) +{ + reinitflag = 1; +} + +/* ---------------------------------------------------------------------- */ + +BondGromos::~BondGromos() +{ + if (allocated && !copymode) { + memory->destroy(setflag); + memory->destroy(k); + memory->destroy(r0); + } +} + +/* ---------------------------------------------------------------------- */ + +void BondGromos::compute(int eflag, int vflag) +{ + int i1,i2,n,type; + double delx,dely,delz,ebond,fbond; + + ebond = 0.0; + if (eflag || vflag) ev_setup(eflag,vflag); + else evflag = 0; + + double **x = atom->x; + double **f = atom->f; + int **bondlist = neighbor->bondlist; + int nbondlist = neighbor->nbondlist; + int nlocal = atom->nlocal; + int newton_bond = force->newton_bond; + + for (n = 0; n < nbondlist; n++) { + i1 = bondlist[n][0]; + i2 = bondlist[n][1]; + type = bondlist[n][2]; + + delx = x[i1][0] - x[i2][0]; + dely = x[i1][1] - x[i2][1]; + delz = x[i1][2] - x[i2][2]; + + const double rsq = delx*delx + dely*dely + delz*delz; + const 
double dr = rsq - r0[type]*r0[type]; + const double kdr = k[type]*dr; + + // force & energy + + fbond = -4.0 * kdr; + if (eflag) ebond = kdr*dr; + + // apply force to each of 2 atoms + + if (newton_bond || i1 < nlocal) { + f[i1][0] += delx*fbond; + f[i1][1] += dely*fbond; + f[i1][2] += delz*fbond; + } + + if (newton_bond || i2 < nlocal) { + f[i2][0] -= delx*fbond; + f[i2][1] -= dely*fbond; + f[i2][2] -= delz*fbond; + } + + if (evflag) ev_tally(i1,i2,nlocal,newton_bond,ebond,fbond,delx,dely,delz); + } +} + +/* ---------------------------------------------------------------------- */ + +void BondGromos::allocate() +{ + allocated = 1; + int n = atom->nbondtypes; + + memory->create(k,n+1,"bond:k"); + memory->create(r0,n+1,"bond:r0"); + + memory->create(setflag,n+1,"bond:setflag"); + for (int i = 1; i <= n; i++) setflag[i] = 0; +} + +/* ---------------------------------------------------------------------- + set coeffs for one or more types +------------------------------------------------------------------------- */ + +void BondGromos::coeff(int narg, char **arg) +{ + if (narg != 3) error->all(FLERR,"Incorrect args for bond coefficients"); + if (!allocated) allocate(); + + int ilo,ihi; + force->bounds(FLERR,arg[0],atom->nbondtypes,ilo,ihi); + + double k_one = force->numeric(FLERR,arg[1]); + double r0_one = force->numeric(FLERR,arg[2]); + + int count = 0; + for (int i = ilo; i <= ihi; i++) { + k[i] = k_one; + r0[i] = r0_one; + setflag[i] = 1; + count++; + } + + if (count == 0) error->all(FLERR,"Incorrect args for bond coefficients"); +} + +/* ---------------------------------------------------------------------- + return an equilibrium bond length +------------------------------------------------------------------------- */ + +double BondGromos::equilibrium_distance(int i) +{ + return r0[i]; +} + +/* ---------------------------------------------------------------------- + proc 0 writes out coeffs to restart file 
+------------------------------------------------------------------------- */ + +void BondGromos::write_restart(FILE *fp) +{ + fwrite(&k[1],sizeof(double),atom->nbondtypes,fp); + fwrite(&r0[1],sizeof(double),atom->nbondtypes,fp); +} + +/* ---------------------------------------------------------------------- + proc 0 reads coeffs from restart file, bcasts them +------------------------------------------------------------------------- */ + +void BondGromos::read_restart(FILE *fp) +{ + allocate(); + + if (comm->me == 0) { + fread(&k[1],sizeof(double),atom->nbondtypes,fp); + fread(&r0[1],sizeof(double),atom->nbondtypes,fp); + } + MPI_Bcast(&k[1],atom->nbondtypes,MPI_DOUBLE,0,world); + MPI_Bcast(&r0[1],atom->nbondtypes,MPI_DOUBLE,0,world); + + for (int i = 1; i <= atom->nbondtypes; i++) setflag[i] = 1; +} + +/* ---------------------------------------------------------------------- + proc 0 writes to data file +------------------------------------------------------------------------- */ + +void BondGromos::write_data(FILE *fp) +{ + for (int i = 1; i <= atom->nbondtypes; i++) + fprintf(fp,"%d %g %g\n",i,k[i],r0[i]); +} + +/* ---------------------------------------------------------------------- */ + +double BondGromos::single(int type, double rsq, int i, int j, + double &fforce) +{ + double dr = rsq - r0[type]*r0[type]; + fforce = -4.0*k[type] * dr; + return k[type]*dr*dr; +} + +/* ---------------------------------------------------------------------- + Return ptr to internal members upon request. 
+------------------------------------------------------------------------ */ +void *BondGromos::extract( char *str, int &dim ) +{ + dim = 1; + if( strcmp(str,"kappa")==0) return (void*) k; + if( strcmp(str,"r0")==0) return (void*) r0; + return NULL; +} diff --git a/src/MOLECULE/bond_gromos.h b/src/MOLECULE/bond_gromos.h new file mode 100644 index 0000000000000000000000000000000000000000..dafe85e92b2e671427d3c1e8e8e68e6ab38b0202 --- /dev/null +++ b/src/MOLECULE/bond_gromos.h @@ -0,0 +1,58 @@ +/* -*- c++ -*- ---------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +#ifdef BOND_CLASS + +BondStyle(gromos,BondGromos) + +#else + +#ifndef LMP_BOND_GROMOS_H +#define LMP_BOND_GROMOS_H + +#include <stdio.h> +#include "bond.h" + +namespace LAMMPS_NS { + +class BondGromos : public Bond { + public: + BondGromos(class LAMMPS *); + virtual ~BondGromos(); + virtual void compute(int, int); + void coeff(int, char **); + double equilibrium_distance(int); + void write_restart(FILE *); + void read_restart(FILE *); + void write_data(FILE *); + double single(int, double, int, int, double &); + virtual void *extract(char *, int &); + + protected: + double *k,*r0; + + virtual void allocate(); +}; + +} + +#endif +#endif + +/* ERROR/WARNING messages: + +E: Incorrect args for bond coefficients + +Self-explanatory. Check the input script or data file. 
+ +*/ diff --git a/src/MOLECULE/bond_table.cpp b/src/MOLECULE/bond_table.cpp index 38cbe7e406823af76b118c6eb0529515e8d7971d..4f8db66757c5b71fa392e3c84ee5d5d6f76ec946 100644 --- a/src/MOLECULE/bond_table.cpp +++ b/src/MOLECULE/bond_table.cpp @@ -590,29 +590,29 @@ double BondTable::splint(double *xa, double *ya, double *y2a, int n, double x) void BondTable::uf_lookup(int type, double x, double &u, double &f) { - int itable; + if (!ISFINITE(x)) { + error->one(FLERR,"Illegal bond in bond style table"); + } + double fraction,a,b; char estr[128]; - - Table *tb = &tables[tabindex[type]]; - if (x < tb->lo) { + const Table *tb = &tables[tabindex[type]]; + const int itable = static_cast<int> ((x - tb->lo) * tb->invdelta); + if (itable < 0) { sprintf(estr,"Bond length < table inner cutoff: " "type %d length %g",type,x); error->one(FLERR,estr); - } - if (x > tb->hi) { + } else if (itable >= tablength) { sprintf(estr,"Bond length > table outer cutoff: " "type %d length %g",type,x); error->one(FLERR,estr); } if (tabstyle == LINEAR) { - itable = static_cast<int> ((x - tb->lo) * tb->invdelta); fraction = (x - tb->r[itable]) * tb->invdelta; u = tb->e[itable] + fraction*tb->de[itable]; f = tb->f[itable] + fraction*tb->df[itable]; } else if (tabstyle == SPLINE) { - itable = static_cast<int> ((x - tb->lo) * tb->invdelta); fraction = (x - tb->r[itable]) * tb->invdelta; b = (x - tb->r[itable]) * tb->invdelta; @@ -633,19 +633,28 @@ void BondTable::uf_lookup(int type, double x, double &u, double &f) void BondTable::u_lookup(int type, double x, double &u) { - int itable; - double fraction,a,b; + if (!ISFINITE(x)) { + error->one(FLERR,"Illegal bond in bond style table"); + } - Table *tb = &tables[tabindex[type]]; - x = MAX(x,tb->lo); - x = MIN(x,tb->hi); + double fraction,a,b; + char estr[128]; + const Table *tb = &tables[tabindex[type]]; + const int itable = static_cast<int> ((x - tb->lo) * tb->invdelta); + if (itable < 0) { + sprintf(estr,"Bond length < table inner cutoff: " + "type %d 
length %g",type,x); + error->one(FLERR,estr); + } else if (itable >= tablength) { + sprintf(estr,"Bond length > table outer cutoff: " + "type %d length %g",type,x); + error->one(FLERR,estr); + } if (tabstyle == LINEAR) { - itable = static_cast<int> ((x - tb->lo) * tb->invdelta); fraction = (x - tb->r[itable]) * tb->invdelta; u = tb->e[itable] + fraction*tb->de[itable]; } else if (tabstyle == SPLINE) { - itable = static_cast<int> ((x - tb->lo) * tb->invdelta); fraction = (x - tb->r[itable]) * tb->invdelta; b = (x - tb->r[itable]) * tb->invdelta; diff --git a/src/Makefile b/src/Makefile index 243ac869e90a4de35c4cb701ad05fc792069649a..e0f0db77fe3d5eea0ed95b04541047344919ee45 100644 --- a/src/Makefile +++ b/src/Makefile @@ -63,7 +63,7 @@ PACKUSER = user-atc user-awpmd user-cgdna user-cgsdk user-colvars \ user-mgpt user-misc user-molfile \ user-netcdf user-omp user-phonon user-qmmm user-qtb \ user-quip user-reaxc user-smd user-smtbq user-sph user-tally \ - user-vtk + user-uef user-vtk PACKLIB = compress gpu kim kokkos latte meam mpiio mscg poems \ python reax voronoi \ diff --git a/src/OPT/pair_lj_long_coul_long_opt.cpp b/src/OPT/pair_lj_long_coul_long_opt.cpp index 9004e5c93cbe57007584de2105e6e29af2e65d0a..678d2d8bc404581f7ede6e3e78ddd341cf434485 100644 --- a/src/OPT/pair_lj_long_coul_long_opt.cpp +++ b/src/OPT/pair_lj_long_coul_long_opt.cpp @@ -726,7 +726,7 @@ void PairLJLongCoulLongOpt::eval_outer() double cut_in_off_sq = cut_in_off*cut_in_off; double cut_in_on_sq = cut_in_on*cut_in_on; - ineighn = (ineigh = listouter->ilist)+listouter->inum; + ineighn = (ineigh = list->ilist)+list->inum; for (; ineigh<ineighn; ++ineigh) { // loop over my atoms i = *ineigh; fi = f0+3*i; @@ -735,7 +735,7 @@ void PairLJLongCoulLongOpt::eval_outer() lj1i = lj1[typei]; lj2i = lj2[typei]; lj3i = lj3[typei]; lj4i = lj4[typei]; cutsqi = cutsq[typei]; cut_ljsqi = cut_ljsq[typei]; memcpy(xi, x0+(i+(i<<1)), sizeof(vector)); - jneighn = (jneigh = 
listouter->firstneigh[i])+listouter->numneigh[i]; + jneighn = (jneigh = list->firstneigh[i])+list->numneigh[i]; for (; jneigh<jneighn; ++jneigh) { // loop over neighbors j = *jneigh; diff --git a/src/Purge.list b/src/Purge.list index 7ccdf3afd52ac354c65fc6638557aca7491eed00..312994fdb7cf413464728e45dfec655a7b6feb95 100644 --- a/src/Purge.list +++ b/src/Purge.list @@ -16,6 +16,9 @@ style_region.h style_neigh_bin.h style_neigh_pair.h style_neigh_stencil.h +# deleted on 11 October 2017 +fix_shear_history_omp.cpp +fix_shear_history_omp.h # deleted on 5 September 2017 npair_halffull_newton_ssa.cpp npair_halffull_newton_ssa.h diff --git a/src/REPLICA/prd.cpp b/src/REPLICA/prd.cpp index 30ebc779c5e9a8a5c90b335e975c2c4cf05edd65..14eeac8d664551927278454f577de998a7bdc4a0 100644 --- a/src/REPLICA/prd.cpp +++ b/src/REPLICA/prd.cpp @@ -310,6 +310,7 @@ void PRD::command(int narg, char **arg) time_dephase = time_dynamics = time_quench = time_comm = time_output = 0.0; bigint clock = 0; + timer->init(); timer->barrier_start(); time_start = timer->get_wall(Timer::TOTAL); diff --git a/src/REPLICA/tad.cpp b/src/REPLICA/tad.cpp index 5a4d88522488efcb62e4cff69ce482b1f610cf88..347cd3ba67c0927740086a879d2de5f750cf3faf 100644 --- a/src/REPLICA/tad.cpp +++ b/src/REPLICA/tad.cpp @@ -274,6 +274,7 @@ void TAD::command(int narg, char **arg) nbuild = ndanger = 0; time_neb = time_dynamics = time_quench = time_comm = time_output = 0.0; + timer->init(); timer->barrier_start(); time_start = timer->get_wall(Timer::TOTAL); diff --git a/src/RIGID/fix_rigid.cpp b/src/RIGID/fix_rigid.cpp index d2a770cc4712db6b48b2f18823a86a23e1b61f82..33a4b441fda4f9f3eefb3039db46c4ea2724a454 100644 --- a/src/RIGID/fix_rigid.cpp +++ b/src/RIGID/fix_rigid.cpp @@ -29,6 +29,8 @@ #include "comm.h" #include "random_mars.h" #include "force.h" +#include "input.h" +#include "variable.h" #include "output.h" #include "math_const.h" #include "memory.h" @@ -127,15 +129,60 @@ FixRigid::FixRigid(LAMMPS *lmp, int narg, char **arg) : // 
nbody = # of non-zero ncount values // use nall as incremented ptr to set body[] values for each atom - } else if (strcmp(arg[3],"molecule") == 0) { + } else if (strcmp(arg[3],"molecule") == 0 || strcmp(arg[3],"custom") == 0) { rstyle = MOLECULE; - iarg = 4; - if (atom->molecule_flag == 0) - error->all(FLERR,"Fix rigid molecule requires atom attribute molecule"); - + tagint *molecule; int *mask = atom->mask; - tagint *molecule = atom->molecule; int nlocal = atom->nlocal; + int custom_flag = strcmp(arg[3],"custom") == 0; + if (custom_flag) { + if (narg < 5) error->all(FLERR,"Illegal fix rigid command"); + + // determine whether atom-style variable or atom property is used. + if (strstr(arg[4],"i_") == arg[4]) { + int is_double=0; + int custom_index = atom->find_custom(arg[4]+2,is_double); + if (custom_index == -1) + error->all(FLERR,"Fix rigid custom requires previously defined property/atom"); + else if (is_double) + error->all(FLERR,"Fix rigid custom requires integer-valued property/atom"); + int minval = INT_MAX; + int *value = atom->ivector[custom_index]; + for (i = 0; i < nlocal; i++) + if (mask[i] & groupbit) minval = MIN(minval,value[i]); + int vmin = minval; + MPI_Allreduce(&vmin,&minval,1,MPI_INT,MPI_MIN,world); + molecule = new tagint[nlocal]; + for (i = 0; i < nlocal; i++) + if (mask[i] & groupbit) + molecule[i] = (tagint)(value[i] - minval + 1); + else + molecule[i] = 0; + + } else if (strstr(arg[4],"v_") == arg[4]) { + int ivariable = input->variable->find(arg[4]+2); + if (ivariable < 0) + error->all(FLERR,"Variable name for fix rigid custom does not exist"); + if (input->variable->atomstyle(ivariable) == 0) + error->all(FLERR,"Fix rigid custom variable is no atom-style variable"); + double *value = new double[nlocal]; + input->variable->compute_atom(ivariable,0,value,1,0); + int minval = INT_MAX; + for (i = 0; i < nlocal; i++) + if (mask[i] & groupbit) minval = MIN(minval,(int)value[i]); + int vmin = minval; + 
MPI_Allreduce(&vmin,&minval,1,MPI_INT,MPI_MIN,world); + molecule = new tagint[nlocal]; + for (i = 0; i < nlocal; i++) + if (mask[i] & groupbit) molecule[i] = (tagint)((tagint)value[i] - minval + 1); + delete[] value; + } else error->all(FLERR,"Unsupported fix rigid custom property"); + } else { + if (atom->molecule_flag == 0) + error->all(FLERR,"Fix rigid molecule requires atom attribute molecule"); + molecule = atom->molecule; + } + iarg = 4 + custom_flag; tagint maxmol_tag = -1; for (i = 0; i < nlocal; i++) @@ -174,6 +221,7 @@ FixRigid::FixRigid(LAMMPS *lmp, int narg, char **arg) : } memory->destroy(ncount); + if (custom_flag) delete [] molecule; // each listed group is a rigid body // check if all listed groups exist diff --git a/src/RIGID/fix_rigid_small.cpp b/src/RIGID/fix_rigid_small.cpp index 1404c3bf58da22f20739dbf521c8aa2adcfea723..e6083f64937e5077ffac90c88ebe5014b393629a 100644 --- a/src/RIGID/fix_rigid_small.cpp +++ b/src/RIGID/fix_rigid_small.cpp @@ -29,7 +29,9 @@ #include "group.h" #include "comm.h" #include "force.h" +#include "input.h" #include "output.h" +#include "variable.h" #include "random_mars.h" #include "math_const.h" #include "memory.h" @@ -64,11 +66,12 @@ enum{FULL_BODY,INITIAL,FINAL,FORCE_TORQUE,VCM_ANGMOM,XCM_MASS,ITENSOR,DOF}; FixRigidSmall::FixRigidSmall(LAMMPS *lmp, int narg, char **arg) : Fix(lmp, narg, arg), step_respa(NULL), - infile(NULL), body(NULL), bodyown(NULL), bodytag(NULL), atom2body(NULL), - xcmimage(NULL), displace(NULL), eflags(NULL), orient(NULL), dorient(NULL), - avec_ellipsoid(NULL), avec_line(NULL), avec_tri(NULL), counts(NULL), - itensor(NULL), mass_body(NULL), langextra(NULL), random(NULL), id_dilate(NULL), - onemols(NULL), hash(NULL), bbox(NULL), ctr(NULL), idclose(NULL), rsqclose(NULL) + infile(NULL), body(NULL), bodyown(NULL), bodytag(NULL), atom2body(NULL), + xcmimage(NULL), displace(NULL), eflags(NULL), orient(NULL), dorient(NULL), + avec_ellipsoid(NULL), avec_line(NULL), avec_tri(NULL), counts(NULL), + 
itensor(NULL), mass_body(NULL), langextra(NULL), random(NULL), + id_dilate(NULL), onemols(NULL), hash(NULL), bbox(NULL), ctr(NULL), + idclose(NULL), rsqclose(NULL) { int i; @@ -89,7 +92,7 @@ FixRigidSmall::FixRigidSmall(LAMMPS *lmp, int narg, char **arg) : // perform initial allocation of atom-based arrays // register with Atom class - extended = orientflag = dorientflag = 0; + extended = orientflag = dorientflag = customflag = 0; bodyown = NULL; bodytag = NULL; atom2body = NULL; @@ -103,24 +106,71 @@ FixRigidSmall::FixRigidSmall(LAMMPS *lmp, int narg, char **arg) : // parse args for rigid body specification + int *mask = atom->mask; + tagint *bodyid = NULL; + int nlocal = atom->nlocal; + if (narg < 4) error->all(FLERR,"Illegal fix rigid/small command"); - if (strcmp(arg[3],"molecule") != 0) - error->all(FLERR,"Illegal fix rigid/small command"); + if (strcmp(arg[3],"molecule") == 0) { + if (atom->molecule_flag == 0) + error->all(FLERR,"Fix rigid/small requires atom attribute molecule"); + bodyid = atom->molecule; + + } else if (strcmp(arg[3],"custom") == 0) { + if (narg < 5) error->all(FLERR,"Illegal fix rigid/small command"); + bodyid = new tagint[nlocal]; + customflag = 1; + + // determine whether atom-style variable or atom property is used. 
+ if (strstr(arg[4],"i_") == arg[4]) { + int is_double=0; + int custom_index = atom->find_custom(arg[4]+2,is_double); + if (custom_index == -1) + error->all(FLERR,"Fix rigid/small custom requires previously defined property/atom"); + else if (is_double) + error->all(FLERR,"Fix rigid/small custom requires integer-valued property/atom"); + + int minval = INT_MAX; + int *value = atom->ivector[custom_index]; + for (i = 0; i < nlocal; i++) + if (mask[i] & groupbit) minval = MIN(minval,value[i]); + int vmin = minval; + MPI_Allreduce(&vmin,&minval,1,MPI_INT,MPI_MIN,world); + + for (i = 0; i < nlocal; i++) + if (mask[i] & groupbit) + bodyid[i] = (tagint)(value[i] - minval + 1); + else bodyid[i] = 0; + + } else if (strstr(arg[4],"v_") == arg[4]) { + int ivariable = input->variable->find(arg[4]+2); + if (ivariable < 0) + error->all(FLERR,"Variable name for fix rigid/small custom does not exist"); + if (input->variable->atomstyle(ivariable) == 0) + error->all(FLERR,"Fix rigid/small custom variable is no atom-style variable"); + double *value = new double[nlocal]; + input->variable->compute_atom(ivariable,0,value,1,0); + int minval = INT_MAX; + for (i = 0; i < nlocal; i++) + if (mask[i] & groupbit) minval = MIN(minval,(int)value[i]); + int vmin = minval; + MPI_Allreduce(&vmin,&minval,1,MPI_INT,MPI_MIN,world); + + for (i = 0; i < nlocal; i++) + if (mask[i] & groupbit) + bodyid[i] = (tagint)((tagint)value[i] - minval + 1); + else bodyid[i] = 0; // atom i is outside the fix group: ID 0, same as the i_ branch + delete[] value; + } else error->all(FLERR,"Unsupported fix rigid custom property"); + } else error->all(FLERR,"Illegal fix rigid/small command"); - if (atom->molecule_flag == 0) - error->all(FLERR,"Fix rigid/small requires atom attribute molecule"); if (atom->map_style == 0) error->all(FLERR,"Fix rigid/small requires an atom map, see atom_modify"); - // maxmol = largest molecule # - - int *mask = atom->mask; - tagint *molecule = atom->molecule; - int nlocal = atom->nlocal; - + // maxmol = largest bodyid # maxmol = -1; for (i = 0; i < 
nlocal; i++) - if (mask[i] & groupbit) maxmol = MAX(maxmol,molecule[i]); + if (mask[i] & groupbit) maxmol = MAX(maxmol,bodyid[i]); tagint itmp; MPI_Allreduce(&maxmol,&itmp,1,MPI_LMP_TAGINT,MPI_MAX,world); @@ -155,6 +205,8 @@ FixRigidSmall::FixRigidSmall(LAMMPS *lmp, int narg, char **arg) : } int iarg = 4; + if (customflag) ++iarg; + while (iarg < narg) { if (strcmp(arg[iarg],"langevin") == 0) { if (iarg+5 > narg) error->all(FLERR,"Illegal fix rigid/small command"); @@ -344,11 +396,12 @@ FixRigidSmall::FixRigidSmall(LAMMPS *lmp, int narg, char **arg) : if (pcouple == XYZ || (domain->dimension == 2 && pcouple == XY)) pstyle = ISO; else pstyle = ANISO; - // create rigid bodies based on molecule ID + // create rigid bodies based on molecule or custom ID // sets bodytag for owned atoms // body attributes are computed later by setup_bodies() - create_bodies(); + create_bodies(bodyid); + if (customflag) delete [] bodyid; // set nlocal_body and allocate bodies I own @@ -1424,7 +1477,7 @@ void FixRigidSmall::set_v() set bodytag for all owned atoms ------------------------------------------------------------------------- */ -void FixRigidSmall::create_bodies() +void FixRigidSmall::create_bodies(tagint *bodyid) { int i,m,n; double unwrap[3]; @@ -1464,8 +1517,8 @@ void FixRigidSmall::create_bodies() double *buf; memory->create(buf,ncount*percount,"rigid/small:buf"); - // create map hash for storing unique molecule IDs of my atoms - // key = molecule ID + // create map hash for storing unique body IDs of my atoms + // key = body ID // value = index into per-body data structure // n = # of entries in hash @@ -1477,12 +1530,10 @@ void FixRigidSmall::create_bodies() // value = index into N-length data structure // n = count of unique bodies my atoms are part of - tagint *molecule = atom->molecule; - n = 0; for (i = 0; i < nlocal; i++) { if (!(mask[i] & groupbit)) continue; - if (hash->find(molecule[i]) == hash->end()) (*hash)[molecule[i]] = n++; + if (hash->find(bodyid[i]) == 
hash->end()) (*hash)[bodyid[i]] = n++; } // bbox = bounding box of each rigid body my atoms are part of @@ -1494,7 +1545,7 @@ void FixRigidSmall::create_bodies() bbox[i][1] = bbox[i][3] = bbox[i][5] = -BIG; } - // pack my atoms into buffer as molecule ID, unwrapped coords + // pack my atoms into buffer as body ID, unwrapped coords double **x = atom->x; @@ -1502,7 +1553,7 @@ void FixRigidSmall::create_bodies() for (i = 0; i < nlocal; i++) { if (!(mask[i] & groupbit)) continue; domain->unmap(x[i],image[i],unwrap); - buf[m++] = molecule[i]; + buf[m++] = bodyid[i]; buf[m++] = unwrap[0]; buf[m++] = unwrap[1]; buf[m++] = unwrap[2]; @@ -1542,7 +1593,7 @@ void FixRigidSmall::create_bodies() for (i = 0; i < n; i++) rsqclose[i] = BIG; - // pack my atoms into buffer as molecule ID, atom ID, unwrapped coords + // pack my atoms into buffer as body ID, atom ID, unwrapped coords tagint *tag = atom->tag; @@ -1550,7 +1601,7 @@ void FixRigidSmall::create_bodies() for (i = 0; i < nlocal; i++) { if (!(mask[i] & groupbit)) continue; domain->unmap(x[i],image[i],unwrap); - buf[m++] = molecule[i]; + buf[m++] = bodyid[i]; buf[m++] = ubuf(tag[i]).d; buf[m++] = unwrap[0]; buf[m++] = unwrap[1]; @@ -1570,7 +1621,7 @@ void FixRigidSmall::create_bodies() for (i = 0; i < nlocal; i++) { bodytag[i] = 0; if (!(mask[i] & groupbit)) continue; - m = hash->find(molecule[i])->second; + m = hash->find(bodyid[i])->second; bodytag[i] = idclose[m]; rsqmax = MAX(rsqmax,rsqclose[m]); } diff --git a/src/RIGID/fix_rigid_small.h b/src/RIGID/fix_rigid_small.h index b07dea4f333ea3f27b7919fb74a4b2c52fc9585e..22f9b0c16c31e7ba8910a558dc1802fff6d4b128 100644 --- a/src/RIGID/fix_rigid_small.h +++ b/src/RIGID/fix_rigid_small.h @@ -79,6 +79,7 @@ class FixRigidSmall : public Fix { char *infile; // file to read rigid body attributes from int setupflag; // 1 if body properties are setup, else 0 int commflag; // various modes of forward/reverse comm + int customflag; // 1 if custom property/variable define bodies int nbody; 
// total # of rigid bodies int nlinear; // total # of linear rigid bodies tagint maxmol; // max mol-ID @@ -187,7 +188,7 @@ class FixRigidSmall : public Fix { void image_shift(); void set_xv(); void set_v(); - void create_bodies(); + void create_bodies(tagint *); void setup_bodies_static(); void setup_bodies_dynamic(); void readfile(int, double **, int *); diff --git a/src/USER-COLVARS/colvarproxy_lammps.cpp b/src/USER-COLVARS/colvarproxy_lammps.cpp index 17dff305673456b1312ef2be36393f6c8e54281e..c5b9e5a60c860355a4a5960089b8f288b9566841 100644 --- a/src/USER-COLVARS/colvarproxy_lammps.cpp +++ b/src/USER-COLVARS/colvarproxy_lammps.cpp @@ -120,12 +120,6 @@ colvarproxy_lammps::colvarproxy_lammps(LAMMPS_NS::LAMMPS *lmp, if (restart_output_prefix_str.rfind(".*") != std::string::npos) restart_output_prefix_str.erase(restart_output_prefix_str.rfind(".*"),2); -#if defined(_OPENMP) - if (smp_thread_id() == 0) { - omp_init_lock(&smp_lock_state); - } -#endif - // initialize multi-replica support, if available if (replica_enabled()) { MPI_Comm_rank(inter_comm, &inter_me); @@ -143,7 +137,7 @@ void colvarproxy_lammps::init(const char *conf_file) colvars = new colvarmodule(this); cvm::log("Using LAMMPS interface, version "+ - cvm::to_str(COLVARPROXY_VERSION)+".\n"); + cvm::to_str(COLVARPROXY_VERSION)+".\n"); my_angstrom = _lmp->force->angstrom; my_boltzmann = _lmp->force->boltz; @@ -155,7 +149,8 @@ void colvarproxy_lammps::init(const char *conf_file) colvars->setup_output(); if (_lmp->update->ntimestep != 0) { - cvm::log("Initializing step number as firstTimestep.\n"); + cvm::log("Setting initial step number from LAMMPS: "+ + cvm::to_str(_lmp->update->ntimestep)+"\n"); colvars->it = colvars->it_restart = _lmp->update->ntimestep; } @@ -172,7 +167,6 @@ colvarproxy_lammps::~colvarproxy_lammps() { delete _random; if (colvars != NULL) { - colvars->write_output_files(); delete colvars; colvars = NULL; } @@ -188,10 +182,18 @@ int colvarproxy_lammps::setup() // trigger colvars computation 
double colvarproxy_lammps::compute() { + if (cvm::debug()) { + log(std::string(cvm::line_marker)+ + "colvarproxy_lammps step no. "+ + cvm::to_str(_lmp->update->ntimestep)+" [first - last = "+ + cvm::to_str(_lmp->update->beginstep)+" - "+ + cvm::to_str(_lmp->update->endstep)+"]\n"); + } + if (first_timestep) { first_timestep = false; } else { - // Use the time step number inherited from LAMMPS + // Use the time step number from LAMMPS Update object if ( _lmp->update->ntimestep - previous_step == 1 ) colvars->it++; // Other cases could mean: @@ -241,6 +243,13 @@ void colvarproxy_lammps::serialize_status(std::string &rst) rst = os.str(); } +void colvarproxy_lammps::write_output_files() +{ + // TODO skip output if undefined + colvars->write_restart_file(cvm::output_prefix()+".colvars.state"); + colvars->write_output_files(); +} + // set status from string bool colvarproxy_lammps::deserialize_status(std::string &rst) { @@ -331,89 +340,6 @@ int colvarproxy_lammps::backup_file(char const *filename) } -#if defined(_OPENMP) - - -// SMP support - -int colvarproxy_lammps::smp_enabled() -{ - if (b_smp_active) { - return COLVARS_OK; - } - return COLVARS_ERROR; -} - - -int colvarproxy_lammps::smp_colvars_loop() -{ - colvarmodule *cv = this->colvars; - colvarproxy_lammps *proxy = (colvarproxy_lammps *) cv->proxy; -#pragma omp parallel for - for (size_t i = 0; i < cv->variables_active_smp()->size(); i++) { - colvar *x = (*(cv->variables_active_smp()))[i]; - int x_item = (*(cv->variables_active_smp_items()))[i]; - if (cvm::debug()) { - cvm::log("["+cvm::to_str(proxy->smp_thread_id())+"/"+cvm::to_str(proxy->smp_num_threads())+ - "]: calc_colvars_items_smp(), i = "+cvm::to_str(i)+", cv = "+ - x->name+", cvc = "+cvm::to_str(x_item)+"\n"); - } - x->calc_cvcs(x_item, 1); - } - return cvm::get_error(); -} - - -int colvarproxy_lammps::smp_biases_loop() -{ - colvarmodule *cv = this->colvars; -#pragma omp parallel for - for (size_t i = 0; i < cv->biases_active()->size(); i++) { - colvarbias 
*b = (*(cv->biases_active()))[i]; - if (cvm::debug()) { - cvm::log("Calculating bias \""+b->name+"\" on thread "+ - cvm::to_str(smp_thread_id())+"\n"); - } - b->update(); - } - return cvm::get_error(); -} - - -int colvarproxy_lammps::smp_thread_id() -{ - return omp_get_thread_num(); -} - - -int colvarproxy_lammps::smp_num_threads() -{ - return omp_get_max_threads(); -} - - -int colvarproxy_lammps::smp_lock() -{ - omp_set_lock(&smp_lock_state); - return COLVARS_OK; -} - - -int colvarproxy_lammps::smp_trylock() -{ - return omp_test_lock(&smp_lock_state) ? COLVARS_OK : COLVARS_ERROR; -} - - -int colvarproxy_lammps::smp_unlock() -{ - omp_unset_lock(&smp_lock_state); - return COLVARS_OK; -} - -#endif - - // multi-replica support void colvarproxy_lammps::replica_comm_barrier() { diff --git a/src/USER-COLVARS/colvarproxy_lammps.h b/src/USER-COLVARS/colvarproxy_lammps.h index 6cdf0edfe8b97272aab39841d8229e8f08d37331..af2aa04dfc890a55443588435206279280581b6c 100644 --- a/src/USER-COLVARS/colvarproxy_lammps.h +++ b/src/USER-COLVARS/colvarproxy_lammps.h @@ -25,10 +25,6 @@ #include <vector> #include <iostream> -#if defined(_OPENMP) -#include <omp.h> -#endif - /* struct for packed data communication of coordinates and forces. */ struct commdata { int tag,type; @@ -91,7 +87,8 @@ class colvarproxy_lammps : public colvarproxy { // methods for lammps to move data or trigger actions in the proxy public: void set_temperature(double t) { t_target = t; }; - bool total_forces_enabled() const { return total_force_requested; }; + bool total_forces_enabled() const { return total_force_requested; }; + bool total_forces_same_step() const { return true; }; bool want_exit() const { return do_exit; }; // perform colvars computation. 
returns biasing energy @@ -103,6 +100,10 @@ class colvarproxy_lammps : public colvarproxy { // set status from string bool deserialize_status(std::string &); + // Write files expected from Colvars (called by post_run()) + void write_output_files(); + + // implementation of pure methods from base class public: @@ -140,21 +141,6 @@ class colvarproxy_lammps : public colvarproxy { // implementation of optional methods from base class public: -#if defined(_OPENMP) - // SMP support - int smp_enabled(); - int smp_colvars_loop(); - int smp_biases_loop(); - int smp_thread_id(); - int smp_num_threads(); -protected: - omp_lock_t smp_lock_state; -public: - int smp_lock(); - int smp_trylock(); - int smp_unlock(); -#endif - // Multi-replica support // Indicate if multi-replica support is available and active virtual bool replica_enabled() { return (inter_comm != MPI_COMM_NULL); } diff --git a/src/USER-COLVARS/colvarproxy_lammps_version.h b/src/USER-COLVARS/colvarproxy_lammps_version.h index 0eb6f2d95ac6bbb4c98fe5c75fc2308a7758342c..45ecea867f17c7fdf2b5bbad780ac860287e8836 100644 --- a/src/USER-COLVARS/colvarproxy_lammps_version.h +++ b/src/USER-COLVARS/colvarproxy_lammps_version.h @@ -1,5 +1,5 @@ #ifndef COLVARPROXY_VERSION -#define COLVARPROXY_VERSION "2017-07-19" +#define COLVARPROXY_VERSION "2017-10-20" // This file is part of the Collective Variables module (Colvars). 
// The original version of Colvars and its updates are located at: // https://github.com/colvars/colvars diff --git a/src/USER-COLVARS/fix_colvars.cpp b/src/USER-COLVARS/fix_colvars.cpp index 59e6c46b76113bd1594dcf2acb983ee75212b15b..956ba6498a518dacc28f4848788fd55427db839e 100644 --- a/src/USER-COLVARS/fix_colvars.cpp +++ b/src/USER-COLVARS/fix_colvars.cpp @@ -379,6 +379,7 @@ int FixColvars::setmask() mask |= POST_FORCE; mask |= POST_FORCE_RESPA; mask |= END_OF_STEP; + mask |= POST_RUN; return mask; } @@ -913,6 +914,7 @@ void FixColvars::write_restart(FILE *fp) if (me == 0) { std::string rest_text(""); proxy->serialize_status(rest_text); + // TODO call write_output_files() const char *cvm_state = rest_text.c_str(); int len = strlen(cvm_state) + 1; // need to include terminating NULL byte. fwrite(&len,sizeof(int),1,fp); @@ -934,6 +936,15 @@ void FixColvars::restart(char *buf) /* ---------------------------------------------------------------------- */ +void FixColvars::post_run() +{ + if (me == 0) { + proxy->write_output_files(); + } +} + +/* ---------------------------------------------------------------------- */ + double FixColvars::compute_scalar() { return energy; diff --git a/src/USER-COLVARS/fix_colvars.h b/src/USER-COLVARS/fix_colvars.h index c00b18aa4668b56ea8a1a82a729a0d02a9b0433a..509eca5de35892b3094cf5749b7d6d088d7b0d18 100644 --- a/src/USER-COLVARS/fix_colvars.h +++ b/src/USER-COLVARS/fix_colvars.h @@ -56,6 +56,7 @@ class FixColvars : public Fix { virtual void post_force(int); virtual void post_force_respa(int, int, int); virtual void end_of_step(); + virtual void post_run(); virtual double compute_scalar(); virtual double memory_usage(); diff --git a/src/USER-DPD/nbin_ssa.h b/src/USER-DPD/nbin_ssa.h index 2a0175081ea02edab1647cdfaf210acc647300e0..12a572b94caab73125eb01887b89849189c11826 100644 --- a/src/USER-DPD/nbin_ssa.h +++ b/src/USER-DPD/nbin_ssa.h @@ -23,6 +23,8 @@ NBinStyle(ssa, #define LMP_NBIN_SSA_H #include "nbin_standard.h" +#include 
"math.h" +#include "error.h" namespace LAMMPS_NS { @@ -47,79 +49,14 @@ class NBinSSA : public NBinStandard { bigint memory_usage(); - inline - int coord2bin(const double & x,const double & y,const double & z) const - { - int ix,iy,iz; - - if (x >= bboxhi_[0]) - ix = static_cast<int> ((x-bboxhi_[0])*bininvx) + nbinx; - else if (x >= bboxlo_[0]) { - ix = static_cast<int> ((x-bboxlo_[0])*bininvx); - ix = MIN(ix,nbinx-1); - } else - ix = static_cast<int> ((x-bboxlo_[0])*bininvx) - 1; - - if (y >= bboxhi_[1]) - iy = static_cast<int> ((y-bboxhi_[1])*bininvy) + nbiny; - else if (y >= bboxlo_[1]) { - iy = static_cast<int> ((y-bboxlo_[1])*bininvy); - iy = MIN(iy,nbiny-1); - } else - iy = static_cast<int> ((y-bboxlo_[1])*bininvy) - 1; - - if (z >= bboxhi_[2]) - iz = static_cast<int> ((z-bboxhi_[2])*bininvz) + nbinz; - else if (z >= bboxlo_[2]) { - iz = static_cast<int> ((z-bboxlo_[2])*bininvz); - iz = MIN(iz,nbinz-1); - } else - iz = static_cast<int> ((z-bboxlo_[2])*bininvz) - 1; - - return (iz-mbinzlo)*mbiny*mbinx + (iy-mbinylo)*mbinx + (ix-mbinxlo); - } - - inline - int coord2bin(const double & x,const double & y,const double & z, int* i) const - { - int ix,iy,iz; - - if (x >= bboxhi_[0]) - ix = static_cast<int> ((x-bboxhi_[0])*bininvx) + nbinx; - else if (x >= bboxlo_[0]) { - ix = static_cast<int> ((x-bboxlo_[0])*bininvx); - ix = MIN(ix,nbinx-1); - } else - ix = static_cast<int> ((x-bboxlo_[0])*bininvx) - 1; - - if (y >= bboxhi_[1]) - iy = static_cast<int> ((y-bboxhi_[1])*bininvy) + nbiny; - else if (y >= bboxlo_[1]) { - iy = static_cast<int> ((y-bboxlo_[1])*bininvy); - iy = MIN(iy,nbiny-1); - } else - iy = static_cast<int> ((y-bboxlo_[1])*bininvy) - 1; - - if (z >= bboxhi_[2]) - iz = static_cast<int> ((z-bboxhi_[2])*bininvz) + nbinz; - else if (z >= bboxlo_[2]) { - iz = static_cast<int> ((z-bboxlo_[2])*bininvz); - iz = MIN(iz,nbinz-1); - } else - iz = static_cast<int> ((z-bboxlo_[2])*bininvz) - 1; - - i[0] = ix - mbinxlo; - i[1] = iy - mbinylo; - i[2] = iz - mbinzlo; - - 
return (iz-mbinzlo)*mbiny*mbinx + (iy-mbinylo)*mbinx + (ix-mbinxlo); - } - inline int coord2bin(const double & x,const double & y,const double & z, int &ixo, int &iyo, int &izo) const { int ix,iy,iz; + if (!ISFINITE(x) || !ISFINITE(y) || !ISFINITE(z)) + error->one(FLERR,"Non-numeric positions - simulation unstable"); + if (x >= bboxhi_[0]) ix = static_cast<int> ((x-bboxhi_[0])*bininvx) + nbinx; else if (x >= bboxlo_[0]) { diff --git a/src/USER-DRUDE/pair_lj_cut_thole_long.cpp b/src/USER-DRUDE/pair_lj_cut_thole_long.cpp index ee9c0744d3d1d6bc32cee5561f63522228f89176..4163a816ac9bcd8662fd25facb1200019071cd76 100644 --- a/src/USER-DRUDE/pair_lj_cut_thole_long.cpp +++ b/src/USER-DRUDE/pair_lj_cut_thole_long.cpp @@ -378,19 +378,6 @@ void PairLJCutTholeLong::init_style() if (ncoultablebits) init_tables(cut_coul,cut_respa); } -/* ---------------------------------------------------------------------- - neighbor callback to inform pair style of neighbor list to use - regular or rRESPA -------------------------------------------------------------------------- */ - -void PairLJCutTholeLong::init_list(int id, NeighList *ptr) -{ - if (id == 0) list = ptr; - else if (id == 1) listinner = ptr; - else if (id == 2) listmiddle = ptr; - else if (id == 3) listouter = ptr; -} - /* ---------------------------------------------------------------------- init for one type pair i,j and corresponding j,i ------------------------------------------------------------------------- */ diff --git a/src/USER-DRUDE/pair_lj_cut_thole_long.h b/src/USER-DRUDE/pair_lj_cut_thole_long.h index 894042f6ce6bdad9219d44a174cd3f2fbeac3741..27a917c7375a5c58c49baf037c9bfb65bea5eb84 100644 --- a/src/USER-DRUDE/pair_lj_cut_thole_long.h +++ b/src/USER-DRUDE/pair_lj_cut_thole_long.h @@ -34,7 +34,6 @@ class PairLJCutTholeLong : public Pair { virtual void settings(int, char **); void coeff(int, char **); virtual void init_style(); - void init_list(int, class NeighList *); virtual double init_one(int, int); void 
write_restart(FILE *); void read_restart(FILE *); diff --git a/src/USER-FEP/pair_lj_charmm_coul_long_soft.cpp b/src/USER-FEP/pair_lj_charmm_coul_long_soft.cpp index d8bfd698bebd48762793ce4d24285d17dd42263e..06285a58d3c3009e8ddb94fa7eb8402dc1ff8c3c 100644 --- a/src/USER-FEP/pair_lj_charmm_coul_long_soft.cpp +++ b/src/USER-FEP/pair_lj_charmm_coul_long_soft.cpp @@ -226,10 +226,10 @@ void PairLJCharmmCoulLongSoft::compute_inner() int newton_pair = force->newton_pair; double qqrd2e = force->qqrd2e; - inum = listinner->inum; - ilist = listinner->ilist; - numneigh = listinner->numneigh; - firstneigh = listinner->firstneigh; + inum = list->inum_inner; + ilist = list->ilist_inner; + numneigh = list->numneigh_inner; + firstneigh = list->firstneigh_inner; double cut_out_on = cut_respa[0]; double cut_out_off = cut_respa[1]; @@ -315,10 +315,10 @@ void PairLJCharmmCoulLongSoft::compute_middle() int newton_pair = force->newton_pair; double qqrd2e = force->qqrd2e; - inum = listmiddle->inum; - ilist = listmiddle->ilist; - numneigh = listmiddle->numneigh; - firstneigh = listmiddle->firstneigh; + inum = list->inum_middle; + ilist = list->ilist_middle; + numneigh = list->numneigh_middle; + firstneigh = list->firstneigh_middle; double cut_in_off = cut_respa[0]; double cut_in_on = cut_respa[1]; @@ -428,10 +428,10 @@ void PairLJCharmmCoulLongSoft::compute_outer(int eflag, int vflag) int newton_pair = force->newton_pair; double qqrd2e = force->qqrd2e; - inum = listouter->inum; - ilist = listouter->ilist; - numneigh = listouter->numneigh; - firstneigh = listouter->firstneigh; + inum = list->inum; + ilist = list->ilist; + numneigh = list->numneigh; + firstneigh = list->firstneigh; double cut_in_off = cut_respa[2]; double cut_in_on = cut_respa[3]; @@ -758,19 +758,6 @@ void PairLJCharmmCoulLongSoft::init_style() g_ewald = force->kspace->g_ewald; } -/* ---------------------------------------------------------------------- - neighbor callback to inform pair style of neighbor list to use - 
regular or rRESPA -------------------------------------------------------------------------- */ - -void PairLJCharmmCoulLongSoft::init_list(int id, NeighList *ptr) -{ - if (id == 0) list = ptr; - else if (id == 1) listinner = ptr; - else if (id == 2) listmiddle = ptr; - else if (id == 3) listouter = ptr; -} - /* ---------------------------------------------------------------------- init for one type pair i,j and corresponding j,i ------------------------------------------------------------------------- */ diff --git a/src/USER-FEP/pair_lj_charmm_coul_long_soft.h b/src/USER-FEP/pair_lj_charmm_coul_long_soft.h index 7e52ec54b556eeef746f8949e1777b2e8f7d2f4e..252c9f66f591c19bf9efa6fbfbb74c05aa685a79 100644 --- a/src/USER-FEP/pair_lj_charmm_coul_long_soft.h +++ b/src/USER-FEP/pair_lj_charmm_coul_long_soft.h @@ -33,7 +33,6 @@ class PairLJCharmmCoulLongSoft : public Pair { void settings(int, char **); void coeff(int, char **); void init_style(); - void init_list(int, class NeighList *); double init_one(int, int); void write_restart(FILE *); void read_restart(FILE *); diff --git a/src/USER-FEP/pair_lj_cut_coul_long_soft.cpp b/src/USER-FEP/pair_lj_cut_coul_long_soft.cpp index f7c4084fe2471a6d809f4c2eba08eb390f2f0ae3..7be2ebabea1fc434f5db0e920adc9f26fa70a875 100644 --- a/src/USER-FEP/pair_lj_cut_coul_long_soft.cpp +++ b/src/USER-FEP/pair_lj_cut_coul_long_soft.cpp @@ -209,10 +209,10 @@ void PairLJCutCoulLongSoft::compute_inner() int newton_pair = force->newton_pair; double qqrd2e = force->qqrd2e; - inum = listinner->inum; - ilist = listinner->ilist; - numneigh = listinner->numneigh; - firstneigh = listinner->firstneigh; + inum = list->inum_inner; + ilist = list->ilist_inner; + numneigh = list->numneigh_inner; + firstneigh = list->firstneigh_inner; double cut_out_on = cut_respa[0]; double cut_out_off = cut_respa[1]; @@ -299,10 +299,10 @@ void PairLJCutCoulLongSoft::compute_middle() int newton_pair = force->newton_pair; double qqrd2e = force->qqrd2e; - inum = listmiddle->inum; 
- ilist = listmiddle->ilist; - numneigh = listmiddle->numneigh; - firstneigh = listmiddle->firstneigh; + inum = list->inum_middle; + ilist = list->ilist_middle; + numneigh = list->numneigh_middle; + firstneigh = list->firstneigh_middle; double cut_in_off = cut_respa[0]; double cut_in_on = cut_respa[1]; @@ -403,10 +403,10 @@ void PairLJCutCoulLongSoft::compute_outer(int eflag, int vflag) int newton_pair = force->newton_pair; double qqrd2e = force->qqrd2e; - inum = listouter->inum; - ilist = listouter->ilist; - numneigh = listouter->numneigh; - firstneigh = listouter->firstneigh; + inum = list->inum; + ilist = list->ilist; + numneigh = list->numneigh; + firstneigh = list->firstneigh; double cut_in_off = cut_respa[2]; double cut_in_on = cut_respa[3]; @@ -686,19 +686,6 @@ void PairLJCutCoulLongSoft::init_style() g_ewald = force->kspace->g_ewald; } -/* ---------------------------------------------------------------------- - neighbor callback to inform pair style of neighbor list to use - regular or rRESPA -------------------------------------------------------------------------- */ - -void PairLJCutCoulLongSoft::init_list(int id, NeighList *ptr) -{ - if (id == 0) list = ptr; - else if (id == 1) listinner = ptr; - else if (id == 2) listmiddle = ptr; - else if (id == 3) listouter = ptr; -} - /* ---------------------------------------------------------------------- init for one type pair i,j and corresponding j,i ------------------------------------------------------------------------- */ diff --git a/src/USER-FEP/pair_lj_cut_coul_long_soft.h b/src/USER-FEP/pair_lj_cut_coul_long_soft.h index a03be3814add79367bff84fcf79236a7e1dadb5e..d49d1c8641816d8e3e0c13f9dce1d0dded07da4b 100644 --- a/src/USER-FEP/pair_lj_cut_coul_long_soft.h +++ b/src/USER-FEP/pair_lj_cut_coul_long_soft.h @@ -32,7 +32,6 @@ class PairLJCutCoulLongSoft : public Pair { virtual void settings(int, char **); void coeff(int, char **); virtual void init_style(); - void init_list(int, class NeighList *); virtual 
double init_one(int, int); void write_restart(FILE *); void read_restart(FILE *); diff --git a/src/USER-FEP/pair_lj_cut_soft.cpp b/src/USER-FEP/pair_lj_cut_soft.cpp index 8b6280a61aff7cb9b2fca5629e2438ee97985a01..9ae108fa338849a37704e58b66e039fff49d09ee 100644 --- a/src/USER-FEP/pair_lj_cut_soft.cpp +++ b/src/USER-FEP/pair_lj_cut_soft.cpp @@ -164,10 +164,10 @@ void PairLJCutSoft::compute_inner() double *special_lj = force->special_lj; int newton_pair = force->newton_pair; - inum = listinner->inum; - ilist = listinner->ilist; - numneigh = listinner->numneigh; - firstneigh = listinner->firstneigh; + inum = list->inum_inner; + ilist = list->ilist_inner; + numneigh = list->numneigh_inner; + firstneigh = list->firstneigh_inner; double cut_out_on = cut_respa[0]; double cut_out_off = cut_respa[1]; @@ -242,10 +242,10 @@ void PairLJCutSoft::compute_middle() double *special_lj = force->special_lj; int newton_pair = force->newton_pair; - inum = listmiddle->inum; - ilist = listmiddle->ilist; - numneigh = listmiddle->numneigh; - firstneigh = listmiddle->firstneigh; + inum = list->inum_middle; + ilist = list->ilist_middle; + numneigh = list->numneigh_middle; + firstneigh = list->firstneigh_middle; double cut_in_off = cut_respa[0]; double cut_in_on = cut_respa[1]; @@ -333,10 +333,10 @@ void PairLJCutSoft::compute_outer(int eflag, int vflag) double *special_lj = force->special_lj; int newton_pair = force->newton_pair; - inum = listouter->inum; - ilist = listouter->ilist; - numneigh = listouter->numneigh; - firstneigh = listouter->firstneigh; + inum = list->inum; + ilist = list->ilist; + numneigh = list->numneigh; + firstneigh = list->firstneigh; double cut_in_off = cut_respa[2]; double cut_in_on = cut_respa[3]; @@ -556,19 +556,6 @@ void PairLJCutSoft::init_style() } -/* ---------------------------------------------------------------------- - neighbor callback to inform pair style of neighbor list to use - regular or rRESPA 
-------------------------------------------------------------------------- */ - -void PairLJCutSoft::init_list(int id, NeighList *ptr) -{ - if (id == 0) list = ptr; - else if (id == 1) listinner = ptr; - else if (id == 2) listmiddle = ptr; - else if (id == 3) listouter = ptr; -} - /* ---------------------------------------------------------------------- init for one type pair i,j and corresponding j,i ------------------------------------------------------------------------- */ diff --git a/src/USER-FEP/pair_lj_cut_soft.h b/src/USER-FEP/pair_lj_cut_soft.h index 50ce685e5cc527f223761ede879e44620ef53853..46202d78a8fe60f38b2f9a0ec13ee6799e9bd4ef 100644 --- a/src/USER-FEP/pair_lj_cut_soft.h +++ b/src/USER-FEP/pair_lj_cut_soft.h @@ -32,7 +32,6 @@ class PairLJCutSoft : public Pair { virtual void settings(int, char **); void coeff(int, char **); virtual void init_style(); - void init_list(int, class NeighList *); virtual double init_one(int, int); void write_restart(FILE *); void read_restart(FILE *); diff --git a/src/USER-INTEL/Install.sh b/src/USER-INTEL/Install.sh index f7163e6791e7b56976d16d887d163b161bf5b209..da553d158ac24ff88ec9f86fd4e5b88ad7750191 100644 --- a/src/USER-INTEL/Install.sh +++ b/src/USER-INTEL/Install.sh @@ -46,7 +46,7 @@ action nbin_intel.h action nbin_intel.cpp action npair_intel.h action npair_intel.cpp -action intel_simd.h pair_sw_intel.cpp +action intel_simd.h action intel_intrinsics.h pair_tersoff_intel.cpp action intel_intrinsics_airebo.h pair_airebo_intel.cpp diff --git a/src/USER-INTEL/README b/src/USER-INTEL/README index 3b8444605775e8525d3f6c65f24f355e865c8bb7..871d881f39f3649cd67bc641116b6dcfbe539160 100644 --- a/src/USER-INTEL/README +++ b/src/USER-INTEL/README @@ -30,28 +30,37 @@ be added or changed in the Makefile depending on the version: 2017 update 2 - No changes needed 2017 updates 3 or 4 - Use -xCOMMON-AVX512 and not -xHost or -xCORE-AVX512 -2018 or newer - Use -xHost or -xCORE-AVX512 and -qopt-zmm-usage=high +2018 initial release - 
Use -xCOMMON-AVX512 and not -xHost or -xCORE-AVX512 +2018u1 or newer - Use -xHost or -xCORE-AVX512 and -qopt-zmm-usage=high ----------------------------------------------------------------------------- When using the suffix command with "intel", intel styles will be used if they exist. If the suffix command is used with "hybrid intel omp" and the USER-OMP -USER-OMP styles will be used whenever USER-INTEL styles are not available. This -allow for running most styles in LAMMPS with threading. +package is installed, USER-OMP styles will be used whenever USER-INTEL styles are not +available. This allows for running most styles in LAMMPS with threading. ----------------------------------------------------------------------------- -The Long-Range Thread mode (LRT) in the Intel package currently uses -pthreads by default. If pthreads are not supported in the build environment, -the compile flag "-DLMP_INTEL_NOLRT" will disable the feature to allow for -builds without pthreads. Alternatively, "-DLMP_INTEL_LRT11" can be used to -build with compilers that support threads using the C++11 standard. When using +The Long-Range Thread mode (LRT) in the Intel package is enabled through the +-DLMP_INTEL_USELRT define at compile time. All intel optimized makefiles +include this define. This feature will use pthreads by default. +Alternatively, "-DLMP_INTEL_LRT11" can be used to build with compilers that +support threads intrinsically using the C++11 standard. When using LRT mode, you might need to disable OpenMP affinity settings (e.g. export KMP_AFFINITY=none). LAMMPS will generate a warning if the settings need to be changed. ----------------------------------------------------------------------------- +Unless Intel Math Kernel Library (MKL) is unavailable, -DLMP_USE_MKL_RNG +should be added to the compile flags. This will enable using the MKL Mersenne +Twister random number generator (RNG) for Dissipative Particle Dynamics +(DPD). 
This RNG can allow significantly faster performance and it also has a +significantly longer period than the standard RNG for DPD. + +----------------------------------------------------------------------------- + In order to use offload to Intel(R) Xeon Phi(TM) coprocessors, the flag -DLMP_INTEL_OFFLOAD should be set in the Makefile. Offload requires the use of Intel compilers. diff --git a/src/USER-INTEL/TEST/README b/src/USER-INTEL/TEST/README index 434189dd263ecef43212a030f60889c4df0d998b..62602d592036e41353aacab21d7fef9d1aacacde 100644 --- a/src/USER-INTEL/TEST/README +++ b/src/USER-INTEL/TEST/README @@ -9,6 +9,7 @@ # in.intel.tersoff - Silicon benchmark with Tersoff # in.intel.water - Coarse-grain water benchmark using Stillinger-Weber # in.intel.airebo - Polyethelene benchmark with AIREBO +# in.intel.dpd - Dissipative Particle Dynamics # ############################################################################# @@ -16,16 +17,17 @@ # Expected Timesteps/second with turbo on and HT enabled, LAMMPS June-2017 # - Compiled w/ Intel Parallel Studio 2017u2 and Makefile.intel_cpu_intelmpi # -# Xeon E5-2697v4 Xeon Phi 7250 +# Xeon E5-2697v4 Xeon Phi 7250 Xeon Gold 6148 # -# in.intel.lj - 199.5 282.3 -# in.intel.rhodo - 12.4 17.5 -# in.intel.lc - 19.0 25.7 -# in.intel.eam - 59.4 92.8 -# in.intel.sw - 132.4 161.9 -# in.intel.tersoff - 83.3 101.1 -# in.intel.water - 53.4 90.3 -# in.intel.airebo - 7.3 11.8 +# in.intel.lj - 199.5 282.3 317.3 +# in.intel.rhodo - 12.4 17.5 24.4 +# in.intel.lc - 19.0 25.7 26.8 +# in.intel.eam - 59.4 92.8 105.6 +# in.intel.sw - 132.4 161.9 213.8 +# in.intel.tersoff - 83.3 101.1 109.6 +# in.intel.water - 53.4 90.3 105.5 +# in.intel.airebo - 7.3 11.8 17.6 +# in.intel.dpd - 74.5 100.4 148.1 # ############################################################################# diff --git a/src/USER-INTEL/TEST/in.intel.dpd b/src/USER-INTEL/TEST/in.intel.dpd new file mode 100644 index 
0000000000000000000000000000000000000000..e257d91f84b5cad67d0c78e4478ded62ccec4d80 --- /dev/null +++ b/src/USER-INTEL/TEST/in.intel.dpd @@ -0,0 +1,48 @@ +# DPD benchmark + +variable N index on # Newton Setting +variable w index 10 # Warmup Timesteps +variable t index 4000 # Main Run Timesteps +variable m index 1 # Main Run Timestep Multiplier +variable n index 0 # Use NUMA Mapping for Multi-Node +variable p index 0 # Use Power Measurement + +variable x index 4 +variable y index 2 +variable z index 2 + +variable xx equal 20*$x +variable yy equal 20*$y +variable zz equal 20*$z +variable rr equal floor($t*$m) + +newton $N +if "$n > 0" then "processors * * * grid numa" + +units lj +atom_style atomic +comm_modify mode single vel yes + +lattice fcc 3.0 +region box block 0 ${xx} 0 ${yy} 0 ${zz} +create_box 1 box +create_atoms 1 box +mass 1 1.0 + +velocity all create 1.0 87287 loop geom + +pair_style dpd 1.0 1.0 928948 +pair_coeff 1 1 25.0 4.5 + +neighbor 0.5 bin +neigh_modify delay 0 every 1 + +fix 1 all nve +timestep 0.04 + +thermo 1000 + +if "$p > 0" then "run_style verlet/power" + +if "$w > 0" then "run $w" +run ${rr} diff --git a/src/USER-INTEL/angle_charmm_intel.cpp b/src/USER-INTEL/angle_charmm_intel.cpp index d55afd47427302169ec649f8a12bc1a9e6012681..031c9642000c22a6aa7f4156db319de5409fce6e 100644 --- a/src/USER-INTEL/angle_charmm_intel.cpp +++ b/src/USER-INTEL/angle_charmm_intel.cpp @@ -336,7 +336,7 @@ void AngleCharmmIntel::pack_force_const(ForceConst<flt_t> &fc, const int bp1 = atom->nangletypes + 1; fc.set_ntypes(bp1,memory); - for (int i = 0; i < bp1; i++) { + for (int i = 1; i < bp1; i++) { fc.fc[i].k = k[i]; fc.fc[i].theta0 = theta0[i]; fc.fc[i].k_ub = k_ub[i]; diff --git a/src/USER-INTEL/angle_harmonic_intel.cpp b/src/USER-INTEL/angle_harmonic_intel.cpp index 47e0add6906836e8549472b9018dd1d6cf6fd487..84220277d7a736131c30fa37d79fd93364e0c4ca 100644 --- a/src/USER-INTEL/angle_harmonic_intel.cpp +++ b/src/USER-INTEL/angle_harmonic_intel.cpp @@ -318,7 +318,7 @@ 
void AngleHarmonicIntel::pack_force_const(ForceConst<flt_t> &fc, const int bp1 = atom->nangletypes + 1; fc.set_ntypes(bp1,memory); - for (int i = 0; i < bp1; i++) { + for (int i = 1; i < bp1; i++) { fc.fc[i].k = k[i]; fc.fc[i].theta0 = theta0[i]; } diff --git a/src/USER-INTEL/bond_fene_intel.cpp b/src/USER-INTEL/bond_fene_intel.cpp index bb96135b2d7abeaf1e9a2b7990fae1fb17a314b0..93d64ed631f5466baec735c52a2770160b2eb8fb 100644 --- a/src/USER-INTEL/bond_fene_intel.cpp +++ b/src/USER-INTEL/bond_fene_intel.cpp @@ -295,7 +295,7 @@ void BondFENEIntel::pack_force_const(ForceConst<flt_t> &fc, const int bp1 = atom->nbondtypes + 1; fc.set_ntypes(bp1,memory); - for (int i = 0; i < bp1; i++) { + for (int i = 1; i < bp1; i++) { fc.fc[i].k = k[i]; fc.fc[i].ir0sq = 1.0 / (r0[i] * r0[i]); fc.fc[i].sigma = sigma[i]; diff --git a/src/USER-INTEL/bond_harmonic_intel.cpp b/src/USER-INTEL/bond_harmonic_intel.cpp index beb0ebcddaf52e1277859dad00fab1ce25a8aea3..0ac466f11386c99bef29b95a33e086349389239c 100644 --- a/src/USER-INTEL/bond_harmonic_intel.cpp +++ b/src/USER-INTEL/bond_harmonic_intel.cpp @@ -266,7 +266,7 @@ void BondHarmonicIntel::pack_force_const(ForceConst<flt_t> &fc, const int bp1 = atom->nbondtypes + 1; fc.set_ntypes(bp1,memory); - for (int i = 0; i < bp1; i++) { + for (int i = 1; i < bp1; i++) { fc.fc[i].k = k[i]; fc.fc[i].r0 = r0[i]; } diff --git a/src/USER-INTEL/dihedral_charmm_intel.cpp b/src/USER-INTEL/dihedral_charmm_intel.cpp index 715cef4d37c6a7b0f0e7afcfac57004ff02c9394..0e13e92251594d04348d98f2200c3501a5b29fd5 100644 --- a/src/USER-INTEL/dihedral_charmm_intel.cpp +++ b/src/USER-INTEL/dihedral_charmm_intel.cpp @@ -942,8 +942,8 @@ void DihedralCharmmIntel::pack_force_const(ForceConst<flt_t> &fc, buffers->set_ntypes(tp1); if (weightflag) { - for (int i = 0; i < tp1; i++) { - for (int j = 0; j < tp1; j++) { + for (int i = 1; i < tp1; i++) { + for (int j = 1; j < tp1; j++) { fc.ljp[i][j].lj1 = lj14_1[i][j]; fc.ljp[i][j].lj2 = lj14_2[i][j]; fc.ljp[i][j].lj3 = 
lj14_3[i][j]; @@ -952,7 +952,7 @@ void DihedralCharmmIntel::pack_force_const(ForceConst<flt_t> &fc, } } - for (int i = 0; i < bp1; i++) { + for (int i = 1; i < bp1; i++) { fc.bp[i].multiplicity = multiplicity[i]; fc.bp[i].cos_shift = cos_shift[i]; fc.bp[i].sin_shift = sin_shift[i]; diff --git a/src/USER-INTEL/dihedral_fourier_intel.cpp b/src/USER-INTEL/dihedral_fourier_intel.cpp new file mode 100644 index 0000000000000000000000000000000000000000..805ffc0e256177f294bc8f2b118e778c4cd7054e --- /dev/null +++ b/src/USER-INTEL/dihedral_fourier_intel.cpp @@ -0,0 +1,441 @@ +/* ---------------------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +/* ---------------------------------------------------------------------- + Contributing author: W. 
Michael Brown (Intel) +------------------------------------------------------------------------- */ + +#include <mpi.h> +#include <math.h> +#include "dihedral_fourier_intel.h" +#include "atom.h" +#include "comm.h" +#include "memory.h" +#include "neighbor.h" +#include "domain.h" +#include "force.h" +#include "pair.h" +#include "update.h" +#include "error.h" + +#include "suffix.h" +using namespace LAMMPS_NS; + +#define PTOLERANCE (flt_t)1.05 +#define MTOLERANCE (flt_t)-1.05 +typedef struct { int a,b,c,d,t; } int5_t; + +/* ---------------------------------------------------------------------- */ + +DihedralFourierIntel::DihedralFourierIntel(class LAMMPS *lmp) + : DihedralFourier(lmp) +{ + suffix_flag |= Suffix::INTEL; +} + +/* ---------------------------------------------------------------------- */ + +void DihedralFourierIntel::compute(int eflag, int vflag) +{ + #ifdef _LMP_INTEL_OFFLOAD + if (_use_base) { + DihedralFourier::compute(eflag, vflag); + return; + } + #endif + + if (fix->precision() == FixIntel::PREC_MODE_MIXED) + compute<float,double>(eflag, vflag, fix->get_mixed_buffers(), + force_const_single); + else if (fix->precision() == FixIntel::PREC_MODE_DOUBLE) + compute<double,double>(eflag, vflag, fix->get_double_buffers(), + force_const_double); + else + compute<float,float>(eflag, vflag, fix->get_single_buffers(), + force_const_single); +} + +/* ---------------------------------------------------------------------- */ + +template <class flt_t, class acc_t> +void DihedralFourierIntel::compute(int eflag, int vflag, + IntelBuffers<flt_t,acc_t> *buffers, + const ForceConst<flt_t> &fc) +{ + if (eflag || vflag) { + ev_setup(eflag,vflag); + } else evflag = 0; + + if (evflag) { + if (vflag && !eflag) { + if (force->newton_bond) + eval<0,1,1>(vflag, buffers, fc); + else + eval<0,1,0>(vflag, buffers, fc); + } else { + if (force->newton_bond) + eval<1,1,1>(vflag, buffers, fc); + else + eval<1,1,0>(vflag, buffers, fc); + } + } else { + if (force->newton_bond) + 
eval<0,0,1>(vflag, buffers, fc); + else + eval<0,0,0>(vflag, buffers, fc); + } +} + +template <int EFLAG, int VFLAG, int NEWTON_BOND, class flt_t, class acc_t> +void DihedralFourierIntel::eval(const int vflag, + IntelBuffers<flt_t,acc_t> *buffers, + const ForceConst<flt_t> &fc) + +{ + const int inum = neighbor->ndihedrallist; + if (inum == 0) return; + + ATOM_T * _noalias const x = buffers->get_x(0); + const int nlocal = atom->nlocal; + const int nall = nlocal + atom->nghost; + + int f_stride; + if (NEWTON_BOND) f_stride = buffers->get_stride(nall); + else f_stride = buffers->get_stride(nlocal); + + int tc; + FORCE_T * _noalias f_start; + acc_t * _noalias ev_global; + IP_PRE_get_buffers(0, buffers, fix, tc, f_start, ev_global); + const int nthreads = tc; + + acc_t oedihedral, ov0, ov1, ov2, ov3, ov4, ov5; + if (EFLAG) oedihedral = (acc_t)0.0; + if (VFLAG && vflag) { + ov0 = ov1 = ov2 = ov3 = ov4 = ov5 = (acc_t)0.0; + } + + #if defined(_OPENMP) + #pragma omp parallel default(none) \ + shared(f_start,f_stride,fc) \ + reduction(+:oedihedral,ov0,ov1,ov2,ov3,ov4,ov5) + #endif + { + int nfrom, npl, nto, tid; + #ifdef LMP_INTEL_USE_SIMDOFF + IP_PRE_omp_range_id(nfrom, nto, tid, inum, nthreads); + #else + IP_PRE_omp_stride_id(nfrom, npl, nto, tid, inum, nthreads); + #endif + + FORCE_T * _noalias const f = f_start + (tid * f_stride); + if (fix->need_zero(tid)) + memset(f, 0, f_stride * sizeof(FORCE_T)); + + const int5_t * _noalias const dihedrallist = + (int5_t *) neighbor->dihedrallist[0]; + + #ifdef LMP_INTEL_USE_SIMDOFF + acc_t sedihedral, sv0, sv1, sv2, sv3, sv4, sv5; + if (EFLAG) sedihedral = (acc_t)0.0; + if (VFLAG && vflag) { + sv0 = sv1 = sv2 = sv3 = sv4 = sv5 = (acc_t)0.0; + } + #pragma simd reduction(+:sedihedral, sv0, sv1, sv2, sv3, sv4, sv5) + for (int n = nfrom; n < nto; n ++) { + #else + for (int n = nfrom; n < nto; n += npl) { + #endif + const int i1 = dihedrallist[n].a; + const int i2 = dihedrallist[n].b; + const int i3 = dihedrallist[n].c; + const int i4 = 
dihedrallist[n].d; + const int type = dihedrallist[n].t; + + // 1st bond + + const flt_t vb1x = x[i1].x - x[i2].x; + const flt_t vb1y = x[i1].y - x[i2].y; + const flt_t vb1z = x[i1].z - x[i2].z; + + // 2nd bond + + const flt_t vb2xm = x[i2].x - x[i3].x; + const flt_t vb2ym = x[i2].y - x[i3].y; + const flt_t vb2zm = x[i2].z - x[i3].z; + + // 3rd bond + + const flt_t vb3x = x[i4].x - x[i3].x; + const flt_t vb3y = x[i4].y - x[i3].y; + const flt_t vb3z = x[i4].z - x[i3].z; + + // c,s calculation + + const flt_t ax = vb1y*vb2zm - vb1z*vb2ym; + const flt_t ay = vb1z*vb2xm - vb1x*vb2zm; + const flt_t az = vb1x*vb2ym - vb1y*vb2xm; + const flt_t bx = vb3y*vb2zm - vb3z*vb2ym; + const flt_t by = vb3z*vb2xm - vb3x*vb2zm; + const flt_t bz = vb3x*vb2ym - vb3y*vb2xm; + + const flt_t rasq = ax*ax + ay*ay + az*az; + const flt_t rbsq = bx*bx + by*by + bz*bz; + const flt_t rgsq = vb2xm*vb2xm + vb2ym*vb2ym + vb2zm*vb2zm; + const flt_t rg = sqrt(rgsq); + + flt_t rginv, ra2inv, rb2inv; + rginv = ra2inv = rb2inv = (flt_t)0.0; + if (rg > 0) rginv = (flt_t)1.0/rg; + if (rasq > 0) ra2inv = (flt_t)1.0/rasq; + if (rbsq > 0) rb2inv = (flt_t)1.0/rbsq; + const flt_t rabinv = sqrt(ra2inv*rb2inv); + + flt_t c = (ax*bx + ay*by + az*bz)*rabinv; + const flt_t s = rg*rabinv*(ax*vb3x + ay*vb3y + az*vb3z); + + // error check + #ifndef LMP_INTEL_USE_SIMDOFF + if (c > PTOLERANCE || c < MTOLERANCE) { + int me = comm->me; + + if (screen) { + char str[128]; + sprintf(str,"Dihedral problem: %d/%d " BIGINT_FORMAT " " + TAGINT_FORMAT " " TAGINT_FORMAT " " + TAGINT_FORMAT " " TAGINT_FORMAT, + me,tid,update->ntimestep, + atom->tag[i1],atom->tag[i2],atom->tag[i3],atom->tag[i4]); + error->warning(FLERR,str,0); + fprintf(screen," 1st atom: %d %g %g %g\n", + me,x[i1].x,x[i1].y,x[i1].z); + fprintf(screen," 2nd atom: %d %g %g %g\n", + me,x[i2].x,x[i2].y,x[i2].z); + fprintf(screen," 3rd atom: %d %g %g %g\n", + me,x[i3].x,x[i3].y,x[i3].z); + fprintf(screen," 4th atom: %d %g %g %g\n", + me,x[i4].x,x[i4].y,x[i4].z); + } + 
} + #endif + + if (c > (flt_t)1.0) c = (flt_t)1.0; + if (c < (flt_t)-1.0) c = (flt_t)-1.0; + + flt_t deng; + flt_t df = (flt_t)0.0; + if (EFLAG) deng = (flt_t)0.0; + + for (int j = 0; j < nterms[type]; j++) { + const flt_t tcos_shift = fc.bp[j][type].cos_shift; + const flt_t tsin_shift = fc.bp[j][type].sin_shift; + const flt_t tk = fc.bp[j][type].k; + const int m = fc.bp[j][type].multiplicity; + + flt_t p = (flt_t)1.0; + flt_t ddf1, df1; + ddf1 = df1 = (flt_t)0.0; + + for (int i = 0; i < m; i++) { + ddf1 = p*c - df1*s; + df1 = p*s + df1*c; + p = ddf1; + } + + p = p*tcos_shift + df1*tsin_shift; + df1 = df1*tcos_shift - ddf1*tsin_shift; + df1 *= -m; + p += (flt_t)1.0; + + if (m == 0) { + p = (flt_t)1.0 + tcos_shift; + df1 = (flt_t)0.0; + } + + if (EFLAG) deng += tk * p; + df -= tk * df1; + } + + const flt_t fg = vb1x*vb2xm + vb1y*vb2ym + vb1z*vb2zm; + const flt_t hg = vb3x*vb2xm + vb3y*vb2ym + vb3z*vb2zm; + const flt_t fga = fg*ra2inv*rginv; + const flt_t hgb = hg*rb2inv*rginv; + const flt_t gaa = -ra2inv*rg; + const flt_t gbb = rb2inv*rg; + + const flt_t dtfx = gaa*ax; + const flt_t dtfy = gaa*ay; + const flt_t dtfz = gaa*az; + const flt_t dtgx = fga*ax - hgb*bx; + const flt_t dtgy = fga*ay - hgb*by; + const flt_t dtgz = fga*az - hgb*bz; + const flt_t dthx = gbb*bx; + const flt_t dthy = gbb*by; + const flt_t dthz = gbb*bz; + + const flt_t sx2 = df*dtgx; + const flt_t sy2 = df*dtgy; + const flt_t sz2 = df*dtgz; + + flt_t f1x = df*dtfx; + flt_t f1y = df*dtfy; + flt_t f1z = df*dtfz; + + const flt_t f2x = sx2 - f1x; + const flt_t f2y = sy2 - f1y; + const flt_t f2z = sz2 - f1z; + + flt_t f4x = df*dthx; + flt_t f4y = df*dthy; + flt_t f4z = df*dthz; + + const flt_t f3x = -sx2 - f4x; + const flt_t f3y = -sy2 - f4y; + const flt_t f3z = -sz2 - f4z; + + if (EFLAG || VFLAG) { + #ifdef LMP_INTEL_USE_SIMDOFF + IP_PRE_ev_tally_dihed(EFLAG, VFLAG, eatom, vflag, deng, i1, i2, i3, i4, + f1x, f1y, f1z, f3x, f3y, f3z, f4x, f4y, f4z, + vb1x, vb1y, vb1z, -vb2xm, -vb2ym, -vb2zm, vb3x, + 
vb3y, vb3z, sedihedral, f, NEWTON_BOND, nlocal, + sv0, sv1, sv2, sv3, sv4, sv5); + #else + IP_PRE_ev_tally_dihed(EFLAG, VFLAG, eatom, vflag, deng, i1, i2, i3, i4, + f1x, f1y, f1z, f3x, f3y, f3z, f4x, f4y, f4z, + vb1x, vb1y, vb1z, -vb2xm, -vb2ym, -vb2zm, vb3x, + vb3y, vb3z, oedihedral, f, NEWTON_BOND, nlocal, + ov0, ov1, ov2, ov3, ov4, ov5); + #endif + } + + #ifdef LMP_INTEL_USE_SIMDOFF + #pragma simdoff + #endif + { + if (NEWTON_BOND || i1 < nlocal) { + f[i1].x += f1x; + f[i1].y += f1y; + f[i1].z += f1z; + } + + if (NEWTON_BOND || i2 < nlocal) { + f[i2].x += f2x; + f[i2].y += f2y; + f[i2].z += f2z; + } + + if (NEWTON_BOND || i3 < nlocal) { + f[i3].x += f3x; + f[i3].y += f3y; + f[i3].z += f3z; + } + + if (NEWTON_BOND || i4 < nlocal) { + f[i4].x += f4x; + f[i4].y += f4y; + f[i4].z += f4z; + } + } + } // for n + #ifdef LMP_INTEL_USE_SIMDOFF + if (EFLAG) oedihedral += sedihedral; + if (VFLAG && vflag) { + ov0 += sv0; ov1 += sv1; ov2 += sv2; + ov3 += sv3; ov4 += sv4; ov5 += sv5; + } + #endif + } // omp parallel + + if (EFLAG) energy += oedihedral; + if (VFLAG && vflag) { + virial[0] += ov0; virial[1] += ov1; virial[2] += ov2; + virial[3] += ov3; virial[4] += ov4; virial[5] += ov5; + } + + fix->set_reduce_flag(); +} + +/* ---------------------------------------------------------------------- */ + +void DihedralFourierIntel::init_style() +{ + DihedralFourier::init_style(); + + int ifix = modify->find_fix("package_intel"); + if (ifix < 0) + error->all(FLERR, + "The 'package intel' command is required for /intel styles"); + fix = static_cast<FixIntel *>(modify->fix[ifix]); + + #ifdef _LMP_INTEL_OFFLOAD + _use_base = 0; + if (fix->offload_balance() != 0.0) { + _use_base = 1; + return; + } + #endif + + fix->bond_init_check(); + + if (fix->precision() == FixIntel::PREC_MODE_MIXED) + pack_force_const(force_const_single, fix->get_mixed_buffers()); + else if (fix->precision() == FixIntel::PREC_MODE_DOUBLE) + pack_force_const(force_const_double, fix->get_double_buffers()); + else 
+ pack_force_const(force_const_single, fix->get_single_buffers()); +} + +/* ---------------------------------------------------------------------- */ + +template <class flt_t, class acc_t> +void DihedralFourierIntel::pack_force_const(ForceConst<flt_t> &fc, + IntelBuffers<flt_t,acc_t> *buffers) +{ + const int bp1 = atom->ndihedraltypes + 1; + fc.set_ntypes(bp1, setflag, nterms, memory); + + for (int i = 1; i < bp1; i++) { + if (setflag[i]) { + for (int j = 0; j < nterms[i]; j++) { + fc.bp[j][i].cos_shift = cos_shift[i][j]; + fc.bp[j][i].sin_shift = sin_shift[i][j]; + fc.bp[j][i].k = k[i][j]; + fc.bp[j][i].multiplicity = multiplicity[i][j]; + } + } + } +} + +/* ---------------------------------------------------------------------- */ + +template <class flt_t> +void DihedralFourierIntel::ForceConst<flt_t>::set_ntypes(const int nbondtypes, + int *setflag, + int *nterms, + Memory *memory) { + if (memory != NULL) _memory = memory; /* adopt the allocator first: no valid one is stored before the first call; NULL (destructor path) keeps the stored one */ + if (nbondtypes != _nbondtypes) { + if (_nbondtypes > 0) + _memory->destroy(bp); + + if (nbondtypes > 0) { + _maxnterms = 1; + for (int i = 1; i < nbondtypes; i++) /* types run 1..nbondtypes-1, the range pack_force_const walks */ + if (setflag[i]) _maxnterms = MAX(_maxnterms, nterms[i]); + + _memory->create(bp, _maxnterms, nbondtypes, "dihedralfourierintel.bp"); + } + } + _nbondtypes = nbondtypes; +} diff --git a/src/USER-INTEL/dihedral_fourier_intel.h b/src/USER-INTEL/dihedral_fourier_intel.h new file mode 100644 index 0000000000000000000000000000000000000000..a775e129f432a041b9687aaa4cf2c69cfd78f6c7 --- /dev/null +++ b/src/USER-INTEL/dihedral_fourier_intel.h @@ -0,0 +1,82 @@ +/* -*- c++ -*- ---------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software.
This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +/* ---------------------------------------------------------------------- + Contributing author: W. Michael Brown (Intel) +------------------------------------------------------------------------- */ + +#ifdef DIHEDRAL_CLASS + +DihedralStyle(fourier/intel,DihedralFourierIntel) + +#else + +#ifndef LMP_DIHEDRAL_FOURIER_INTEL_H +#define LMP_DIHEDRAL_FOURIER_INTEL_H + +#include "dihedral_fourier.h" +#include "fix_intel.h" + +namespace LAMMPS_NS { + +class DihedralFourierIntel : public DihedralFourier { + + public: + DihedralFourierIntel(class LAMMPS *lmp); + virtual void compute(int, int); + void init_style(); + + private: + FixIntel *fix; + + template <class flt_t> class ForceConst; + template <class flt_t, class acc_t> + void compute(int eflag, int vflag, IntelBuffers<flt_t,acc_t> *buffers, + const ForceConst<flt_t> &fc); + template <int EFLAG, int VFLAG, int NEWTON_BOND, class flt_t, class acc_t> /* parameter names as in the definition */ + void eval(const int vflag, IntelBuffers<flt_t,acc_t> * buffers, + const ForceConst<flt_t> &fc); + template <class flt_t, class acc_t> + void pack_force_const(ForceConst<flt_t> &fc, + IntelBuffers<flt_t, acc_t> *buffers); + + #ifdef _LMP_INTEL_OFFLOAD + int _use_base; + #endif + + template <class flt_t> + class ForceConst { + public: + typedef struct { flt_t cos_shift, sin_shift, k; + int multiplicity; } fc_packed1; + + fc_packed1 **bp; + + ForceConst() : bp(NULL), _nbondtypes(0), _maxnterms(0), _memory(NULL) {} /* fully defined empty state */ + ~ForceConst() { set_ntypes(0, NULL, NULL, NULL); } + + void set_ntypes(const int nbondtypes, int *setflag, int *nterms, + Memory *memory); + + private: + int _nbondtypes, _maxnterms; + Memory *_memory; + }; + ForceConst<float> force_const_single; + ForceConst<double> force_const_double; +}; + +} + +#endif +#endif diff --git a/src/USER-INTEL/dihedral_harmonic_intel.cpp
b/src/USER-INTEL/dihedral_harmonic_intel.cpp index 196b024fa62d9ed9e192ea9f4b7395b7173c206c..5d16b0da745425abd66617ab50043ef796d78db3 100644 --- a/src/USER-INTEL/dihedral_harmonic_intel.cpp +++ b/src/USER-INTEL/dihedral_harmonic_intel.cpp @@ -400,7 +400,7 @@ void DihedralHarmonicIntel::pack_force_const(ForceConst<flt_t> &fc, const int bp1 = atom->ndihedraltypes + 1; fc.set_ntypes(bp1,memory); - for (int i = 0; i < bp1; i++) { + for (int i = 1; i < bp1; i++) { fc.bp[i].multiplicity = multiplicity[i]; fc.bp[i].cos_shift = cos_shift[i]; fc.bp[i].sin_shift = sin_shift[i]; diff --git a/src/USER-INTEL/dihedral_opls_intel.cpp b/src/USER-INTEL/dihedral_opls_intel.cpp index 1abeba1d5ea5d71a6ed27033502c233a183c8a71..e290ab90616b94df0135264dcbbf66fe8546a36a 100644 --- a/src/USER-INTEL/dihedral_opls_intel.cpp +++ b/src/USER-INTEL/dihedral_opls_intel.cpp @@ -427,7 +427,7 @@ void DihedralOPLSIntel::pack_force_const(ForceConst<flt_t> &fc, const int bp1 = atom->ndihedraltypes + 1; fc.set_ntypes(bp1,memory); - for (int i = 0; i < bp1; i++) { + for (int i = 1; i < bp1; i++) { fc.bp[i].k1 = k1[i]; fc.bp[i].k2 = k2[i]; fc.bp[i].k3 = k3[i]; diff --git a/src/USER-INTEL/fix_intel.cpp b/src/USER-INTEL/fix_intel.cpp index 637fc0d06e888c409733ee722437ef33ab286ac0..eac48b8510b4aa67a5e93f689ae1381510e02b57 100644 --- a/src/USER-INTEL/fix_intel.cpp +++ b/src/USER-INTEL/fix_intel.cpp @@ -285,6 +285,7 @@ int FixIntel::setmask() { int mask = 0; mask |= PRE_REVERSE; + mask |= MIN_PRE_REVERSE; #ifdef _LMP_INTEL_OFFLOAD mask |= POST_FORCE; mask |= MIN_POST_FORCE; diff --git a/src/USER-INTEL/fix_intel.h b/src/USER-INTEL/fix_intel.h index 068e5ed890354275a3b6b1b5835c530733711d4a..d7093e79bb68c91d785a66f257338bc6ccff81bf 100644 --- a/src/USER-INTEL/fix_intel.h +++ b/src/USER-INTEL/fix_intel.h @@ -43,6 +43,7 @@ class FixIntel : public Fix { virtual int setmask(); virtual void init(); virtual void setup(int); + inline void min_setup(int in) { setup(in); } void setup_pre_reverse(int eflag = 0, int vflag = 
0); void pair_init_check(const bool cdmessage=false); @@ -50,6 +51,8 @@ class FixIntel : public Fix { void kspace_init_check(); void pre_reverse(int eflag = 0, int vflag = 0); + inline void min_pre_reverse(int eflag = 0, int vflag = 0) + { pre_reverse(eflag, vflag); } // Get all forces, calculation results from coprocesser void sync_coprocessor(); diff --git a/src/USER-INTEL/improper_cvff_intel.cpp b/src/USER-INTEL/improper_cvff_intel.cpp index dc9765d9130670ec7dd2e991dc8d936b66240142..39090e5a7420b3fd32598cbab2aca8a802e497b1 100644 --- a/src/USER-INTEL/improper_cvff_intel.cpp +++ b/src/USER-INTEL/improper_cvff_intel.cpp @@ -433,7 +433,7 @@ void ImproperCvffIntel::pack_force_const(ForceConst<flt_t> &fc, const int bp1 = atom->nimpropertypes + 1; fc.set_ntypes(bp1,memory); - for (int i = 0; i < bp1; i++) { + for (int i = 1; i < bp1; i++) { fc.fc[i].k = k[i]; fc.fc[i].sign = sign[i]; fc.fc[i].multiplicity = multiplicity[i]; diff --git a/src/USER-INTEL/improper_harmonic_intel.cpp b/src/USER-INTEL/improper_harmonic_intel.cpp index fe0efca5ec163d50f4a75643223f6c8df0980a95..3547061672cff079b53761f928761e746d637d8a 100644 --- a/src/USER-INTEL/improper_harmonic_intel.cpp +++ b/src/USER-INTEL/improper_harmonic_intel.cpp @@ -389,7 +389,7 @@ void ImproperHarmonicIntel::pack_force_const(ForceConst<flt_t> &fc, const int bp1 = atom->nimpropertypes + 1; fc.set_ntypes(bp1,memory); - for (int i = 0; i < bp1; i++) { + for (int i = 1; i < bp1; i++) { fc.fc[i].k = k[i]; fc.fc[i].chi = chi[i]; } diff --git a/src/USER-INTEL/intel_buffers.cpp b/src/USER-INTEL/intel_buffers.cpp index b4b664cb943354c3e38b97919fa4d2c5b53bb5aa..ac208f5a0ca7d934e65ac1a1a3decffab9614ffe 100644 --- a/src/USER-INTEL/intel_buffers.cpp +++ b/src/USER-INTEL/intel_buffers.cpp @@ -409,6 +409,7 @@ void IntelBuffers<flt_t, acc_t>::grow_ccache(const int off_flag, IP_PRE_get_stride(_ccache_stride3, nsize * 3, sizeof(acc_t), 0); lmp->memory->create(_ccachef, _ccache_stride3 * nt, "_ccachef"); #endif + memset(_ccachei, 0, 
vsize * sizeof(int)); memset(_ccachej, 0, vsize * sizeof(int)); #ifdef _LMP_INTEL_OFFLOAD @@ -425,7 +426,7 @@ void IntelBuffers<flt_t, acc_t>::grow_ccache(const int off_flag, #pragma offload_transfer target(mic:_cop) \ nocopy(ccachex,ccachey:length(vsize) alloc_if(1) free_if(0)) \ nocopy(ccachez,ccachew:length(vsize) alloc_if(1) free_if(0)) \ - nocopy(ccachei:length(vsize) alloc_if(1) free_if(0)) \ + in(ccachei:length(vsize) alloc_if(1) free_if(0)) \ in(ccachej:length(vsize) alloc_if(1) free_if(0)) } #ifdef LMP_USE_AVXCD diff --git a/src/USER-INTEL/intel_preprocess.h b/src/USER-INTEL/intel_preprocess.h index a7663d54a68bebaebaeae1148690d550f2cf2c38..068f61023edfd0b08a7a6d983b2ce059bab1e4cc 100644 --- a/src/USER-INTEL/intel_preprocess.h +++ b/src/USER-INTEL/intel_preprocess.h @@ -211,10 +211,9 @@ enum {TIME_PACK, TIME_HOST_NEIGHBOR, TIME_HOST_PAIR, TIME_OFFLOAD_NEIGHBOR, datasize); \ } -#define IP_PRE_omp_range_id_vec(ifrom, ito, tid, inum, \ - nthreads, vecsize) \ +#define IP_PRE_omp_range_vec(ifrom, ito, tid, inum, nthreads, \ + vecsize) \ { \ - tid = omp_get_thread_num(); \ int idelta = static_cast<int>(ceil(static_cast<float>(inum) \ /vecsize/nthreads)); \ idelta *= vecsize; \ @@ -223,6 +222,14 @@ enum {TIME_PACK, TIME_HOST_NEIGHBOR, TIME_HOST_PAIR, TIME_OFFLOAD_NEIGHBOR, if (ito > inum) ito = inum; \ } +#define IP_PRE_omp_range_id_vec(ifrom, ito, tid, inum, \ + nthreads, vecsize) \ + { \ + tid = omp_get_thread_num(); \ + IP_PRE_omp_range_vec(ifrom, ito, tid, inum, nthreads, \ + vecsize); \ + } + #define IP_PRE_omp_stride_id_vec(ifrom, ip, ito, tid, inum, \ nthr, vecsize) \ { \ @@ -235,13 +242,12 @@ enum {TIME_PACK, TIME_HOST_NEIGHBOR, TIME_HOST_PAIR, TIME_OFFLOAD_NEIGHBOR, int nd = nthr / INTEL_HTHREADS; \ int td = tid / INTEL_HTHREADS; \ int tm = tid % INTEL_HTHREADS; \ - IP_PRE_omp_range_id_vec(ifrom, ito, td, inum, nd, \ - vecsize); \ + IP_PRE_omp_range_vec(ifrom, ito, td, inum, nd, vecsize); \ ifrom += tm * vecsize; \ ip = INTEL_HTHREADS * vecsize; \ } else 
{ \ - IP_PRE_omp_range_id_vec(ifrom, ito, tid, inum, nthr, \ - vecsize); \ + IP_PRE_omp_range_vec(ifrom, ito, tid, inum, nthr, \ + vecsize); \ ip = vecsize; \ } \ } @@ -292,6 +298,15 @@ enum {TIME_PACK, TIME_HOST_NEIGHBOR, TIME_HOST_PAIR, TIME_OFFLOAD_NEIGHBOR, ito = inum; \ } +#define IP_PRE_omp_stride_id_vec(ifrom, ip, ito, tid, inum, \ + nthr, vecsize) \ + { \ + tid = 0; \ + ifrom = 0; \ + ip = vecsize; /* step one vector at a time, as the threaded variant does */ \ + ito = inum; \ + } + #endif #define IP_PRE_fdotr_acc_force_l5(lf, lt, minlocal, nthreads, f_start, \ diff --git a/src/USER-INTEL/npair_full_bin_ghost_intel.cpp b/src/USER-INTEL/npair_full_bin_ghost_intel.cpp index 12101712f11ec833b6501b88959844ce9085854b..e6d45d7b2c6941fa87c24cd089ee46863b4828ee 100644 --- a/src/USER-INTEL/npair_full_bin_ghost_intel.cpp +++ b/src/USER-INTEL/npair_full_bin_ghost_intel.cpp @@ -319,7 +319,6 @@ void NPairFullBinGhostIntel::fbi(const int offload, NeighList * list, const int bstart = binhead[ibin + binstart[k]]; const int bend = binhead[ibin + binend[k]]; #if defined(LMP_SIMD_COMPILER) - #pragma vector aligned #pragma simd #endif for (int jj = bstart; jj < bend; jj++) @@ -341,7 +340,6 @@ void NPairFullBinGhostIntel::fbi(const int offload, NeighList * list, const int bstart = binhead[ibin + stencil[k]]; const int bend = binhead[ibin + stencil[k] + 1]; #if defined(LMP_SIMD_COMPILER) - #pragma vector aligned #pragma simd #endif for (int jj = bstart; jj < bend; jj++) diff --git a/src/USER-INTEL/npair_intel.cpp b/src/USER-INTEL/npair_intel.cpp index 79dc75366e80d73db0c70ef7cb46d45dce54c1f0..0068e02635c7d6d7bd145fe45a35bade50f64f0c 100644 --- a/src/USER-INTEL/npair_intel.cpp +++ b/src/USER-INTEL/npair_intel.cpp @@ -273,7 +273,6 @@ void NPairIntel::bin_newton(const int offload, NeighList *list, const int bstart = binhead[ibin + binstart[k]]; const int bend = binhead[ibin + binend[k]]; #if defined(LMP_SIMD_COMPILER) - #pragma vector aligned #pragma simd #endif for (int jj = bstart; jj < bend; jj++) @@ -307,7 +306,6 @@ void
NPairIntel::bin_newton(const int offload, NeighList *list, const int bstart = binhead[ibin]; const int bend = binhead[ibin + 1]; #if defined(LMP_SIMD_COMPILER) - #pragma vector aligned #pragma simd #endif for (int jj = bstart; jj < bend; jj++) { diff --git a/src/USER-INTEL/pair_buck_coul_cut_intel.cpp b/src/USER-INTEL/pair_buck_coul_cut_intel.cpp index 07beae1e41683d781b2f80df9dbe2e1d790c66cb..e32fd066607fff224be9bcf2d606c4472864220e 100644 --- a/src/USER-INTEL/pair_buck_coul_cut_intel.cpp +++ b/src/USER-INTEL/pair_buck_coul_cut_intel.cpp @@ -458,8 +458,8 @@ void PairBuckCoulCutIntel::pack_force_const(ForceConst<flt_t> &fc, fc.special_lj[0] = 1.0; } - for (int i = 0; i < tp1; i++) { - for (int j = 0; j < tp1; j++) { + for (int i = 1; i < tp1; i++) { + for (int j = 1; j < tp1; j++) { fc.c_cut[i][j].cutsq = cutsq[i][j]; fc.c_cut[i][j].cut_ljsq = cut_ljsq[i][j]; fc.c_cut[i][j].cut_coulsq = cut_coulsq[i][j]; diff --git a/src/USER-INTEL/pair_buck_coul_long_intel.cpp b/src/USER-INTEL/pair_buck_coul_long_intel.cpp index 995e2e85839d3503fdefd8cc9100a7d49a78f19c..2b9fea7a50f80c0298069d976c3211d47bffc12e 100644 --- a/src/USER-INTEL/pair_buck_coul_long_intel.cpp +++ b/src/USER-INTEL/pair_buck_coul_long_intel.cpp @@ -545,8 +545,8 @@ void PairBuckCoulLongIntel::pack_force_const(ForceConst<flt_t> &fc, fc.special_lj[0] = 1.0; } - for (int i = 0; i < tp1; i++) { - for (int j = 0; j < tp1; j++) { + for (int i = 1; i < tp1; i++) { + for (int j = 1; j < tp1; j++) { if (cutsq[i][j] < cut_ljsq[i][j]) error->all(FLERR, "Intel variant of lj/buck/coul/long expects lj cutoff<=coulombic"); diff --git a/src/USER-INTEL/pair_buck_intel.cpp b/src/USER-INTEL/pair_buck_intel.cpp index 8c63d2e62d5d9a419ec627d9da01bbf64cfd77a3..05a28eb7f05b71e3df46b4403ae6971b70351295 100644 --- a/src/USER-INTEL/pair_buck_intel.cpp +++ b/src/USER-INTEL/pair_buck_intel.cpp @@ -413,8 +413,8 @@ void PairBuckIntel::pack_force_const(ForceConst<flt_t> &fc, fc.special_lj[0] = 1.0; } - for (int i = 0; i < tp1; i++) { - for 
(int j = 0; j < tp1; j++) { + for (int i = 1; i < tp1; i++) { + for (int j = 1; j < tp1; j++) { fc.c_force[i][j].buck1 = buck1[i][j]; fc.c_force[i][j].buck2 = buck2[i][j]; fc.c_force[i][j].rhoinv = rhoinv[i][j]; diff --git a/src/USER-INTEL/pair_dpd_intel.cpp b/src/USER-INTEL/pair_dpd_intel.cpp new file mode 100644 index 0000000000000000000000000000000000000000..09f27504a1bd2a8bc94bb0a6e4f50d5f775fe129 --- /dev/null +++ b/src/USER-INTEL/pair_dpd_intel.cpp @@ -0,0 +1,617 @@ +/* ---------------------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + This software is distributed under the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +/* ---------------------------------------------------------------------- + Contributing author: W. 
Michael Brown (Intel) + Shun Xu (Computer Network Information Center, CAS) +------------------------------------------------------------------------- */ + +#include <math.h> +#include "pair_dpd_intel.h" +#include "atom.h" +#include "comm.h" +#include "force.h" +#include "memory.h" +#include "modify.h" +#include "neighbor.h" +#include "neigh_list.h" +#include "neigh_request.h" +#include "suffix.h" +using namespace LAMMPS_NS; + +#define LMP_MKL_RNG VSL_BRNG_MT19937 +#define FC_PACKED1_T typename ForceConst<flt_t>::fc_packed1 +#define IEPSILON 1.0e10 + +/* ---------------------------------------------------------------------- */ + +PairDPDIntel::PairDPDIntel(LAMMPS *lmp) : + PairDPD(lmp) +{ + suffix_flag |= Suffix::INTEL; + respa_enable = 0; + random_thread = NULL; + _nrandom_thread = 0; +} + +/* ---------------------------------------------------------------------- */ + +PairDPDIntel::~PairDPDIntel() +{ + #if defined(_OPENMP) + if (_nrandom_thread) { + #ifdef LMP_USE_MKL_RNG + for (int i = 0; i < _nrandom_thread; i++) + vslDeleteStream(&random_thread[i]); + #else + for (int i = 1; i < _nrandom_thread; i++) + delete random_thread[i]; + #endif + } + #endif + delete []random_thread; +} + +/* ---------------------------------------------------------------------- */ + +void PairDPDIntel::compute(int eflag, int vflag) +{ + if (fix->precision() == FixIntel::PREC_MODE_MIXED) + compute<float,double>(eflag, vflag, fix->get_mixed_buffers(), + force_const_single); + else if (fix->precision() == FixIntel::PREC_MODE_DOUBLE) + compute<double,double>(eflag, vflag, fix->get_double_buffers(), + force_const_double); + else + compute<float,float>(eflag, vflag, fix->get_single_buffers(), + force_const_single); + + fix->balance_stamp(); + vflag_fdotr = 0; +} + +template <class flt_t, class acc_t> +void PairDPDIntel::compute(int eflag, int vflag, + IntelBuffers<flt_t,acc_t> *buffers, + const ForceConst<flt_t> &fc) +{ + if (eflag || vflag) { + ev_setup(eflag, vflag); + } else evflag = 
vflag_fdotr = 0; + + const int inum = list->inum; + const int nthreads = comm->nthreads; + const int host_start = fix->host_start_pair(); + const int offload_end = fix->offload_end_pair(); + const int ago = neighbor->ago; + + if (ago != 0 && fix->separate_buffers() == 0) { + fix->start_watch(TIME_PACK); + + int packthreads; + if (nthreads > INTEL_HTHREADS) packthreads = nthreads; + else packthreads = 1; + #if defined(_OPENMP) + #pragma omp parallel if(packthreads > 1) + #endif + { + int ifrom, ito, tid; + IP_PRE_omp_range_id_align(ifrom, ito, tid, atom->nlocal + atom->nghost, + packthreads, sizeof(ATOM_T)); + buffers->thr_pack(ifrom,ito,ago); + } + fix->stop_watch(TIME_PACK); + } + + int ovflag = 0; + if (vflag_fdotr) ovflag = 2; + else if (vflag) ovflag = 1; + if (_onetype) { + if (eflag) { + if (force->newton_pair) { + eval<1,1,1>(1, ovflag, buffers, fc, 0, offload_end); + eval<1,1,1>(0, ovflag, buffers, fc, host_start, inum); + } else { + eval<1,1,0>(1, ovflag, buffers, fc, 0, offload_end); + eval<1,1,0>(0, ovflag, buffers, fc, host_start, inum); + } + } else { + if (force->newton_pair) { + eval<1,0,1>(1, ovflag, buffers, fc, 0, offload_end); + eval<1,0,1>(0, ovflag, buffers, fc, host_start, inum); + } else { + eval<1,0,0>(1, ovflag, buffers, fc, 0, offload_end); + eval<1,0,0>(0, ovflag, buffers, fc, host_start, inum); + } + } + } else { + if (eflag) { + if (force->newton_pair) { + eval<0,1,1>(1, ovflag, buffers, fc, 0, offload_end); + eval<0,1,1>(0, ovflag, buffers, fc, host_start, inum); + } else { + eval<0,1,0>(1, ovflag, buffers, fc, 0, offload_end); + eval<0,1,0>(0, ovflag, buffers, fc, host_start, inum); + } + } else { + if (force->newton_pair) { + eval<0,0,1>(1, ovflag, buffers, fc, 0, offload_end); + eval<0,0,1>(0, ovflag, buffers, fc, host_start, inum); + } else { + eval<0,0,0>(1, ovflag, buffers, fc, 0, offload_end); + eval<0,0,0>(0, ovflag, buffers, fc, host_start, inum); + } + } + } +} + +template <int ONETYPE, int EFLAG, int NEWTON_PAIR, class 
flt_t, class acc_t> +void PairDPDIntel::eval(const int offload, const int vflag, + IntelBuffers<flt_t,acc_t> *buffers, + const ForceConst<flt_t> &fc, + const int astart, const int aend) +{ + const int inum = aend - astart; + if (inum == 0) return; + int nlocal, nall, minlocal; + fix->get_buffern(offload, nlocal, nall, minlocal); + + const int ago = neighbor->ago; + IP_PRE_pack_separate_buffers(fix, buffers, ago, offload, nlocal, nall); + + ATOM_T * _noalias const x = buffers->get_x(offload); + typedef struct { double x, y, z; } lmp_vt; + lmp_vt *v = (lmp_vt *)atom->v[0]; + const flt_t dtinvsqrt = 1.0/sqrt(update->dt); + + const int * _noalias const numneigh = list->numneigh; + const int * _noalias const cnumneigh = buffers->cnumneigh(list); + const int * _noalias const firstneigh = buffers->firstneigh(list); + const FC_PACKED1_T * _noalias const param = fc.param[0]; + const flt_t * _noalias const special_lj = fc.special_lj; + int * _noalias const rngi_thread = fc.rngi; + const int rng_size = buffers->get_max_nbors(); + + const int ntypes = atom->ntypes + 1; + const int eatom = this->eflag_atom; + + // Determine how much data to transfer + int x_size, q_size, f_stride, ev_size, separate_flag; + IP_PRE_get_transfern(ago, NEWTON_PAIR, EFLAG, vflag, + buffers, offload, fix, separate_flag, + x_size, q_size, ev_size, f_stride); + + int tc; + FORCE_T * _noalias f_start; + acc_t * _noalias ev_global; + IP_PRE_get_buffers(offload, buffers, fix, tc, f_start, ev_global); + const int nthreads = tc; + int *overflow = fix->get_off_overflow_flag(); + { + #if defined(__MIC__) && defined(_LMP_INTEL_OFFLOAD) + *timer_compute = MIC_Wtime(); + #endif + + IP_PRE_repack_for_offload(NEWTON_PAIR, separate_flag, nlocal, nall, + f_stride, x, 0); + + acc_t oevdwl, ov0, ov1, ov2, ov3, ov4, ov5; + if (EFLAG) oevdwl = (acc_t)0; + if (vflag) ov0 = ov1 = ov2 = ov3 = ov4 = ov5 = (acc_t)0; + + // loop over neighbors of my atoms + #if defined(_OPENMP) + #pragma omp parallel 
reduction(+:oevdwl,ov0,ov1,ov2,ov3,ov4,ov5) + #endif + { + int iifrom, iip, iito, tid; + IP_PRE_omp_stride_id(iifrom, iip, iito, tid, inum, nthreads); + iifrom += astart; + iito += astart; + + #ifdef LMP_USE_MKL_RNG + VSLStreamStatePtr *my_random = &(random_thread[tid]); + #else + RanMars *my_random = random_thread[tid]; + #endif + flt_t *my_rand_buffer = fc.rand_buffer_thread[tid]; + int rngi = rngi_thread[tid]; + + int foff; + if (NEWTON_PAIR) foff = tid * f_stride - minlocal; + else foff = -minlocal; + FORCE_T * _noalias const f = f_start + foff; + if (NEWTON_PAIR) memset(f + minlocal, 0, f_stride * sizeof(FORCE_T)); + + flt_t icut, a0, gamma, sigma; + if (ONETYPE) { + icut = param[3].icut; + a0 = param[3].a0; + gamma = param[3].gamma; + sigma = param[3].sigma; + } + for (int i = iifrom; i < iito; i += iip) { + int itype, ptr_off; + const FC_PACKED1_T * _noalias parami; + if (!ONETYPE) { + itype = x[i].w; + ptr_off = itype * ntypes; + parami = param + ptr_off; + } + + const int * _noalias const jlist = firstneigh + cnumneigh[i]; + const int jnum = numneigh[i]; + + acc_t fxtmp, fytmp, fztmp, fwtmp; + acc_t sevdwl, sv0, sv1, sv2, sv3, sv4, sv5; + + const flt_t xtmp = x[i].x; + const flt_t ytmp = x[i].y; + const flt_t ztmp = x[i].z; + const flt_t vxtmp = v[i].x; + const flt_t vytmp = v[i].y; + const flt_t vztmp = v[i].z; + fxtmp = fytmp = fztmp = (acc_t)0; + if (EFLAG) fwtmp = sevdwl = (acc_t)0; + if (NEWTON_PAIR == 0) + if (vflag==1) sv0 = sv1 = sv2 = sv3 = sv4 = sv5 = (acc_t)0; + + if (rngi + jnum > rng_size) { + #ifdef LMP_USE_MKL_RNG + if (sizeof(flt_t) == sizeof(float)) + vsRngGaussian(VSL_RNG_METHOD_GAUSSIAN_ICDF, *my_random, rngi, + (float*)my_rand_buffer, (float)0.0, (float)1.0 ); + else + vdRngGaussian(VSL_RNG_METHOD_GAUSSIAN_ICDF, *my_random, rngi, + (double*)my_rand_buffer, 0.0, 1.0 ); + #else + for (int jj = 0; jj < rngi; jj++) + my_rand_buffer[jj] = my_random->gaussian(); + #endif + rngi = 0; + } + + #if defined(LMP_SIMD_COMPILER) + #pragma vector 
aligned + #pragma simd reduction(+:fxtmp, fytmp, fztmp, fwtmp, sevdwl, \ + sv0, sv1, sv2, sv3, sv4, sv5) + #endif + for (int jj = 0; jj < jnum; jj++) { + flt_t forcelj, evdwl; + forcelj = evdwl = (flt_t)0.0; + + int j, jtype, sbindex; + if (!ONETYPE) { + sbindex = jlist[jj] >> SBBITS & 3; + j = jlist[jj] & NEIGHMASK; + } else + j = jlist[jj]; + + const flt_t delx = xtmp - x[j].x; + const flt_t dely = ytmp - x[j].y; + const flt_t delz = ztmp - x[j].z; + if (!ONETYPE) { + jtype = x[j].w; + icut = parami[jtype].icut; + } + const flt_t rsq = delx * delx + dely * dely + delz * delz; + const flt_t rinv = (flt_t)1.0/sqrt(rsq); + + if (rinv > icut) { + flt_t factor_dpd; + if (!ONETYPE) factor_dpd = special_lj[sbindex]; + + flt_t delvx = vxtmp - v[j].x; + flt_t delvy = vytmp - v[j].y; + flt_t delvz = vztmp - v[j].z; + flt_t dot = delx*delvx + dely*delvy + delz*delvz; + flt_t randnum = my_rand_buffer[jj]; + + flt_t iwd = rinv - icut; + if (rinv > (flt_t)IEPSILON) iwd = (flt_t)0.0; + + if (!ONETYPE) { + a0 = parami[jtype].a0; + gamma = parami[jtype].gamma; + sigma = parami[jtype].sigma; + } + flt_t fpair = a0 - iwd * gamma * dot + sigma * randnum * dtinvsqrt; + if (!ONETYPE) fpair *= factor_dpd; + fpair *= iwd; + + const flt_t fpx = fpair * delx; + fxtmp += fpx; + if (NEWTON_PAIR) f[j].x -= fpx; + const flt_t fpy = fpair * dely; + fytmp += fpy; + if (NEWTON_PAIR) f[j].y -= fpy; + const flt_t fpz = fpair * delz; + fztmp += fpz; + if (NEWTON_PAIR) f[j].z -= fpz; + + if (EFLAG) { + flt_t cut = (flt_t)1.0/icut; + flt_t r = (flt_t)1.0/rinv; + evdwl = (flt_t)0.5 * a0 * (cut - (flt_t)2.0*r + rsq * icut); + if (!ONETYPE) evdwl *= factor_dpd; + sevdwl += evdwl; + if (eatom) { + fwtmp += (flt_t)0.5 * evdwl; + if (NEWTON_PAIR) + f[j].w += (flt_t)0.5 * evdwl; + } + } + + if (NEWTON_PAIR == 0) + IP_PRE_ev_tally_nborv(vflag, delx, dely, delz, fpx, fpy, fpz); + } // if rsq + } // for jj + if (NEWTON_PAIR) { + f[i].x += fxtmp; + f[i].y += fytmp; + f[i].z += fztmp; + } else { + f[i].x = 
fxtmp; + f[i].y = fytmp; + f[i].z = fztmp; + } + + IP_PRE_ev_tally_atom(NEWTON_PAIR, EFLAG, vflag, f, fwtmp); + rngi += jnum; + } // for ii + + IP_PRE_fdotr_reduce_omp(NEWTON_PAIR, nall, minlocal, nthreads, f_start, + f_stride, x, offload, vflag, ov0, ov1, ov2, ov3, + ov4, ov5); + rngi_thread[tid] = rngi; + } // end omp + + IP_PRE_fdotr_reduce(NEWTON_PAIR, nall, nthreads, f_stride, vflag, + ov0, ov1, ov2, ov3, ov4, ov5); + + if (EFLAG) { + if (NEWTON_PAIR == 0) oevdwl *= (acc_t)0.5; + ev_global[0] = oevdwl; + ev_global[1] = (acc_t)0.0; + } + if (vflag) { + if (NEWTON_PAIR == 0) { + ov0 *= (acc_t)0.5; + ov1 *= (acc_t)0.5; + ov2 *= (acc_t)0.5; + ov3 *= (acc_t)0.5; + ov4 *= (acc_t)0.5; + ov5 *= (acc_t)0.5; + } + ev_global[2] = ov0; + ev_global[3] = ov1; + ev_global[4] = ov2; + ev_global[5] = ov3; + ev_global[6] = ov4; + ev_global[7] = ov5; + } + #if defined(__MIC__) && defined(_LMP_INTEL_OFFLOAD) + *timer_compute = MIC_Wtime() - *timer_compute; + #endif + } // end offload + + if (offload) + fix->stop_watch(TIME_OFFLOAD_LATENCY); + else + fix->stop_watch(TIME_HOST_PAIR); + + if (EFLAG || vflag) + fix->add_result_array(f_start, ev_global, offload, eatom, 0, vflag); + else + fix->add_result_array(f_start, 0, offload); +} + +/* ---------------------------------------------------------------------- + global settings + ------------------------------------------------------------------------- */ + +void PairDPDIntel::settings(int narg, char **arg) { + #if defined(_OPENMP) + if (_nrandom_thread) { + #ifdef LMP_USE_MKL_RNG + for (int i = 0; i < _nrandom_thread; i++) + vslDeleteStream(&random_thread[i]); + #else + for (int i = 1; i < _nrandom_thread; i++) + delete random_thread[i]; + #endif + } + delete []random_thread; + #endif + PairDPD::settings(narg,arg); + _nrandom_thread = comm->nthreads; + + #ifdef LMP_USE_MKL_RNG + + random_thread=new VSLStreamStatePtr[comm->nthreads]; + #if defined(_OPENMP) + #pragma omp parallel + { + int tid = omp_get_thread_num(); + 
vslNewStream(&random_thread[tid], LMP_MKL_RNG, + seed + comm->me + comm->nprocs * tid ); + } + #endif + + #else + + random_thread =new RanMars*[comm->nthreads]; + random_thread[0] = random; + #if defined(_OPENMP) + #pragma omp parallel + { + int tid = omp_get_thread_num(); + if (tid > 0) + random_thread[tid] = new RanMars(lmp, seed+comm->me+comm->nprocs*tid); + } + #endif + + #endif +} + +/* ---------------------------------------------------------------------- */ + +void PairDPDIntel::init_style() +{ + PairDPD::init_style(); + if (force->newton_pair == 0) { + neighbor->requests[neighbor->nrequest-1]->half = 0; + neighbor->requests[neighbor->nrequest-1]->full = 1; + } + neighbor->requests[neighbor->nrequest-1]->intel = 1; + + int ifix = modify->find_fix("package_intel"); + if (ifix < 0) + error->all(FLERR, + "The 'package intel' command is required for /intel styles"); + fix = static_cast<FixIntel *>(modify->fix[ifix]); + + fix->pair_init_check(); + #ifdef _LMP_INTEL_OFFLOAD + if (fix->offload_balance() != 0.0) + error->all(FLERR, + "Offload for dpd/intel is not yet available. 
Set balance to 0."); + #endif + + if (fix->precision() == FixIntel::PREC_MODE_MIXED) + pack_force_const(force_const_single, fix->get_mixed_buffers()); + else if (fix->precision() == FixIntel::PREC_MODE_DOUBLE) + pack_force_const(force_const_double, fix->get_double_buffers()); + else + pack_force_const(force_const_single, fix->get_single_buffers()); +} + +/* ---------------------------------------------------------------------- */ + +template <class flt_t, class acc_t> +void PairDPDIntel::pack_force_const(ForceConst<flt_t> &fc, + IntelBuffers<flt_t,acc_t> *buffers) +{ + _onetype = 0; + if (atom->ntypes == 1 && !atom->molecular) _onetype = 1; + + int tp1 = atom->ntypes + 1; + fc.set_ntypes(tp1,comm->nthreads,buffers->get_max_nbors(),memory,_cop); + buffers->set_ntypes(tp1); + flt_t **cutneighsq = buffers->get_cutneighsq(); + + // Repeat cutsq calculation because done after call to init_style + double cut, cutneigh; + for (int i = 1; i <= atom->ntypes; i++) { + for (int j = i; j <= atom->ntypes; j++) { + if (setflag[i][j] != 0 || (setflag[i][i] != 0 && setflag[j][j] != 0)) { + cut = init_one(i,j); + cutneigh = cut + neighbor->skin; + cutsq[i][j] = cutsq[j][i] = cut*cut; + cutneighsq[i][j] = cutneighsq[j][i] = cutneigh * cutneigh; + double icut = 1.0 / cut; + fc.param[i][j].icut = fc.param[j][i].icut = icut; + } else { + cut = init_one(i,j); + double icut = 1.0 / cut; + fc.param[i][j].icut = fc.param[j][i].icut = icut; + } + } + } + + for (int i = 0; i < 4; i++) { + fc.special_lj[i] = force->special_lj[i]; + fc.special_lj[0] = 1.0; + } + + for (int i = 1; i < tp1; i++) { + for (int j = 1; j < tp1; j++) { + fc.param[i][j].a0 = a0[i][j]; + fc.param[i][j].gamma = gamma[i][j]; + fc.param[i][j].sigma = sigma[i][j]; + } + } +} + +/* ---------------------------------------------------------------------- */ + +template <class flt_t> +void PairDPDIntel::ForceConst<flt_t>::set_ntypes(const int ntypes, + const int nthreads, + const int max_nbors, + Memory *memory, + const int 
cop) { + if (ntypes != _ntypes) { + if (_ntypes > 0) { + _memory->destroy(param); + _memory->destroy(rand_buffer_thread); + _memory->destroy(rngi); + } + if (ntypes > 0) { + _cop = cop; + memory->create(param,ntypes,ntypes,"fc.param"); + memory->create(rand_buffer_thread, nthreads, max_nbors, + "fc.rand_buffer_thread"); + memory->create(rngi,nthreads,"fc.param"); + for (int i = 0; i < nthreads; i++) rngi[i] = max_nbors; + } + } + _ntypes = ntypes; + _memory = memory; +} + +/* ---------------------------------------------------------------------- + proc 0 reads from restart file, bcasts + ------------------------------------------------------------------------- */ + +void PairDPDIntel::read_restart_settings(FILE *fp) +{ + #if defined(_OPENMP) + if (_nrandom_thread) { + #ifdef LMP_USE_MKL_RNG + for (int i = 0; i < _nrandom_thread; i++) + vslDeleteStream(&random_thread[i]); + #else + for (int i = 1; i < _nrandom_thread; i++) + delete random_thread[i]; + #endif + } + delete []random_thread; + #endif + PairDPD::read_restart_settings(fp); + _nrandom_thread = comm->nthreads; + + #ifdef LMP_USE_MKL_RNG + + random_thread=new VSLStreamStatePtr[comm->nthreads]; + #if defined(_OPENMP) + #pragma omp parallel + { + int tid = omp_get_thread_num(); + vslNewStream(&random_thread[tid], LMP_MKL_RNG, + seed + comm->me + comm->nprocs * tid ); + } + #endif + + #else + + random_thread =new RanMars*[comm->nthreads]; + random_thread[0] = random; + #if defined(_OPENMP) + #pragma omp parallel + { + int tid = omp_get_thread_num(); + if (tid > 0) + random_thread[tid] = new RanMars(lmp, seed+comm->me+comm->nprocs*tid); + } + #endif + + #endif +} diff --git a/src/USER-INTEL/pair_dpd_intel.h b/src/USER-INTEL/pair_dpd_intel.h new file mode 100644 index 0000000000000000000000000000000000000000..416d873c0009ef9864e4beb0cb3fd10b84fcc4a0 --- /dev/null +++ b/src/USER-INTEL/pair_dpd_intel.h @@ -0,0 +1,110 @@ +/* -*- c++ -*- ---------------------------------------------------------- + LAMMPS - 
Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +/* ---------------------------------------------------------------------- + Contributing author: W. Michael Brown (Intel) + Shun Xu (Computer Network Information Center, CAS) +------------------------------------------------------------------------- */ + +#ifdef PAIR_CLASS + +PairStyle(dpd/intel,PairDPDIntel) + +#else + +#ifndef LMP_PAIR_DPD_INTEL_H +#define LMP_PAIR_DPD_INTEL_H + +#include "pair_dpd.h" +#include "fix_intel.h" + +#ifdef LMP_USE_MKL_RNG +#include "mkl_vsl.h" +#else +#include "random_mars.h" +#endif + +namespace LAMMPS_NS { + +class PairDPDIntel : public PairDPD { + + public: + PairDPDIntel(class LAMMPS *); + ~PairDPDIntel(); + + virtual void compute(int, int); + void settings(int, char **); + void init_style(); + void read_restart_settings(FILE *); + + private: + FixIntel *fix; + int _cop, _onetype, _nrandom_thread; + + #ifdef LMP_USE_MKL_RNG + VSLStreamStatePtr *random_thread; + #else + RanMars **random_thread; + #endif + + template <class flt_t> class ForceConst; + template <class flt_t, class acc_t> + void compute(int eflag, int vflag, IntelBuffers<flt_t,acc_t> *buffers, + const ForceConst<flt_t> &fc); + template <int ONETYPE, int EFLAG, int NEWTON_PAIR, class flt_t, class acc_t> + void eval(const int offload, const int vflag, + IntelBuffers<flt_t,acc_t> * buffers, + const ForceConst<flt_t> &fc, const int astart, const int aend); + + template <class flt_t, class acc_t> + void pack_force_const(ForceConst<flt_t> &fc, + 
IntelBuffers<flt_t, acc_t> *buffers); + + // ---------------------------------------------------------------------- + + template <class flt_t> + class ForceConst { + public: + typedef struct { flt_t icut, a0, gamma, sigma; } fc_packed1; + + _alignvar(flt_t special_lj[4],64); + fc_packed1 **param; + flt_t **rand_buffer_thread; + int *rngi; + + ForceConst() : _ntypes(0) {} + ~ForceConst() { set_ntypes(0, 0, 0, NULL, _cop); } + + void set_ntypes(const int ntypes, const int nthreads, const int max_nbors, + Memory *memory, const int cop); + + private: + int _ntypes, _cop; + Memory *_memory; + }; + ForceConst<float> force_const_single; + ForceConst<double> force_const_double; +}; + +} + +#endif +#endif + +/* ERROR/WARNING messages: + +E: The 'package intel' command is required for /intel styles + +Self-explanatory. + +*/ diff --git a/src/USER-INTEL/pair_gayberne_intel.cpp b/src/USER-INTEL/pair_gayberne_intel.cpp index 3fbb58308b0d37bfeea2f918cf44de0e7e67a8e5..1f05ad0efc82c917f847a26e4e009c90f1643a2d 100644 --- a/src/USER-INTEL/pair_gayberne_intel.cpp +++ b/src/USER-INTEL/pair_gayberne_intel.cpp @@ -938,8 +938,8 @@ void PairGayBerneIntel::pack_force_const(ForceConst<flt_t> &fc, fc.upsilon = upsilon; fc.mu = mu; - for (int i = 0; i < tp1; i++) { - for (int j = 0; j < tp1; j++) { + for (int i = 1; i < tp1; i++) { + for (int j = 1; j < tp1; j++) { fc.ijc[i][j].lj1 = lj1[i][j]; fc.ijc[i][j].lj2 = lj2[i][j]; fc.ijc[i][j].cutsq = cutsq[i][j]; diff --git a/src/USER-INTEL/pair_lj_charmm_coul_charmm_intel.cpp b/src/USER-INTEL/pair_lj_charmm_coul_charmm_intel.cpp index 0dc2c275e87d3d583da1b42a4a493ae04cdd4469..e3afcd64a6e30f04506698b112dc9194606294a3 100644 --- a/src/USER-INTEL/pair_lj_charmm_coul_charmm_intel.cpp +++ b/src/USER-INTEL/pair_lj_charmm_coul_charmm_intel.cpp @@ -521,8 +521,8 @@ void PairLJCharmmCoulCharmmIntel::pack_force_const(ForceConst<flt_t> &fc, fc.special_lj[0] = 1.0; } - for (int i = 0; i < tp1; i++) { - for (int j = 0; j < tp1; j++) { + for (int i = 1; i < tp1; 
i++) { + for (int j = 1; j < tp1; j++) { fc.lj[i][j].x = lj1[i][j]; fc.lj[i][j].y = lj2[i][j]; fc.lj[i][j].z = lj3[i][j]; diff --git a/src/USER-INTEL/pair_lj_charmm_coul_long_intel.cpp b/src/USER-INTEL/pair_lj_charmm_coul_long_intel.cpp index fe99525122c17ae456241ae8f33e6f2deb1bf82a..a2680cdff41bb964cf39e42f85c140837e08fca8 100644 --- a/src/USER-INTEL/pair_lj_charmm_coul_long_intel.cpp +++ b/src/USER-INTEL/pair_lj_charmm_coul_long_intel.cpp @@ -586,8 +586,8 @@ void PairLJCharmmCoulLongIntel::pack_force_const(ForceConst<flt_t> &fc, fc.special_lj[0] = 1.0; } - for (int i = 0; i < tp1; i++) { - for (int j = 0; j < tp1; j++) { + for (int i = 1; i < tp1; i++) { + for (int j = 1; j < tp1; j++) { fc.lj[i][j].x = lj1[i][j]; fc.lj[i][j].y = lj2[i][j]; fc.lj[i][j].z = lj3[i][j]; diff --git a/src/USER-INTEL/pair_lj_cut_coul_long_intel.cpp b/src/USER-INTEL/pair_lj_cut_coul_long_intel.cpp index e9775d6ec5c96b97bcd89ae39925796474bdca86..c0bf6f35c6e6b220f33d6d6bbb00232d8fcce3bd 100644 --- a/src/USER-INTEL/pair_lj_cut_coul_long_intel.cpp +++ b/src/USER-INTEL/pair_lj_cut_coul_long_intel.cpp @@ -544,8 +544,8 @@ void PairLJCutCoulLongIntel::pack_force_const(ForceConst<flt_t> &fc, fc.special_lj[0] = 1.0; } - for (int i = 0; i < tp1; i++) { - for (int j = 0; j < tp1; j++) { + for (int i = 1; i < tp1; i++) { + for (int j = 1; j < tp1; j++) { if (cutsq[i][j] < cut_ljsq[i][j]) error->all(FLERR, "Intel variant of lj/cut/coul/long expects lj cutoff<=coulombic"); diff --git a/src/USER-INTEL/pair_lj_cut_intel.cpp b/src/USER-INTEL/pair_lj_cut_intel.cpp index 4871821842dd327e8d2da1e1a0bb02a8e4146ae9..f5a7999ee2e9cf13280167af8eecb008dbca59fe 100644 --- a/src/USER-INTEL/pair_lj_cut_intel.cpp +++ b/src/USER-INTEL/pair_lj_cut_intel.cpp @@ -437,8 +437,8 @@ void PairLJCutIntel::pack_force_const(ForceConst<flt_t> &fc, fc.special_lj[0] = 1.0; } - for (int i = 0; i < tp1; i++) { - for (int j = 0; j < tp1; j++) { + for (int i = 1; i < tp1; i++) { + for (int j = 1; j < tp1; j++) { fc.ljc12o[i][j].lj1 = 
lj1[i][j]; fc.ljc12o[i][j].lj2 = lj2[i][j]; fc.lj34[i][j].lj3 = lj3[i][j]; diff --git a/src/USER-INTEL/pppm_intel.cpp b/src/USER-INTEL/pppm_intel.cpp index f1cfe591f2a1365ffc8e2f3b68c6cad1dc5cf0bf..db855b75ef8f877303ae6186e12fc59314729397 100644 --- a/src/USER-INTEL/pppm_intel.cpp +++ b/src/USER-INTEL/pppm_intel.cpp @@ -66,11 +66,7 @@ PPPMIntel::PPPMIntel(LAMMPS *lmp, int narg, char **arg) : PPPM(lmp, narg, arg) rho_lookup = drho_lookup = NULL; rho_points = 0; - vdxy_brick = vdz0_brick = NULL; - work3 = NULL; - cg_pack = NULL; - - _use_table = _use_packing = _use_lrt = 0; + _use_table = _use_lrt = 0; } PPPMIntel::~PPPMIntel() @@ -82,12 +78,6 @@ PPPMIntel::~PPPMIntel() memory->destroy(rho_lookup); memory->destroy(drho_lookup); - - memory->destroy3d_offset(vdxy_brick, nzlo_out, nylo_out, 2*nxlo_out); - memory->destroy3d_offset(vdz0_brick, nzlo_out, nylo_out, 2*nxlo_out); - memory->destroy(work3); - - delete cg_pack; } /* ---------------------------------------------------------------------- @@ -141,36 +131,6 @@ void PPPMIntel::init() if (order > INTEL_P3M_MAXORDER) error->all(FLERR,"PPPM order greater than supported by USER-INTEL\n"); - _use_packing = (order == 7) && (INTEL_VECTOR_WIDTH == 16) - && (sizeof(FFT_SCALAR) == sizeof(float)) - && (differentiation_flag == 0); - if (_use_packing) { - memory->destroy3d_offset(vdx_brick,nzlo_out,nylo_out,nxlo_out); - memory->destroy3d_offset(vdy_brick,nzlo_out,nylo_out,nxlo_out); - memory->destroy3d_offset(vdz_brick,nzlo_out,nylo_out,nxlo_out); - memory->destroy3d_offset(vdxy_brick, nzlo_out, nylo_out, 2*nxlo_out); - create3d_offset(vdxy_brick, nzlo_out, nzhi_out+2, - nylo_out, nyhi_out, 2*nxlo_out, 2*nxhi_out+1, - "pppmintel:vdxy_brick"); - memory->destroy3d_offset(vdz0_brick, nzlo_out, nylo_out, 2*nxlo_out); - create3d_offset(vdz0_brick, nzlo_out, nzhi_out+2, - nylo_out, nyhi_out, 2*nxlo_out, 2*nxhi_out+1, - "pppmintel:vdz0_brick"); - memory->destroy(work3); - memory->create(work3, 2*nfft_both, "pppmintel:work3"); - - // new 
communicator for the double-size bricks - delete cg_pack; - int (*procneigh)[2] = comm->procneigh; - cg_pack = new GridComm(lmp,world,2,0, 2*nxlo_in,2*nxhi_in+1,nylo_in, - nyhi_in,nzlo_in,nzhi_in, 2*nxlo_out,2*nxhi_out+1, - nylo_out,nyhi_out,nzlo_out,nzhi_out, - procneigh[0][0],procneigh[0][1],procneigh[1][0], - procneigh[1][1],procneigh[2][0],procneigh[2][1]); - - cg_pack->ghost_notify(); - cg_pack->setup(); - } } /* ---------------------------------------------------------------------- @@ -272,18 +232,13 @@ void PPPMIntel::compute_first(int eflag, int vflag) // also performs per-atom calculations via poisson_peratom() if (differentiation_flag == 1) poisson_ad(); - else poisson_ik_intel(); + else poisson_ik(); // all procs communicate E-field values // to fill ghost cells surrounding their 3d bricks if (differentiation_flag == 1) cg->forward_comm(this,FORWARD_AD); - else { - if (_use_packing) - cg_pack->forward_comm(this,FORWARD_IK); - else - cg->forward_comm(this,FORWARD_IK); - } + else cg->forward_comm(this,FORWARD_IK); // extra per-atom energy/virial communication @@ -604,7 +559,7 @@ void PPPMIntel::make_rho(IntelBuffers<flt_t,acc_t> *buffers) interpolate from grid to get electric field & force on my particles for ik ------------------------------------------------------------------------- */ -template<class flt_t, class acc_t, int use_table, int use_packing> +template<class flt_t, class acc_t, int use_table> void PPPMIntel::fieldforce_ik(IntelBuffers<flt_t,acc_t> *buffers) { // loop over my charges, interpolate electric field from nearby grid points @@ -649,9 +604,9 @@ void PPPMIntel::fieldforce_ik(IntelBuffers<flt_t,acc_t> *buffers) int ny = part2grid[i][1]; int nz = part2grid[i][2]; - int nxsum = (use_packing ? 
2 : 1) * (nx + nlower); + int nxsum = nx + nlower; int nysum = ny + nlower; - int nzsum = nz + nlower;; + int nzsum = nz + nlower; FFT_SCALAR dx = nx+fshiftone - (x[i].x-lo0)*xi; FFT_SCALAR dy = ny+fshiftone - (x[i].y-lo1)*yi; @@ -668,12 +623,7 @@ void PPPMIntel::fieldforce_ik(IntelBuffers<flt_t,acc_t> *buffers) #pragma simd #endif for (int k = 0; k < INTEL_P3M_ALIGNED_MAXORDER; k++) { - if (use_packing) { - rho0[2 * k] = rho_lookup[idx][k]; - rho0[2 * k + 1] = rho_lookup[idx][k]; - } else { - rho0[k] = rho_lookup[idx][k]; - } + rho0[k] = rho_lookup[idx][k]; rho1[k] = rho_lookup[idy][k]; rho2[k] = rho_lookup[idz][k]; } @@ -690,12 +640,7 @@ void PPPMIntel::fieldforce_ik(IntelBuffers<flt_t,acc_t> *buffers) r2 = rho_coeff[l][k] + r2*dy; r3 = rho_coeff[l][k] + r3*dz; } - if (use_packing) { - rho0[2 * (k-nlower)] = r1; - rho0[2 * (k-nlower) + 1] = r1; - } else { - rho0[k-nlower] = r1; - } + rho0[k-nlower] = r1; rho1[k-nlower] = r2; rho2[k-nlower] = r3; } @@ -722,18 +667,12 @@ void PPPMIntel::fieldforce_ik(IntelBuffers<flt_t,acc_t> *buffers) #if defined(LMP_SIMD_COMPILER) #pragma simd #endif - for (int l = 0; l < (use_packing ? 
2 : 1) * - INTEL_P3M_ALIGNED_MAXORDER; l++) { + for (int l = 0; l < INTEL_P3M_ALIGNED_MAXORDER; l++) { int mx = l+nxsum; FFT_SCALAR x0 = y0*rho0[l]; - if (use_packing) { - ekxy_arr[l] -= x0*vdxy_brick[mz][my][mx]; - ekz0_arr[l] -= x0*vdz0_brick[mz][my][mx]; - } else { - ekx_arr[l] -= x0*vdx_brick[mz][my][mx]; - eky_arr[l] -= x0*vdy_brick[mz][my][mx]; - ekz_arr[l] -= x0*vdz_brick[mz][my][mx]; - } + ekx_arr[l] -= x0*vdx_brick[mz][my][mx]; + eky_arr[l] -= x0*vdy_brick[mz][my][mx]; + ekz_arr[l] -= x0*vdz_brick[mz][my][mx]; } } } @@ -741,18 +680,10 @@ void PPPMIntel::fieldforce_ik(IntelBuffers<flt_t,acc_t> *buffers) FFT_SCALAR ekx, eky, ekz; ekx = eky = ekz = ZEROF; - if (use_packing) { - for (int l = 0; l < 2*order; l += 2) { - ekx += ekxy_arr[l]; - eky += ekxy_arr[l+1]; - ekz += ekz0_arr[l]; - } - } else { - for (int l = 0; l < order; l++) { - ekx += ekx_arr[l]; - eky += eky_arr[l]; - ekz += ekz_arr[l]; - } + for (int l = 0; l < order; l++) { + ekx += ekx_arr[l]; + eky += eky_arr[l]; + ekz += ekz_arr[l]; } // convert E-field to force @@ -965,137 +896,6 @@ void PPPMIntel::fieldforce_ad(IntelBuffers<flt_t,acc_t> *buffers) } } -/* ---------------------------------------------------------------------- - FFT-based Poisson solver for ik - Does special things for packing mode to avoid repeated copies -------------------------------------------------------------------------- */ - -void PPPMIntel::poisson_ik_intel() -{ - if (_use_packing == 0) { - poisson_ik(); - return; - } - - int i,j,k,n; - double eng; - - // transform charge density (r -> k) - - n = 0; - for (i = 0; i < nfft; i++) { - work1[n++] = density_fft[i]; - work1[n++] = ZEROF; - } - - fft1->compute(work1,work1,1); - - // global energy and virial contribution - - double scaleinv = 1.0/(nx_pppm*ny_pppm*nz_pppm); - double s2 = scaleinv*scaleinv; - - if (eflag_global || vflag_global) { - if (vflag_global) { - n = 0; - for (i = 0; i < nfft; i++) { - eng = s2 * greensfn[i] * (work1[n]*work1[n] + - work1[n+1]*work1[n+1]); 
- for (j = 0; j < 6; j++) virial[j] += eng*vg[i][j]; - if (eflag_global) energy += eng; - n += 2; - } - } else { - n = 0; - for (i = 0; i < nfft; i++) { - energy += - s2 * greensfn[i] * (work1[n]*work1[n] + work1[n+1]*work1[n+1]); - n += 2; - } - } - } - - // scale by 1/total-grid-pts to get rho(k) - // multiply by Green's function to get V(k) - - n = 0; - for (i = 0; i < nfft; i++) { - work1[n++] *= scaleinv * greensfn[i]; - work1[n++] *= scaleinv * greensfn[i]; - } - - // extra FFTs for per-atom energy/virial - - if (evflag_atom) poisson_peratom(); - - // triclinic system - - if (triclinic) { - poisson_ik_triclinic(); - return; - } - - // compute gradients of V(r) in each of 3 dims by transformimg -ik*V(k) - // FFT leaves data in 3d brick decomposition - // copy it into inner portion of vdx,vdy,vdz arrays - - // x direction gradient - n = 0; - for (k = nzlo_fft; k <= nzhi_fft; k++) - for (j = nylo_fft; j <= nyhi_fft; j++) - for (i = nxlo_fft; i <= nxhi_fft; i++) { - work2[n] = fkx[i]*work1[n+1]; - work2[n+1] = -fkx[i]*work1[n]; - n += 2; - } - - fft2->compute(work2,work2,-1); - - // y direction gradient - - n = 0; - for (k = nzlo_fft; k <= nzhi_fft; k++) - for (j = nylo_fft; j <= nyhi_fft; j++) - for (i = nxlo_fft; i <= nxhi_fft; i++) { - work3[n] = fky[j]*work1[n+1]; - work3[n+1] = -fky[j]*work1[n]; - n += 2; - } - - fft2->compute(work3,work3,-1); - - n = 0; - for (k = nzlo_in; k <= nzhi_in; k++) - for (j = nylo_in; j <= nyhi_in; j++) - for (i = nxlo_in; i <= nxhi_in; i++) { - vdxy_brick[k][j][2*i] = work2[n]; - vdxy_brick[k][j][2*i+1] = work3[n]; - n += 2; - } - - // z direction gradient - - n = 0; - for (k = nzlo_fft; k <= nzhi_fft; k++) - for (j = nylo_fft; j <= nyhi_fft; j++) - for (i = nxlo_fft; i <= nxhi_fft; i++) { - work2[n] = fkz[k]*work1[n+1]; - work2[n+1] = -fkz[k]*work1[n]; - n += 2; - } - - fft2->compute(work2,work2,-1); - - n = 0; - for (k = nzlo_in; k <= nzhi_in; k++) - for (j = nylo_in; j <= nyhi_in; j++) - for (i = nxlo_in; i <= nxhi_in; i++) { 
- vdz0_brick[k][j][2*i] = work2[n]; - vdz0_brick[k][j][2*i+1] = 0.; - n += 2; - } -} - /* ---------------------------------------------------------------------- precompute rho coefficients as a lookup table to save time in make_rho and fieldforce. Instead of doing this polynomial for every atom 6 times @@ -1141,46 +941,6 @@ void PPPMIntel::precompute_rho() } } -/* ---------------------------------------------------------------------- - pack own values to buf to send to another proc -------------------------------------------------------------------------- */ - -void PPPMIntel::pack_forward(int flag, FFT_SCALAR *buf, int nlist, int *list) -{ - int n = 0; - - if ((flag == FORWARD_IK) && _use_packing) { - FFT_SCALAR *xsrc = &vdxy_brick[nzlo_out][nylo_out][2*nxlo_out]; - FFT_SCALAR *zsrc = &vdz0_brick[nzlo_out][nylo_out][2*nxlo_out]; - for (int i = 0; i < nlist; i++) { - buf[n++] = xsrc[list[i]]; - buf[n++] = zsrc[list[i]]; - } - } else { - PPPM::pack_forward(flag, buf, nlist, list); - } -} - -/* ---------------------------------------------------------------------- - unpack another proc's own values from buf and set own ghost values -------------------------------------------------------------------------- */ - -void PPPMIntel::unpack_forward(int flag, FFT_SCALAR *buf, int nlist, int *list) -{ - int n = 0; - - if ((flag == FORWARD_IK) && _use_packing) { - FFT_SCALAR *xdest = &vdxy_brick[nzlo_out][nylo_out][2*nxlo_out]; - FFT_SCALAR *zdest = &vdz0_brick[nzlo_out][nylo_out][2*nxlo_out]; - for (int i = 0; i < nlist; i++) { - xdest[list[i]] = buf[n++]; - zdest[list[i]] = buf[n++]; - } - } else { - PPPM::unpack_forward(flag, buf, nlist, list); - } -} - /* ---------------------------------------------------------------------- memory usage of local arrays ------------------------------------------------------------------------- */ @@ -1201,14 +961,6 @@ double PPPMIntel::memory_usage() bytes += rho_points * INTEL_P3M_ALIGNED_MAXORDER * sizeof(FFT_SCALAR); } } - if 
(_use_packing) { - bytes += 2 * (nzhi_out + 2 - nzlo_out + 1) * (nyhi_out - nylo_out + 1) - * (2 * nxhi_out + 1 - 2 * nxlo_out + 1) * sizeof(FFT_SCALAR); - bytes -= 3 * (nxhi_out - nxlo_out + 1) * (nyhi_out - nylo_out + 1) - * (nzhi_out - nzlo_out + 1) * sizeof(FFT_SCALAR); - bytes += 2 * nfft_both * sizeof(FFT_SCALAR); - bytes += cg_pack->memory_usage(); - } return bytes; } diff --git a/src/USER-INTEL/pppm_intel.h b/src/USER-INTEL/pppm_intel.h index 5bffabe0e5f2e3ed07e22aa436ffa706d8dae9da..2a57372558ee7d13dc43717596794e0dc8924e53 100644 --- a/src/USER-INTEL/pppm_intel.h +++ b/src/USER-INTEL/pppm_intel.h @@ -38,8 +38,6 @@ class PPPMIntel : public PPPM { virtual ~PPPMIntel(); virtual void init(); virtual void compute(int, int); - virtual void pack_forward(int, FFT_SCALAR *, int, int *); - virtual void unpack_forward(int, FFT_SCALAR *, int, int *); virtual double memory_usage(); void compute_first(int, int); void compute_second(int, int); @@ -64,12 +62,6 @@ class PPPMIntel : public PPPM { FFT_SCALAR **drho_lookup; FFT_SCALAR half_rho_scale, half_rho_scale_plus; - int _use_packing; - FFT_SCALAR ***vdxy_brick; - FFT_SCALAR ***vdz0_brick; - FFT_SCALAR *work3; - class GridComm *cg_pack; - #ifdef _LMP_INTEL_OFFLOAD int _use_base; #endif @@ -92,23 +84,14 @@ class PPPMIntel : public PPPM { make_rho<flt_t,acc_t,0>(buffers); } } - void poisson_ik_intel(); - template<class flt_t, class acc_t, int use_table, int use_packing> + template<class flt_t, class acc_t, int use_table> void fieldforce_ik(IntelBuffers<flt_t,acc_t> *buffers); template<class flt_t, class acc_t> void fieldforce_ik(IntelBuffers<flt_t,acc_t> *buffers) { if (_use_table == 1) { - if (_use_packing == 1) { - fieldforce_ik<flt_t, acc_t, 1, 1>(buffers); - } else { - fieldforce_ik<flt_t, acc_t, 1, 0>(buffers); - } + fieldforce_ik<flt_t, acc_t, 1>(buffers); } else { - if (_use_packing == 1) { - fieldforce_ik<flt_t, acc_t, 0, 1>(buffers); - } else { - fieldforce_ik<flt_t, acc_t, 0, 0>(buffers); - } + 
fieldforce_ik<flt_t, acc_t, 0>(buffers); } } template<class flt_t, class acc_t, int use_table> diff --git a/src/USER-INTEL/verlet_lrt_intel.cpp b/src/USER-INTEL/verlet_lrt_intel.cpp index 81f458614372cca3a6d1e74a5544db263ac60b67..9ff5f8517608fa74c7c9916824aa723247ef3fa6 100644 --- a/src/USER-INTEL/verlet_lrt_intel.cpp +++ b/src/USER-INTEL/verlet_lrt_intel.cpp @@ -68,7 +68,7 @@ void VerletLRTIntel::init() _intel_kspace = (PPPMIntel*)(force->kspace_match("pppm/intel", 0)); - #ifdef LMP_INTEL_NOLRT + #ifndef LMP_INTEL_USELRT error->all(FLERR, "LRT otion for Intel package disabled at compile time"); #endif diff --git a/src/USER-INTEL/verlet_lrt_intel.h b/src/USER-INTEL/verlet_lrt_intel.h index 813cd536050d44f54c2bc3379756970eb69c8320..0d7154ff643ee7a71af39057b13668345939ce1d 100644 --- a/src/USER-INTEL/verlet_lrt_intel.h +++ b/src/USER-INTEL/verlet_lrt_intel.h @@ -23,10 +23,7 @@ IntegrateStyle(verlet/lrt/intel,VerletLRTIntel) #include "verlet.h" #include "pppm_intel.h" -#ifndef LMP_INTEL_USELRT -#define LMP_INTEL_NOLRT -#else - +#ifdef LMP_INTEL_USELRT #ifdef LMP_INTEL_LRT11 #define _LMP_INTEL_LRT_11 #include <thread> diff --git a/src/USER-MANIFOLD/manifold_gaussian_bump.cpp b/src/USER-MANIFOLD/manifold_gaussian_bump.cpp index db8c589afb092feb0e3d1b853e034618317e7fb8..a9ee35bbfc5f23628c2943e70ea8043777028af1 100644 --- a/src/USER-MANIFOLD/manifold_gaussian_bump.cpp +++ b/src/USER-MANIFOLD/manifold_gaussian_bump.cpp @@ -134,7 +134,7 @@ public: // Manifold itself: manifold_gaussian_bump::manifold_gaussian_bump(class LAMMPS* lmp, int narg, char **arg) - : manifold(lmp), lut_z(NULL), lut_zp(NULL) {} + : manifold(lmp), lut_z(NULL), lut_zp(NULL) {} manifold_gaussian_bump::~manifold_gaussian_bump() @@ -361,13 +361,13 @@ void manifold_gaussian_bump::test_lut() n( x, nn ); double taper_z; if( xx <= rc1 ){ - taper_z = gaussian_bump(xx); + taper_z = gaussian_bump(xx); }else if( xx < rc2 ){ - taper_z = lut_get_z( xx ); + taper_z = lut_get_z( xx ); }else{ - taper_z = 0.0; + taper_z 
= 0.0; } - fprintf( fp, "%g %g %g %g %g\n", xx, gaussian_bump(xx), taper_z, + fprintf( fp, "%g %g %g %g %g %g %g\n", xx, gaussian_bump(xx), taper_z, gg, nn[0], nn[1], nn[2] ); } fclose(fp); diff --git a/src/USER-MANIFOLD/manifold_plane_wiggle.cpp b/src/USER-MANIFOLD/manifold_plane_wiggle.cpp index fd50e774dafb08e7a3be7689ed1081e2fd3a7cef..136c52ab361a2023ee2442fc038039a8d2c1a7d0 100644 --- a/src/USER-MANIFOLD/manifold_plane_wiggle.cpp +++ b/src/USER-MANIFOLD/manifold_plane_wiggle.cpp @@ -24,5 +24,5 @@ void manifold_plane_wiggle::n( const double *x, double *n ) double w = params[1]; n[2] = 1; n[1] = 0.0; - n[0] = -a*w*cos(x[0]); + n[0] = -a*w*cos(w*x[0]); } diff --git a/src/USER-MISC/README b/src/USER-MISC/README index 65146abd5436cd2b0e7ddc2afc66ebfdfa36d463..5af5b22eb7889609834a4449bb5a118e75789c86 100644 --- a/src/USER-MISC/README +++ b/src/USER-MISC/README @@ -47,6 +47,7 @@ fix imd, Axel Kohlmeyer, akohlmey at gmail.com, 9 Nov 2009 fix ipi, Michele Ceriotti (EPFL Lausanne), michele.ceriotti at gmail.com, 24 Nov 2014 fix nvk, Efrem Braun (UC Berkeley), efrem.braun at gmail.com, https://github.com/lammps/lammps/pull/310 fix pimd, Yuxing Peng (U Chicago), yuxing at uchicago.edu, 24 Nov 2014 +fix rhok, Ulf Pedersen (Roskilde U), ulf at urp.dk, 25 Sep 2017 fix smd, Axel Kohlmeyer, akohlmey at gmail.com, 19 May 2008 fix ti/spring, Rodrigo Freitas (Unicamp/Brazil), rodrigohb at gmail.com, 7 Nov 2013 fix ttm/mod, Sergey Starikov and Vasily Pisarev (JIHT), pisarevvv at gmail.com, 2 Feb 2015 diff --git a/src/USER-MISC/fix_rhok.cpp b/src/USER-MISC/fix_rhok.cpp new file mode 100644 index 0000000000000000000000000000000000000000..58b0e95a97e0abe10ac7d48e1e93807a0c5a16ab --- /dev/null +++ b/src/USER-MISC/fix_rhok.cpp @@ -0,0 +1,245 @@ +/* ---------------------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + 
Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +/* ---------------------------------------------------------------------- + Contributing author: Ulf R. Pedersen, ulf@urp.dk +------------------------------------------------------------------------- */ + +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <math.h> + +#include "fix_rhok.h" +#include "atom.h" +#include "domain.h" +#include "error.h" +#include "force.h" +#include "respa.h" +#include "update.h" +#include "citeme.h" + +using namespace LAMMPS_NS; +using namespace FixConst; + +static const char cite_fix_rhok[] = + "Bias on the collective density field (fix rhok):\n\n" + "@Article{pedersen_jcp139_104102_2013,\n" + "title = {Direct calculation of the solid-liquid Gibbs free energy difference in a single equilibrium simulation},\n" + "volume = {139},\n" + "number = {10},\n" + "url = {http://aip.scitation.org/doi/10.1063/1.4818747},\n" + "doi = {10.1063/1.4818747},\n" + "urldate = {2017-10-03},\n" + "journal = {J. Chem. Phys.},\n" + "author = {Pedersen, Ulf R.},\n" + "year = {2013},\n" + "pages = {104102}\n" + "}\n\n"; + +FixRhok::FixRhok( LAMMPS* inLMP, int inArgc, char** inArgv ) + : Fix( inLMP, inArgc, inArgv ) +{ + + if (lmp->citeme) lmp->citeme->add(cite_fix_rhok); + + // Check arguments + if( inArgc != 8 ) + error->all(FLERR,"Illegal fix rhoKUmbrella command" ); + + // Set up fix flags + scalar_flag = 1; // have compute_scalar + vector_flag = 1; // have compute_vector... 
+ size_vector = 3; // ...with this many components + global_freq = 1; // whose value can be computed at every timestep + thermo_energy = 1; // this fix changes system's potential energy + extscalar = 0; // but the deltaPE might not scale with # of atoms + extvector = 0; // neither do the components of the vector + + // Parse fix options + int n[3]; + + n[0] = force->inumeric(FLERR,inArgv[3]); + n[1] = force->inumeric(FLERR,inArgv[4]); + n[2] = force->inumeric(FLERR,inArgv[5]); + + mK[0] = n[0]*(2*M_PI / (domain->boxhi[0] - domain->boxlo[0])); + mK[1] = n[1]*(2*M_PI / (domain->boxhi[1] - domain->boxlo[1])); + mK[2] = n[2]*(2*M_PI / (domain->boxhi[2] - domain->boxlo[2])); + + mKappa = force->numeric(FLERR,inArgv[6]); + mRhoK0 = force->numeric(FLERR,inArgv[7]); +} + +// Methods that this fix implements +// -------------------------------- + +// Tells LAMMPS where this fix should act +int +FixRhok::setmask() +{ + int mask = 0; + + // This fix modifies forces... + mask |= POST_FORCE; + mask |= POST_FORCE_RESPA; + mask |= MIN_POST_FORCE; + + // ...and potential energies + mask |= THERMO_ENERGY; + + return mask; +} + +// Initializes the fix at the beginning of a run +void +FixRhok::init() +{ + // RESPA boilerplate + if( strcmp( update->integrate_style, "respa" ) == 0 ) + mNLevelsRESPA = ((Respa *) update->integrate)->nlevels; + + // Count the number of affected particles + int nThisLocal = 0; + int *mask = atom->mask; + int nlocal = atom->nlocal; + for( int i = 0; i < nlocal; i++ ) { // Iterate through all atoms on this CPU + if( mask[i] & groupbit ) { // ...only those affected by this fix + nThisLocal++; + } + } + MPI_Allreduce( &nThisLocal, &mNThis, + 1, MPI_INT, MPI_SUM, world ); + mSqrtNThis = sqrt( mNThis ); +} + +// Initial application of the fix to a system (when doing MD) +void +FixRhok::setup( int inVFlag ) +{ + if( strcmp( update->integrate_style, "verlet" ) == 0 ) + post_force( inVFlag ); + else + { + ((Respa *) update->integrate)->copy_flevel_f( mNLevelsRESPA 
- 1 ); + post_force_respa( inVFlag, mNLevelsRESPA - 1,0 ); + ((Respa *) update->integrate)->copy_f_flevel( mNLevelsRESPA - 1 ); + } +} + +// Initial application of the fix to a system (when doing minimization) +void +FixRhok::min_setup( int inVFlag ) +{ + post_force( inVFlag ); +} + +// Modify the forces calculated in the main force loop of ordinary MD +void +FixRhok::post_force( int inVFlag ) +{ + double **x = atom->x; + double **f = atom->f; + int *mask = atom->mask; + int nlocal = atom->nlocal; + + // Loop over locally-owned atoms affected by this fix and calculate the + // partial rhoK's + mRhoKLocal[0] = 0.0; + mRhoKLocal[1] = 0.0; + + for( int i = 0; i < nlocal; i++ ) { // Iterate through all atoms on this CPU + if( mask[i] & groupbit ) { // ...only those affected by this fix + + // rho_k = sum_i exp( - i k.r_i ) + mRhoKLocal[0] += cos( mK[0]*x[i][0] + mK[1]*x[i][1] + mK[2]*x[i][2] ); + mRhoKLocal[1] -= sin( mK[0]*x[i][0] + mK[1]*x[i][1] + mK[2]*x[i][2] ); + } + } + + // Now calculate mRhoKGlobal + MPI_Allreduce( mRhoKLocal, mRhoKGlobal, + 2, MPI_DOUBLE, MPI_SUM, world ); + + // Info: < \sum_{i,j} e^{-ik.(r_i - r_j)} > ~ N, so + // we define rho_k as (1 / sqrt(N)) \sum_i e^{-i k.r_i}, so that + // <rho_k^2> is intensive. + mRhoKGlobal[0] /= mSqrtNThis; + mRhoKGlobal[1] /= mSqrtNThis; + + // We'll need magnitude of rho_k + double rhoK = sqrt( mRhoKGlobal[0]*mRhoKGlobal[0] + + mRhoKGlobal[1]*mRhoKGlobal[1] ); + + for( int i = 0; i < nlocal; i++ ) { // Iterate through all atoms on this CPU + if( mask[i] & groupbit ) { // ...only those affected by this fix + + // Calculate forces + // U = kappa/2 ( |rho_k| - rho_k^0 )^2 + // f_i = -grad_i U = -kappa ( |rho_k| - rho_k^0 ) grad_i |rho_k| + // grad_i |rho_k| = Re( rho_k* (-i k e^{-i k . r_i} / sqrt(N)) ) / |rho_k| + // + // In terms of real and imag parts of rho_k, + // + // Re( rho_k* (-i k e^{-i k . r_i}) ) = + // (- Re[rho_k] * sin( k . r_i ) - Im[rho_k] * cos( k . 
r_i )) * k + + double sinKRi = sin( mK[0]*x[i][0] + mK[1]*x[i][1] + mK[2]*x[i][2] ); + double cosKRi = cos( mK[0]*x[i][0] + mK[1]*x[i][1] + mK[2]*x[i][2] ); + + double prefactor = mKappa * ( rhoK - mRhoK0 ) / rhoK + * (-mRhoKGlobal[0]*sinKRi - mRhoKGlobal[1]*cosKRi) / mSqrtNThis; + f[i][0] -= prefactor * mK[0]; + f[i][1] -= prefactor * mK[1]; + f[i][2] -= prefactor * mK[2]; + } + } +} + +// Forces in RESPA loop +void +FixRhok::post_force_respa( int inVFlag, int inILevel, int inILoop ) +{ + if( inILevel == mNLevelsRESPA - 1 ) + post_force( inVFlag ); +} + +// Forces in minimization loop +void +FixRhok::min_post_force( int inVFlag ) +{ + post_force( inVFlag ); +} + +// Compute the change in the potential energy induced by this fix +double +FixRhok::compute_scalar() +{ + double rhoK = sqrt( mRhoKGlobal[0]*mRhoKGlobal[0] + + mRhoKGlobal[1]*mRhoKGlobal[1] ); + + return 0.5 * mKappa * (rhoK - mRhoK0) * (rhoK - mRhoK0); +} + +// Compute the ith component of the vector +double +FixRhok::compute_vector( int inI ) +{ + if( inI == 0 ) + return mRhoKGlobal[0]; // Real part + else if( inI == 1 ) + return mRhoKGlobal[1]; // Imagniary part + else if( inI == 2 ) + return sqrt( mRhoKGlobal[0]*mRhoKGlobal[0] + + mRhoKGlobal[1]*mRhoKGlobal[1] ); + else + return 12345.0; +} diff --git a/src/USER-MISC/fix_rhok.h b/src/USER-MISC/fix_rhok.h new file mode 100644 index 0000000000000000000000000000000000000000..c950c08b1d14aa3b22f14ce98e9b5c65eb513750 --- /dev/null +++ b/src/USER-MISC/fix_rhok.h @@ -0,0 +1,77 @@ +/* -*- c++ -*- ---------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. 
+ See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +#ifdef FIX_CLASS + +FixStyle(rhok,FixRhok) + +#else + +#ifndef LMP_FIX_RHOK_H +#define LMP_FIX_RHOK_H + +#include "fix.h" + +namespace LAMMPS_NS { + +class FixRhok : public Fix +{ +public: + // Constructor: all the parameters to this fix specified in + // the LAMMPS input get passed in + FixRhok( LAMMPS* inLMP, int inArgc, char** inArgv ); + virtual ~FixRhok() {}; + + // Methods that this fix implements + // -------------------------------- + + // Tells LAMMPS where this fix should act + int setmask(); + + // Initializes the fix at the beginning of a run + void init(); + + // Initial application of the fix to a system (when doing MD / minimization) + void setup( int inVFlag ); + void min_setup( int inVFlag ); + + // Modify the forces calculated in the main force loop, either when + // doing usual MD, RESPA MD or minimization + void post_force( int inVFlag ); + void post_force_respa( int inVFlag, int inILevel, int inILoop ); + void min_post_force( int inVFlag ); + + // Compute the change in the potential energy induced by this fix + double compute_scalar(); + + // Compute the ith component of the vector associated with this fix + double compute_vector( int inI ); + +private: + // RESPA boilerplate + int mNLevelsRESPA; + + // Defining parameters for this umbrella + double mK[3], mKappa, mRhoK0; + + // Number of particles affected by the fix + int mNThis; + double mSqrtNThis; + + // Real and imaginary parts of rho_k := sum_i exp( - i k . 
r_i ) + double mRhoKLocal[2], mRhoKGlobal[2]; +}; + +} // namespace LAMMPS_NS + +#endif // __FIX_RHOK__ +#endif // FIX_CLASS + diff --git a/src/USER-MISC/fix_srp.cpp b/src/USER-MISC/fix_srp.cpp index f3dec42a8320de89202129fac1741118530f53dc..e1e5f579b875c31f16c70c3f55ba3d44a3f14494 100644 --- a/src/USER-MISC/fix_srp.cpp +++ b/src/USER-MISC/fix_srp.cpp @@ -98,7 +98,7 @@ int FixSRP::setmask() void FixSRP::init() { - if (force->pair_match("hybrid",1) == NULL) + if (force->pair_match("hybrid",1) == NULL && force->pair_match("hybrid/overlay",1) == NULL) error->all(FLERR,"Cannot use pair srp without pair_style hybrid"); int has_rigid = 0; diff --git a/src/USER-NETCDF/dump_netcdf.cpp b/src/USER-NETCDF/dump_netcdf.cpp index 971f69f7ccdf8000d4b39e78107fa4c058536387..af9f94a728f8b61f43e08480f7b80dc33353d54d 100644 --- a/src/USER-NETCDF/dump_netcdf.cpp +++ b/src/USER-NETCDF/dump_netcdf.cpp @@ -88,8 +88,8 @@ DumpNetCDF::DumpNetCDF(LAMMPS *lmp, int narg, char **arg) : if (multiproc) error->all(FLERR,"Multi-processor writes are not supported."); - if (multifile) - error->all(FLERR,"Multiple files are not supported."); + if (append_flag && multifile) + error->all(FLERR,"Cannot append when writing to multiple files."); perat = new nc_perat_t[nfield]; @@ -224,6 +224,24 @@ DumpNetCDF::~DumpNetCDF() void DumpNetCDF::openfile() { + char *filecurrent = filename; + if (multifile && !singlefile_opened) { + char *filestar = filecurrent; + filecurrent = new char[strlen(filestar) + 16]; + char *ptr = strchr(filestar,'*'); + *ptr = '\0'; + if (padflag == 0) + sprintf(filecurrent,"%s" BIGINT_FORMAT "%s", + filestar,update->ntimestep,ptr+1); + else { + char bif[8],pad[16]; + strcpy(bif,BIGINT_FORMAT); + sprintf(pad,"%%s%%0%d%s%%s",padflag,&bif[1]); + sprintf(filecurrent,pad,filestar,update->ntimestep,ptr+1); + } + *ptr = '*'; + } + if (thermo && !singlefile_opened) { if (thermovar) delete [] thermovar; thermovar = new int[output->thermo->nfield]; @@ -268,14 +286,14 @@ void 
DumpNetCDF::openfile() ntotalgr = group->count(igroup); if (filewriter) { - if (append_flag && access(filename, F_OK) != -1) { + if (append_flag && !multifile && access(filecurrent, F_OK) != -1) { // Fixme! Perform checks if dimensions and variables conform with // data structure standard. if (singlefile_opened) return; singlefile_opened = 1; - NCERRX( nc_open(filename, NC_WRITE, &ncid), filename ); + NCERRX( nc_open(filecurrent, NC_WRITE, &ncid), filecurrent ); // dimensions NCERRX( nc_inq_dimid(ncid, NC_FRAME_STR, &frame_dim), NC_FRAME_STR ); @@ -312,8 +330,7 @@ void DumpNetCDF::openfile() // Type mangling if (vtype[perat[i].field[0]] == INT) { xtype = NC_INT; - } - else { + } else { if (double_precision) xtype = NC_DOUBLE; else @@ -337,10 +354,13 @@ void DumpNetCDF::openfile() NCERR( nc_inq_dimlen(ncid, frame_dim, &nframes) ); // framei == -1 means append to file, == -2 means override last frame // Note that in the input file this translates to 'yes', '-1', etc. - if (framei < 0 || (append_flag && framei == 0)) framei = nframes+framei+1; + + if (framei <= 0) framei = nframes+framei+1; if (framei < 1) framei = 1; - } - else { + } else { + if (framei != 0) + error->all(FLERR,"at keyword requires use of 'append yes'"); + int dims[NC_MAX_VAR_DIMS]; size_t index[NC_MAX_VAR_DIMS], count[NC_MAX_VAR_DIMS]; double d[1]; @@ -348,8 +368,8 @@ void DumpNetCDF::openfile() if (singlefile_opened) return; singlefile_opened = 1; - NCERRX( nc_create(filename, NC_64BIT_DATA, &ncid), - filename ); + NCERRX( nc_create(filecurrent, NC_64BIT_DATA, &ncid), + filecurrent ); // dimensions NCERRX( nc_def_dim(ncid, NC_FRAME_STR, NC_UNLIMITED, &frame_dim), @@ -598,15 +618,39 @@ void DumpNetCDF::closefile() if (filewriter && singlefile_opened) { NCERR( nc_close(ncid) ); singlefile_opened = 0; - // append next time DumpNetCDF::openfile is called - append_flag = 1; // write to next frame upon next open - framei++; + if (multifile) + framei = 1; + else { + // append next time 
DumpNetCDF::openfile is called + append_flag = 1; + framei++; + } } } /* ---------------------------------------------------------------------- */ +template <typename T> +int nc_put_var1_bigint(int ncid, int varid, const size_t index[], const T* tp) +{ + return nc_put_var1_int(ncid, varid, index, tp); +} + +template <> +int nc_put_var1_bigint<long>(int ncid, int varid, const size_t index[], + const long* tp) +{ + return nc_put_var1_long(ncid, varid, index, tp); +} + +template <> +int nc_put_var1_bigint<long long>(int ncid, int varid, const size_t index[], + const long long* tp) +{ + return nc_put_var1_longlong(ncid, varid, index, tp); +} + void DumpNetCDF::write() { // open file @@ -638,13 +682,8 @@ void DumpNetCDF::write() th->keyword[i] ); } else if (th->vtype[i] == BIGINT) { -#if defined(LAMMPS_SMALLBIG) || defined(LAMMPS_BIGBIG) - NCERRX( nc_put_var1_long(ncid, thermovar[i], start, &th->bivalue), - th->keyword[i] ); -#else - NCERRX( nc_put_var1_int(ncid, thermovar[i], start, &th->bivalue), + NCERRX( nc_put_var1_bigint(ncid, thermovar[i], start, &th->bivalue), th->keyword[i] ); -#endif } } } @@ -888,8 +927,11 @@ int DumpNetCDF::modify_param(int narg, char **arg) } else if (strcmp(arg[iarg],"at") == 0) { iarg++; + if (iarg >= narg) + error->all(FLERR,"expected additional arg after 'at' keyword."); framei = force->inumeric(FLERR,arg[iarg]); - if (framei < 0) framei--; + if (framei == 0) error->all(FLERR,"frame 0 not allowed for 'at' keyword."); + else if (framei < 0) framei--; iarg++; return 2; } @@ -911,68 +953,6 @@ int DumpNetCDF::modify_param(int narg, char **arg) /* ---------------------------------------------------------------------- */ -void DumpNetCDF::write_prmtop() -{ - char fn[1024]; - char tmp[81]; - FILE *f; - - strcpy(fn, filename); - strcat(fn, ".prmtop"); - - f = fopen(fn, "w"); - fprintf(f, "%%VERSION LAMMPS\n"); - fprintf(f, "%%FLAG TITLE\n"); - fprintf(f, "%%FORMAT(20a4)\n"); - memset(tmp, ' ', 76); - tmp[76] = '\0'; - fprintf(f, "NASN%s\n", 
tmp); - - fprintf(f, "%%FLAG POINTERS\n"); - fprintf(f, "%%FORMAT(10I8)\n"); -#if defined(LAMMPS_SMALLBIG) || defined(LAMMPS_BIGBIG) - fprintf(f, "%8li", ntotalgr); -#else - fprintf(f, "%8i", ntotalgr); -#endif - for (int i = 0; i < 11; i++) - fprintf(f, "%8i", 0); - fprintf(f, "\n"); - for (int i = 0; i < 12; i++) - fprintf(f, "%8i", 0); - fprintf(f, "\n"); - for (int i = 0; i < 6; i++) - fprintf(f, "%8i", 0); - fprintf(f, "\n"); - - fprintf(f, "%%FLAG ATOM_NAME\n"); - fprintf(f, "%%FORMAT(20a4)\n"); - for (int i = 0; i < ntotalgr; i++) { - fprintf(f, "%4s", "He"); - if ((i+1) % 20 == 0) - fprintf(f, "\n"); - } - - fprintf(f, "%%FLAG CHARGE\n"); - fprintf(f, "%%FORMAT(5E16.5)\n"); - for (int i = 0; i < ntotalgr; i++) { - fprintf(f, "%16.5e", 0.0); - if ((i+1) % 5 == 0) - fprintf(f, "\n"); - } - - fprintf(f, "%%FLAG MASS\n"); - fprintf(f, "%%FORMAT(5E16.5)\n"); - for (int i = 0; i < ntotalgr; i++) { - fprintf(f, "%16.5e", 1.0); - if ((i+1) % 5 == 0) - fprintf(f, "\n"); - } - fclose(f); -} - -/* ---------------------------------------------------------------------- */ - void DumpNetCDF::ncerr(int err, const char *descr, int line) { if (err != NC_NOERR) { diff --git a/src/USER-NETCDF/dump_netcdf.h b/src/USER-NETCDF/dump_netcdf.h index b86f294d3084d41ff7116ef995b7852a55f31bc5..25d64efade446861f152cc27f60728e7a15eb781 100644 --- a/src/USER-NETCDF/dump_netcdf.h +++ b/src/USER-NETCDF/dump_netcdf.h @@ -92,7 +92,6 @@ class DumpNetCDF : public DumpCustom { void closefile(); virtual void write_header(bigint); virtual void write_data(int, double *); - void write_prmtop(); virtual int modify_param(int, char **); diff --git a/src/USER-NETCDF/dump_netcdf_mpiio.cpp b/src/USER-NETCDF/dump_netcdf_mpiio.cpp index 3b753b1b0453963fc7002e7f6bbfbd690b1178aa..890029371e53fea15213fbc5178c50602e0eaad5 100644 --- a/src/USER-NETCDF/dump_netcdf_mpiio.cpp +++ b/src/USER-NETCDF/dump_netcdf_mpiio.cpp @@ -88,8 +88,8 @@ DumpNetCDFMPIIO::DumpNetCDFMPIIO(LAMMPS *lmp, int narg, char **arg) : if 
(multiproc) error->all(FLERR,"Multi-processor writes are not supported."); - if (multifile) - error->all(FLERR,"Multiple files are not supported."); + if (append_flag && multifile) + error->all(FLERR,"Cannot append when writing to multiple files."); perat = new nc_perat_t[nfield]; @@ -217,6 +217,24 @@ DumpNetCDFMPIIO::~DumpNetCDFMPIIO() void DumpNetCDFMPIIO::openfile() { + char *filecurrent = filename; + if (multifile && !singlefile_opened) { + char *filestar = filecurrent; + filecurrent = new char[strlen(filestar) + 16]; + char *ptr = strchr(filestar,'*'); + *ptr = '\0'; + if (padflag == 0) + sprintf(filecurrent,"%s" BIGINT_FORMAT "%s", + filestar,update->ntimestep,ptr+1); + else { + char bif[8],pad[16]; + strcpy(bif,BIGINT_FORMAT); + sprintf(pad,"%%s%%0%d%s%%s",padflag,&bif[1]); + sprintf(filecurrent,pad,filestar,update->ntimestep,ptr+1); + } + *ptr = '*'; + } + if (thermo && !singlefile_opened) { if (thermovar) delete [] thermovar; thermovar = new int[output->thermo->nfield]; @@ -260,7 +278,7 @@ void DumpNetCDFMPIIO::openfile() // get total number of atoms ntotalgr = group->count(igroup); - if (append_flag && access(filename, F_OK) != -1) { + if (append_flag && !multifile && access(filecurrent, F_OK) != -1) { // Fixme! Perform checks if dimensions and variables conform with // data structure standard. @@ -270,8 +288,8 @@ void DumpNetCDFMPIIO::openfile() if (singlefile_opened) return; singlefile_opened = 1; - NCERRX( ncmpi_open(MPI_COMM_WORLD, filename, NC_WRITE, MPI_INFO_NULL, - &ncid), filename ); + NCERRX( ncmpi_open(MPI_COMM_WORLD, filecurrent, NC_WRITE, MPI_INFO_NULL, + &ncid), filecurrent ); // dimensions NCERRX( ncmpi_inq_dimid(ncid, NC_FRAME_STR, &frame_dim), NC_FRAME_STR ); @@ -333,10 +351,12 @@ void DumpNetCDFMPIIO::openfile() NCERR( ncmpi_inq_dimlen(ncid, frame_dim, &nframes) ); // framei == -1 means append to file, == -2 means override last frame // Note that in the input file this translates to 'yes', '-1', etc. 
- if (framei < 0 || (append_flag && framei == 0)) framei = nframes+framei+1; + if (framei <= 0) framei = nframes+framei+1; if (framei < 1) framei = 1; - } - else { + } else { + if (framei != 0) + error->all(FLERR,"at keyword requires use of 'append yes'"); + int dims[NC_MAX_VAR_DIMS]; MPI_Offset index[NC_MAX_VAR_DIMS], count[NC_MAX_VAR_DIMS]; double d[1]; @@ -344,8 +364,8 @@ void DumpNetCDFMPIIO::openfile() if (singlefile_opened) return; singlefile_opened = 1; - NCERRX( ncmpi_create(MPI_COMM_WORLD, filename, NC_64BIT_DATA, - MPI_INFO_NULL, &ncid), filename ); + NCERRX( ncmpi_create(MPI_COMM_WORLD, filecurrent, NC_64BIT_DATA, + MPI_INFO_NULL, &ncid), filecurrent ); // dimensions NCERRX( ncmpi_def_dim(ncid, NC_FRAME_STR, NC_UNLIMITED, &frame_dim), @@ -574,15 +594,40 @@ void DumpNetCDFMPIIO::closefile() if (singlefile_opened) { NCERR( ncmpi_close(ncid) ); singlefile_opened = 0; - // append next time DumpNetCDFMPIIO::openfile is called - append_flag = 1; // write to next frame upon next open - framei++; + if (multifile) + framei = 1; + else { + // append next time DumpNetCDFMPIIO::openfile is called + append_flag = 1; + framei++; + } } } /* ---------------------------------------------------------------------- */ +template <typename T> +int ncmpi_put_var1_bigint(int ncid, int varid, const MPI_Offset index[], + const T* tp) +{ + return ncmpi_put_var1_int(ncid, varid, index, tp); +} + +template <> +int ncmpi_put_var1_bigint<long>(int ncid, int varid, const MPI_Offset index[], + const long* tp) +{ + return ncmpi_put_var1_long(ncid, varid, index, tp); +} + +template <> +int ncmpi_put_var1_bigint<long long>(int ncid, int varid, const MPI_Offset index[], + const long long* tp) +{ + return ncmpi_put_var1_longlong(ncid, varid, index, tp); +} + void DumpNetCDFMPIIO::write() { // open file @@ -616,13 +661,8 @@ void DumpNetCDFMPIIO::write() th->keyword[i] ); } else if (th->vtype[i] == BIGINT) { -#if defined(LAMMPS_SMALLBIG) || defined(LAMMPS_BIGBIG) - NCERRX( 
ncmpi_put_var1_long(ncid, thermovar[i], start, &th->bivalue), - th->keyword[i] ); -#else - NCERRX( ncmpi_put_var1_int(ncid, thermovar[i], start, &th->bivalue), + NCERRX( ncmpi_put_var1_bigint(ncid, thermovar[i], start, &th->bivalue), th->keyword[i] ); -#endif } } } @@ -883,8 +923,11 @@ int DumpNetCDFMPIIO::modify_param(int narg, char **arg) } else if (strcmp(arg[iarg],"at") == 0) { iarg++; + if (iarg >= narg) + error->all(FLERR,"expected additional arg after 'at' keyword."); framei = force->inumeric(FLERR,arg[iarg]); - if (framei < 0) framei--; + if (framei == 0) error->all(FLERR,"frame 0 not allowed for 'at' keyword."); + else if (framei < 0) framei--; iarg++; return 2; } diff --git a/src/USER-OMP/bond_gromos_omp.cpp b/src/USER-OMP/bond_gromos_omp.cpp new file mode 100644 index 0000000000000000000000000000000000000000..7904c4683b4c5526a99b2e4fa56049cc43be2dbf --- /dev/null +++ b/src/USER-OMP/bond_gromos_omp.cpp @@ -0,0 +1,129 @@ +/* ---------------------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. 
+------------------------------------------------------------------------- */ + +/* ---------------------------------------------------------------------- + Contributing author: Axel Kohlmeyer (Temple U) +------------------------------------------------------------------------- */ + +#include "bond_gromos_omp.h" +#include "atom.h" +#include "comm.h" +#include "force.h" +#include "neighbor.h" +#include "domain.h" + +#include <math.h> + +#include "suffix.h" +using namespace LAMMPS_NS; + +/* ---------------------------------------------------------------------- */ + +BondGromosOMP::BondGromosOMP(class LAMMPS *lmp) + : BondGromos(lmp), ThrOMP(lmp,THR_BOND) +{ + suffix_flag |= Suffix::OMP; +} + +/* ---------------------------------------------------------------------- */ + +void BondGromosOMP::compute(int eflag, int vflag) +{ + + if (eflag || vflag) { + ev_setup(eflag,vflag); + } else evflag = 0; + + const int nall = atom->nlocal + atom->nghost; + const int nthreads = comm->nthreads; + const int inum = neighbor->nbondlist; + +#if defined(_OPENMP) +#pragma omp parallel default(none) shared(eflag,vflag) +#endif + { + int ifrom, ito, tid; + + loop_setup_thr(ifrom, ito, tid, inum, nthreads); + ThrData *thr = fix->get_thr(tid); + thr->timer(Timer::START); + ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr); + + if (inum > 0) { + if (evflag) { + if (eflag) { + if (force->newton_bond) eval<1,1,1>(ifrom, ito, thr); + else eval<1,1,0>(ifrom, ito, thr); + } else { + if (force->newton_bond) eval<1,0,1>(ifrom, ito, thr); + else eval<1,0,0>(ifrom, ito, thr); + } + } else { + if (force->newton_bond) eval<0,0,1>(ifrom, ito, thr); + else eval<0,0,0>(ifrom, ito, thr); + } + } + thr->timer(Timer::BOND); + reduce_thr(this, eflag, vflag, thr); + } // end of omp parallel region +} + +template <int EVFLAG, int EFLAG, int NEWTON_BOND> +void BondGromosOMP::eval(int nfrom, int nto, ThrData * const thr) +{ + int i1,i2,n,type; + double delx,dely,delz,ebond,fbond; + + const dbl3_t * _noalias 
const x = (dbl3_t *) atom->x[0]; + dbl3_t * _noalias const f = (dbl3_t *) thr->get_f()[0]; + const int3_t * _noalias const bondlist = (int3_t *) neighbor->bondlist[0]; + const int nlocal = atom->nlocal; + ebond = 0.0; + + for (n = nfrom; n < nto; n++) { + i1 = bondlist[n].a; + i2 = bondlist[n].b; + type = bondlist[n].t; + + delx = x[i1].x - x[i2].x; + dely = x[i1].y - x[i2].y; + delz = x[i1].z - x[i2].z; + + const double rsq = delx*delx + dely*dely + delz*delz; + const double dr = rsq - r0[type]*r0[type]; + const double kdr = k[type]*dr; + + // force & energy + + fbond = -4.0 * kdr; + + if (EFLAG) ebond = kdr; + + // apply force to each of 2 atoms + + if (NEWTON_BOND || i1 < nlocal) { + f[i1].x += delx*fbond; + f[i1].y += dely*fbond; + f[i1].z += delz*fbond; + } + + if (NEWTON_BOND || i2 < nlocal) { + f[i2].x -= delx*fbond; + f[i2].y -= dely*fbond; + f[i2].z -= delz*fbond; + } + + if (EVFLAG) ev_tally_thr(this,i1,i2,nlocal,NEWTON_BOND, + ebond,fbond,delx,dely,delz,thr); + } +} diff --git a/src/USER-OMP/bond_gromos_omp.h b/src/USER-OMP/bond_gromos_omp.h new file mode 100644 index 0000000000000000000000000000000000000000..69e92e42950a17855642f940c083c04e17378aab --- /dev/null +++ b/src/USER-OMP/bond_gromos_omp.h @@ -0,0 +1,46 @@ +/* -*- c++ -*- ---------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. 
+------------------------------------------------------------------------- */ + +/* ---------------------------------------------------------------------- + Contributing author: Axel Kohlmeyer (Temple U) +------------------------------------------------------------------------- */ + +#ifdef BOND_CLASS + +BondStyle(gromos/omp,BondGromosOMP) + +#else + +#ifndef LMP_BOND_GROMOS_OMP_H +#define LMP_BOND_GROMOS_OMP_H + +#include "bond_gromos.h" +#include "thr_omp.h" + +namespace LAMMPS_NS { + +class BondGromosOMP : public BondGromos, public ThrOMP { + + public: + BondGromosOMP(class LAMMPS *lmp); + virtual void compute(int, int); + + private: + template <int EVFLAG, int EFLAG, int NEWTON_BOND> + void eval(int ifrom, int ito, ThrData * const thr); +}; + +} + +#endif +#endif diff --git a/src/USER-OMP/fix_neigh_history_omp.cpp b/src/USER-OMP/fix_neigh_history_omp.cpp new file mode 100644 index 0000000000000000000000000000000000000000..ecc3147ed5d0515dfb13d743531ed28599e11b40 --- /dev/null +++ b/src/USER-OMP/fix_neigh_history_omp.cpp @@ -0,0 +1,603 @@ +/* ---------------------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. 
+------------------------------------------------------------------------- */ + +#include <string.h> +#include <stdio.h> +#include "fix_neigh_history_omp.h" +#include "atom.h" +#include "comm.h" +#include "neighbor.h" +#include "neigh_list.h" +#include "force.h" +#include "pair.h" +#include "update.h" +#include "memory.h" +#include "modify.h" +#include "error.h" + +#if defined(_OPENMP) +#include <omp.h> +#endif + +using namespace LAMMPS_NS; +using namespace FixConst; + +enum{DEFAULT,NPARTNER,PERPARTNER}; // also set in fix neigh/history + + +FixNeighHistoryOMP::FixNeighHistoryOMP(class LAMMPS *lmp,int narg,char **argv) + : FixNeighHistory(lmp,narg,argv) { + +} + + +/* ---------------------------------------------------------------------- + copy partner info from neighbor data structs (NDS) to atom arrays + should be called whenever NDS store current history info + and need to transfer the info to owned atoms + e.g. when atoms migrate to new procs, new neigh list built, or between runs + when atoms may be added or deleted (NDS becomes out-of-date) + the next post_neighbor() will put this info back into new NDS + called during run before atom exchanges, including for restart files + called at end of run via post_run() + do not call during setup of run (setup_pre_exchange) + b/c there is no guarantee of a current NDS (even on continued run) + if run command does a 2nd run with pre = no, then no neigh list + will be built, but old neigh list will still have the info + onesided and newton on and newton off versions +------------------------------------------------------------------------- */ +// below is the pre_exchange() function from the parent class +// void FixNeighHistory::pre_exchange() +// { +// if (onesided) pre_exchange_onesided(); +// else if (newton_pair) pre_exchange_newton(); +// else pre_exchange_no_newton(); +//} + +/* ---------------------------------------------------------------------- + onesided version for sphere contact with line/tri particles + 
neighbor list has I = sphere, J = line/tri + only store history info with spheres +------------------------------------------------------------------------- */ + +void FixNeighHistoryOMP::pre_exchange_onesided() +{ + const int nthreads = comm->nthreads; + const int nlocal = atom->nlocal; + maxpartner = 0; + +#if defined(_OPENMP) +#pragma omp parallel default(none) +#endif + { + +#if defined(_OPENMP) + const int tid = omp_get_thread_num(); +#else + const int tid = 0; +#endif + + int i,j,ii,jj,m,n,inum,jnum; + int *ilist,*jlist,*numneigh,**firstneigh; + int *allflags; + double *allvalues,*onevalues; + + // NOTE: all operations until very end are with: + // nlocal_neigh <= current nlocal + // b/c previous neigh list was built with nlocal_neigh + // nlocal can be larger if other fixes added atoms at this pre_exchange() + + // clear per-thread paged data structures + + MyPage <tagint> &ipg = ipage_atom[tid]; + MyPage <double> &dpg = dpage_atom[tid]; + ipg.reset(); + dpg.reset(); + + // each thread works on a fixed chunk of local and ghost atoms. + const int ldelta = 1 + nlocal_neigh/nthreads; + const int lfrom = tid*ldelta; + const int lmax = lfrom +ldelta; + const int lto = (lmax > nlocal_neigh) ? 
nlocal_neigh : lmax; + + // 1st loop over neighbor list, I = sphere, J = tri + // only calculate npartner for each owned spheres + + for (i = lfrom; i < lto; i++) npartner[i] = 0; + + tagint *tag = atom->tag; + NeighList *list = pair->list; + inum = list->inum; + ilist = list->ilist; + numneigh = list->numneigh; + firstneigh = list->firstneigh; + + for (ii = 0; ii < inum; ii++) { + i = ilist[ii]; + jlist = firstneigh[i]; + jnum = numneigh[i]; + allflags = firstflag[i]; + + for (jj = 0; jj < jnum; jj++) + if (allflags[jj]) + if ((i >= lfrom) && (i < lto)) npartner[i]++; + } + + // get page chunks to store atom IDs and shear history for my atoms + + for (ii = 0; ii < inum; ii++) { + i = ilist[ii]; + if ((i >= lfrom) && (i < lto)) { + n = npartner[i]; + partner[i] = ipg.get(n); + valuepartner[i] = dpg.get(dnum*n); + if (partner[i] == NULL || valuepartner[i] == NULL) + error->one(FLERR,"Neighbor history overflow, boost neigh_modify one"); + } + } + + // 2nd loop over neighbor list + // store partner IDs and values for owned+ghost atoms + // re-zero npartner to use as counter + + for (i = lfrom; i < lto; i++) npartner[i] = 0; + + for (ii = 0; ii < inum; ii++) { + i = ilist[ii]; + jlist = firstneigh[i]; + jnum = numneigh[i]; + allflags = firstflag[i]; + allvalues = firstvalue[i]; + + for (jj = 0; jj < jnum; jj++) { + if (allflags[jj]) { + onevalues = &allvalues[dnum*jj]; + j = jlist[jj]; + j &= NEIGHMASK; + + if ((i >= lfrom) && (i < lto)) { + m = npartner[i]++; + partner[i][m] = tag[j]; + memcpy(&valuepartner[i][dnum*m],onevalues,dnumbytes); + } + } + } + } + + // set maxpartner = max # of partners of any owned atom + maxpartner = m = 0; + for (i = lfrom; i < lto; i++) + m = MAX(m,npartner[i]); + +#if defined(_OPENMP) +#pragma omp critical +#endif + { + maxpartner = MAX(m,maxpartner); + comm->maxexchange_fix =MAX(comm->maxexchange_fix,(dnum+1)*maxpartner+1); + } + } + + // zero npartner values from previous nlocal_neigh to current nlocal + for (int i = nlocal_neigh; i < 
nlocal; ++i) npartner[i] = 0; +} + +/* -------------------------------------------------------------------- */ + +void FixNeighHistoryOMP::pre_exchange_newton() +{ + const int nthreads = comm->nthreads; + maxpartner = 0; + for (int i = 0; i < nall_neigh; i++) npartner[i] = 0; + +#if defined(_OPENMP) +#pragma omp parallel default(none) +#endif + { + +#if defined(_OPENMP) + const int tid = omp_get_thread_num(); +#else + const int tid = 0; +#endif + + int i,j,ii,jj,m,n,inum,jnum; + int *ilist,*jlist,*numneigh,**firstneigh; + int *allflags; + double *allvalues,*onevalues,*jvalues; + + MyPage <tagint> &ipg = ipage_atom[tid]; + MyPage <double> &dpg = dpage_atom[tid]; + ipg.reset(); + dpg.reset(); + + // 1st loop over neighbor list + // calculate npartner for each owned+ghost atom + + tagint *tag = atom->tag; + + NeighList *list = pair->list; + inum = list->inum; + ilist = list->ilist; + numneigh = list->numneigh; + firstneigh = list->firstneigh; + + // each thread works on a fixed chunk of local and ghost atoms. + const int ldelta = 1 + nlocal_neigh/nthreads; + const int lfrom = tid*ldelta; + const int lmax = lfrom +ldelta; + const int lto = (lmax > nlocal_neigh) ? 
nlocal_neigh : lmax; + + for (ii = 0; ii < inum; ii++) { + i = ilist[ii]; + jlist = firstneigh[i]; + jnum = numneigh[i]; + allflags = firstflag[i]; + + for (jj = 0; jj < jnum; jj++) { + if (allflags[jj]) { + if ((i >= lfrom) && (i < lto)) + npartner[i]++; + + j = jlist[jj]; + j &= NEIGHMASK; + if ((j >= lfrom) && (j < lto)) + npartner[j]++; + } + } + } +#if defined(_OPENMP) +#pragma omp barrier + {;} + + // perform reverse comm to augment owned npartner counts with ghost counts + +#pragma omp master +#endif + { + commflag = NPARTNER; + comm->reverse_comm_fix(this,0); + } + + // get page chunks to store atom IDs and shear history for my atoms + + for (ii = 0; ii < inum; ii++) { + i = ilist[ii]; + if ((i >= lfrom) && (i < lto)) { + n = npartner[i]; + partner[i] = ipg.get(n); + valuepartner[i] = dpg.get(dnum*n); + if (partner[i] == NULL || valuepartner[i] == NULL) + error->one(FLERR,"Neighbor history overflow, boost neigh_modify one"); + } + } + +#if defined(_OPENMP) +#pragma omp master +#endif + { + for (i = nlocal_neigh; i < nall_neigh; i++) { + n = npartner[i]; + partner[i] = ipg.get(n); + valuepartner[i] = dpg.get(dnum*n); + if (partner[i] == NULL || valuepartner[i] == NULL) { + error->one(FLERR,"Neighbor history overflow, boost neigh_modify one"); + } + } + } + + // 2nd loop over neighbor list + // store partner IDs and values for owned+ghost atoms + // re-zero npartner to use as counter + + for (i = lfrom; i < lto; i++) npartner[i] = 0; + + for (ii = 0; ii < inum; ii++) { + i = ilist[ii]; + jlist = firstneigh[i]; + jnum = numneigh[i]; + allflags = firstflag[i]; + allvalues = firstvalue[i]; + + for (jj = 0; jj < jnum; jj++) { + if (allflags[jj]) { + onevalues = &allvalues[dnum*jj]; + j = jlist[jj]; + j &= NEIGHMASK; + + if ((i >= lfrom) && (i < lto)) { + m = npartner[i]++; + partner[i][m] = tag[j]; + memcpy(&valuepartner[i][dnum*m],onevalues,dnumbytes); + } + + if ((j >= lfrom) && (j < lto)) { + m = npartner[j]++; + partner[j][m] = tag[i]; + jvalues = 
&valuepartner[j][dnum*m]; + for (n = 0; n < dnum; n++) jvalues[n] = -onevalues[n]; + } + } + } + } +#if defined(_OPENMP) +#pragma omp barrier + {;} + +#pragma omp master +#endif + { + // perform reverse comm to augment + // owned atom partner/valuepartner with ghost info + // use variable variant b/c size of packed data can be arbitrarily large + // if many touching neighbors for large particle + + commflag = PERPARTNER; + comm->reverse_comm_fix_variable(this); + } + + // set maxpartner = max # of partners of any owned atom + m = 0; + for (i = lfrom; i < lto; i++) + m = MAX(m,npartner[i]); + +#if defined(_OPENMP) +#pragma omp critical +#endif + { + maxpartner = MAX(m,maxpartner); + comm->maxexchange_fix = MAX(comm->maxexchange_fix,4*maxpartner+1); + } + } + + // zero npartner values from previous nlocal_neigh to current nlocal + + int nlocal = atom->nlocal; + for (int i = nlocal_neigh; i < nlocal; i++) npartner[i] = 0; +} + +/* -------------------------------------------------------------------- */ + +void FixNeighHistoryOMP::pre_exchange_no_newton() +{ + const int nthreads = comm->nthreads; + maxpartner = 0; + +#if defined(_OPENMP) +#pragma omp parallel default(none) +#endif + { + +#if defined(_OPENMP) + const int tid = omp_get_thread_num(); +#else + const int tid = 0; +#endif + + int i,j,ii,jj,m,n,inum,jnum; + int *ilist,*jlist,*numneigh,**firstneigh; + int *allflags; + double *allvalues,*onevalues,*jvalues; + + MyPage <tagint> &ipg = ipage_atom[tid]; + MyPage <double> &dpg = dpage_atom[tid]; + ipg.reset(); + dpg.reset(); + + // 1st loop over neighbor list + // calculate npartner for each owned atom + + tagint *tag = atom->tag; + + NeighList *list = pair->list; + inum = list->inum; + ilist = list->ilist; + numneigh = list->numneigh; + firstneigh = list->firstneigh; + + // each thread works on a fixed chunk of local and ghost atoms. 
+ const int ldelta = 1 + nlocal_neigh/nthreads; + const int lfrom = tid*ldelta; + const int lmax = lfrom +ldelta; + const int lto = (lmax > nlocal_neigh) ? nlocal_neigh : lmax; + + // zero npartners for all current atoms and + // clear page data structures for this thread + + for (i = lfrom; i < lto; i++) npartner[i] = 0; + + + for (ii = 0; ii < inum; ii++) { + i = ilist[ii]; + jlist = firstneigh[i]; + jnum = numneigh[i]; + allflags = firstflag[i]; + + for (jj = 0; jj < jnum; jj++) { + if (allflags[jj]) { + if ((i >= lfrom) && (i < lto)) + npartner[i]++; + + j = jlist[jj]; + j &= NEIGHMASK; + if ((j >= lfrom) && (j < lto)) + npartner[j]++; + } + } + } + + // get page chunks to store atom IDs and shear history for my atoms + + for (ii = 0; ii < inum; ii++) { + i = ilist[ii]; + if ((i >= lfrom) && (i < lto)) { + n = npartner[i]; + partner[i] = ipg.get(n); + valuepartner[i] = dpg.get(dnum*n); + if (partner[i] == NULL || valuepartner[i] == NULL) + error->one(FLERR,"Neighbor history overflow, boost neigh_modify one"); + } + } + + // 2nd loop over neighbor list + // store partner IDs and values for owned+ghost atoms + // re-zero npartner to use as counter + + for (i = lfrom; i < lto; i++) npartner[i] = 0; + + for (ii = 0; ii < inum; ii++) { + i = ilist[ii]; + jlist = firstneigh[i]; + jnum = numneigh[i]; + allflags = firstflag[i]; + allvalues = firstvalue[i]; + + for (jj = 0; jj < jnum; jj++) { + if (allflags[jj]) { + onevalues = &allvalues[dnum*jj]; + j = jlist[jj]; + j &= NEIGHMASK; + + if ((i >= lfrom) && (i < lto)) { + m = npartner[i]++; + partner[i][m] = tag[j]; + memcpy(&valuepartner[i][dnum*m],onevalues,dnumbytes); + } + + if ((j >= lfrom) && (j < lto)) { + m = npartner[j]++; + partner[j][m] = tag[i]; + jvalues = &valuepartner[j][dnum*m]; + for (n = 0; n < dnum; n++) jvalues[n] = -onevalues[n]; + } + } + } + } + + // set maxpartner = max # of partners of any owned atom + m = 0; + for (i = lfrom; i < lto; i++) + m = MAX(m,npartner[i]); + +#if defined(_OPENMP) 
+#pragma omp critical +#endif + { + maxpartner = MAX(m,maxpartner); + comm->maxexchange_fix = MAX(comm->maxexchange_fix,(dnum+1)*maxpartner+1); + } + } +} + +/* -------------------------------------------------------------------- */ + +void FixNeighHistoryOMP::post_neighbor() +{ + const int nthreads = comm->nthreads; + maxpartner = 0; + const int nlocal = atom->nlocal; + const int nall = nlocal + atom->nghost; + nlocal_neigh = nlocal; + nall_neigh = nall; + + // realloc firstflag and firstvalue if needed + + if (maxatom < nlocal) { + memory->sfree(firstflag); + memory->sfree(firstvalue); + maxatom = nall; + firstflag = (int **) + memory->smalloc(maxatom*sizeof(int *),"neighbor_history:firstflag"); + firstvalue = (double **) + memory->smalloc(maxatom*sizeof(double *),"neighbor_history:firstvalue"); + } + + +#if defined(_OPENMP) +#pragma omp parallel default(none) +#endif + { + +#if defined(_OPENMP) + const int tid = omp_get_thread_num(); +#else + const int tid = 0; +#endif + + int i,j,ii,jj,m,nn,np,inum,jnum,rflag; + tagint jtag; + int *ilist,*jlist,*numneigh,**firstneigh; + int *allflags; + double *allvalues; + + MyPage <tagint> &ipg = ipage_atom[tid]; + MyPage <double> &dpg = dpage_atom[tid]; + ipg.reset(); + dpg.reset(); + + // 1st loop over neighbor list + // calculate npartner for each owned atom + + tagint *tag = atom->tag; + + NeighList *list = pair->list; + inum = list->inum; + ilist = list->ilist; + numneigh = list->numneigh; + firstneigh = list->firstneigh; + + // each thread works on a fixed chunk of local and ghost atoms. + const int ldelta = 1 + inum/nthreads; + const int lfrom = tid*ldelta; + const int lmax = lfrom +ldelta; + const int lto = (lmax > inum) ? 
inum : lmax; + + for (ii = lfrom; ii < lto; ii++) { + i = ilist[ii]; + jlist = firstneigh[i]; + jnum = numneigh[i]; + firstflag[i] = allflags = ipg.get(jnum); + firstvalue[i] = allvalues = dpg.get(jnum*dnum); + np = npartner[i]; + nn = 0; + + for (jj = 0; jj < jnum; jj++) { + j = jlist[jj]; + rflag = sbmask(j); + j &= NEIGHMASK; + jlist[jj] = j; + + // rflag = 1 if r < radsum in npair_size() method + // preserve neigh history info if tag[j] is in old-neigh partner list + // this test could be more geometrically precise for two sphere/line/tri + + if (rflag) { + jtag = tag[j]; + for (m = 0; m < np; m++) + if (partner[i][m] == jtag) break; + if (m < np) { + allflags[jj] = 1; + memcpy(&allvalues[nn],&valuepartner[i][dnum*m],dnumbytes); + } else { + allflags[jj] = 0; + memcpy(&allvalues[nn],zeroes,dnumbytes); + } + } else { + allflags[jj] = 0; + memcpy(&allvalues[nn],zeroes,dnumbytes); + } + nn += dnum; + } + } + } +} diff --git a/src/USER-OMP/fix_shear_history_omp.h b/src/USER-OMP/fix_neigh_history_omp.h similarity index 64% rename from src/USER-OMP/fix_shear_history_omp.h rename to src/USER-OMP/fix_neigh_history_omp.h index 95281b2afc56b5b5bbbc3f946ce61ef5a6998f34..9cd97ce3da76db17ca49727d433028971790b550 100644 --- a/src/USER-OMP/fix_shear_history_omp.h +++ b/src/USER-OMP/fix_neigh_history_omp.h @@ -13,23 +13,25 @@ #ifdef FIX_CLASS -FixStyle(SHEAR_HISTORY/omp,FixShearHistoryOMP) +FixStyle(NEIGH_HISTORY/omp,FixNeighHistoryOMP) #else -#ifndef LMP_FIX_SHEAR_HISTORY_OMP_H -#define LMP_FIX_SHEAR_HISTORY_OMP_H +#ifndef LMP_FIX_NEIGH_HISTORY_OMP_H +#define LMP_FIX_NEIGH_HISTORY_OMP_H -#include "fix_shear_history.h" +#include "fix_neigh_history.h" namespace LAMMPS_NS { -class FixShearHistoryOMP : public FixShearHistory { +class FixNeighHistoryOMP : public FixNeighHistory { public: - FixShearHistoryOMP(class LAMMPS *lmp, int narg, char **argv) - : FixShearHistory(lmp,narg,argv) {}; - virtual void pre_exchange(); + FixNeighHistoryOMP(class LAMMPS *lmp, int narg, char **argv); 
+ void pre_exchange_onesided(); + void pre_exchange_newton(); + void pre_exchange_no_newton(); + void post_neighbor(); }; } diff --git a/src/USER-OMP/fix_qeq_reax_omp.cpp b/src/USER-OMP/fix_qeq_reax_omp.cpp index 4457ab6592b90f246426416d42450284423daef5..d89c9627fe30a79d2b6d3ee2e3052fe322becf35 100644 --- a/src/USER-OMP/fix_qeq_reax_omp.cpp +++ b/src/USER-OMP/fix_qeq_reax_omp.cpp @@ -703,7 +703,7 @@ void FixQEqReaxOMP::calculate_Q() q[i] = s[i] - u * t[i]; // backup s & t - for (int k = 4; k > 0; --k) { + for (int k = nprev-1; k > 0; --k) { s_hist[i][k] = s_hist[i][k-1]; t_hist[i][k] = t_hist[i][k-1]; } diff --git a/src/USER-OMP/fix_shear_history_omp.cpp b/src/USER-OMP/fix_shear_history_omp.cpp deleted file mode 100644 index 4180e0af41ada456b73db4a6973bd2dff5c2c766..0000000000000000000000000000000000000000 --- a/src/USER-OMP/fix_shear_history_omp.cpp +++ /dev/null @@ -1,170 +0,0 @@ -/* ---------------------------------------------------------------------- - LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator - http://lammps.sandia.gov, Sandia National Laboratories - Steve Plimpton, sjplimp@sandia.gov - - Copyright (2003) Sandia Corporation. Under the terms of Contract - DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains - certain rights in this software. This software is distributed under - the GNU General Public License. - - See the README file in the top-level LAMMPS directory. 
-------------------------------------------------------------------------- */ - -#include <string.h> -#include <stdio.h> -#include "fix_shear_history_omp.h" -#include "atom.h" -#include "comm.h" -#include "neighbor.h" -#include "neigh_list.h" -#include "force.h" -#include "pair.h" -#include "update.h" -#include "modify.h" -#include "error.h" - -#if defined(_OPENMP) -#include <omp.h> -#endif - -using namespace LAMMPS_NS; -using namespace FixConst; - -/* ---------------------------------------------------------------------- - copy shear partner info from neighbor lists to per-atom arrays - so it can be exchanged with those atoms -------------------------------------------------------------------------- */ - -void FixShearHistoryOMP::pre_exchange() -{ - const int nthreads = comm->nthreads; - maxtouch = 0; - -#if defined(_OPENMP) -#pragma omp parallel default(none) -#endif - { - -#if defined(_OPENMP) - const int tid = omp_get_thread_num(); -#else - const int tid = 0; -#endif - - int i,j,ii,jj,m,n,inum,jnum; - int *ilist,*jlist,*numneigh,**firstneigh; - int *touch,**firsttouch; - double *shear,*shearj,*allshear,**firstshear; - - MyPage <tagint> &ipg = ipage[tid]; - MyPage <double> &dpg = dpage[tid]; - ipg.reset(); - dpg.reset(); - - // 1st loop over neighbor list - // calculate nparter for each owned atom - - tagint *tag = atom->tag; - - NeighList *list = pair->list; - inum = list->inum; - ilist = list->ilist; - numneigh = list->numneigh; - firstneigh = list->firstneigh; - firsttouch = list->listhistory->firstneigh; - firstshear = list->listhistory->firstdouble; - - int nlocal_neigh = 0; - if (inum) nlocal_neigh = ilist[inum-1] + 1; - - // each thread works on a fixed chunk of local and ghost atoms. - const int ldelta = 1 + nlocal_neigh/nthreads; - const int lfrom = tid*ldelta; - const int lmax = lfrom +ldelta; - const int lto = (lmax > nlocal_neigh) ? 
nlocal_neigh : lmax; - - // zero npartners for all current atoms and - // clear page data structures for this thread - - for (i = lfrom; i < lto; i++) npartner[i] = 0; - - - for (ii = 0; ii < inum; ii++) { - i = ilist[ii]; - jlist = firstneigh[i]; - jnum = numneigh[i]; - touch = firsttouch[i]; - - for (jj = 0; jj < jnum; jj++) { - if (touch[jj]) { - if ((i >= lfrom) && (i < lto)) - npartner[i]++; - - j = jlist[jj]; - j &= NEIGHMASK; - if ((j >= lfrom) && (j < lto)) - npartner[j]++; - } - } - } - - // get page chunks to store atom IDs and shear history for my atoms - - for (ii = 0; ii < inum; ii++) { - i = ilist[ii]; - if ((i >= lfrom) && (i < lto)) { - n = npartner[i]; - partner[i] = ipg.get(n); - shearpartner[i] = dpg.get(dnum*n); - if (partner[i] == NULL || shearpartner[i] == NULL) - error->one(FLERR,"Shear history overflow, boost neigh_modify one"); - } - } - - // 2nd loop over neighbor list - // store atom IDs and shear history for my atoms - // re-zero npartner to use as counter for all my atoms - - for (i = lfrom; i < lto; i++) npartner[i] = 0; - - for (ii = 0; ii < inum; ii++) { - i = ilist[ii]; - jlist = firstneigh[i]; - allshear = firstshear[i]; - jnum = numneigh[i]; - touch = firsttouch[i]; - - for (jj = 0; jj < jnum; jj++) { - if (touch[jj]) { - shear = &allshear[3*jj]; - j = jlist[jj]; - j &= NEIGHMASK; - - if ((i >= lfrom) && (i < lto)) { - m = npartner[i]++; - partner[i][m] = tag[j]; - memcpy(&shearpartner[i][dnum*m],shear,dnumbytes); - } - - if ((j >= lfrom) && (j < lto)) { - m = npartner[j]++; - partner[j][m] = tag[i]; - shearj = &shearpartner[j][dnum*m]; - for (n = 0; n < dnum; n++) shearj[n] = -shear[n]; - } - } - } - } - - // set maxtouch = max # of partners of any owned atom - maxtouch = m = 0; - for (i = lfrom; i < lto; i++) - m = MAX(m,npartner[i]); - -#if defined(_OPENMP) -#pragma omp critical -#endif - maxtouch = MAX(m,maxtouch); - } -} diff --git a/src/USER-OMP/npair_half_respa_bin_newtoff_omp.cpp 
b/src/USER-OMP/npair_half_respa_bin_newtoff_omp.cpp index 45add87092de8fcc505624dad338e0328af90d05..f094691b71da6e7dc291bde662a88251f79e796a 100644 --- a/src/USER-OMP/npair_half_respa_bin_newtoff_omp.cpp +++ b/src/USER-OMP/npair_half_respa_bin_newtoff_omp.cpp @@ -45,12 +45,10 @@ void NPairHalfRespaBinNewtoffOmp::build(NeighList *list) NPAIR_OMP_INIT; - NeighList *listinner = list->listinner; - NeighList *listmiddle = list->listmiddle; const int respamiddle = list->respamiddle; #if defined(_OPENMP) -#pragma omp parallel default(none) shared(list,listinner,listmiddle) +#pragma omp parallel default(none) shared(list) #endif NPAIR_OMP_SETUP(nlocal); @@ -77,26 +75,26 @@ void NPairHalfRespaBinNewtoffOmp::build(NeighList *list) int *numneigh = list->numneigh; int **firstneigh = list->firstneigh; - int *ilist_inner = listinner->ilist; - int *numneigh_inner = listinner->numneigh; - int **firstneigh_inner = listinner->firstneigh; + int *ilist_inner = list->ilist_inner; + int *numneigh_inner = list->numneigh_inner; + int **firstneigh_inner = list->firstneigh_inner; int *ilist_middle,*numneigh_middle,**firstneigh_middle; if (respamiddle) { - ilist_middle = listmiddle->ilist; - numneigh_middle = listmiddle->numneigh; - firstneigh_middle = listmiddle->firstneigh; + ilist_middle = list->ilist_middle; + numneigh_middle = list->numneigh_middle; + firstneigh_middle = list->firstneigh_middle; } // each thread has its own page allocator MyPage<int> &ipage = list->ipage[tid]; - MyPage<int> &ipage_inner = listinner->ipage[tid]; + MyPage<int> &ipage_inner = list->ipage_inner[tid]; ipage.reset(); ipage_inner.reset(); MyPage<int> *ipage_middle; if (respamiddle) { - ipage_middle = listmiddle->ipage + tid; + ipage_middle = list->ipage_middle + tid; ipage_middle->reset(); } @@ -199,6 +197,6 @@ void NPairHalfRespaBinNewtoffOmp::build(NeighList *list) } NPAIR_OMP_CLOSE; list->inum = nlocal; - listinner->inum = nlocal; - if (respamiddle) listmiddle->inum = nlocal; + list->inum_inner = nlocal; + 
if (respamiddle) list->inum_middle = nlocal; } diff --git a/src/USER-OMP/npair_half_respa_bin_newton_omp.cpp b/src/USER-OMP/npair_half_respa_bin_newton_omp.cpp index ee6b9b7501235f8bdcd425b4eb9340be286980aa..de7ef5f7d52b4b04d11dde4a16212de34dee8941 100644 --- a/src/USER-OMP/npair_half_respa_bin_newton_omp.cpp +++ b/src/USER-OMP/npair_half_respa_bin_newton_omp.cpp @@ -44,12 +44,10 @@ void NPairHalfRespaBinNewtonOmp::build(NeighList *list) NPAIR_OMP_INIT; - NeighList *listinner = list->listinner; - NeighList *listmiddle = list->listmiddle; const int respamiddle = list->respamiddle; #if defined(_OPENMP) -#pragma omp parallel default(none) shared(list,listinner,listmiddle) +#pragma omp parallel default(none) shared(list) #endif NPAIR_OMP_SETUP(nlocal); @@ -76,26 +74,26 @@ void NPairHalfRespaBinNewtonOmp::build(NeighList *list) int *numneigh = list->numneigh; int **firstneigh = list->firstneigh; - int *ilist_inner = listinner->ilist; - int *numneigh_inner = listinner->numneigh; - int **firstneigh_inner = listinner->firstneigh; + int *ilist_inner = list->ilist_inner; + int *numneigh_inner = list->numneigh_inner; + int **firstneigh_inner = list->firstneigh_inner; int *ilist_middle,*numneigh_middle,**firstneigh_middle; if (respamiddle) { - ilist_middle = listmiddle->ilist; - numneigh_middle = listmiddle->numneigh; - firstneigh_middle = listmiddle->firstneigh; + ilist_middle = list->ilist_middle; + numneigh_middle = list->numneigh_middle; + firstneigh_middle = list->firstneigh_middle; } // each thread has its own page allocator MyPage<int> &ipage = list->ipage[tid]; - MyPage<int> &ipage_inner = listinner->ipage[tid]; + MyPage<int> &ipage_inner = list->ipage_inner[tid]; ipage.reset(); ipage_inner.reset(); MyPage<int> *ipage_middle; if (respamiddle) { - ipage_middle = listmiddle->ipage + tid; + ipage_middle = list->ipage_middle + tid; ipage_middle->reset(); } @@ -245,6 +243,6 @@ void NPairHalfRespaBinNewtonOmp::build(NeighList *list) } NPAIR_OMP_CLOSE; list->inum = nlocal; - 
listinner->inum = nlocal; - if (respamiddle) listmiddle->inum = nlocal; + list->inum_inner = nlocal; + if (respamiddle) list->inum_middle = nlocal; } diff --git a/src/USER-OMP/npair_half_respa_bin_newton_tri_omp.cpp b/src/USER-OMP/npair_half_respa_bin_newton_tri_omp.cpp index fbb512ba646ff5a23201507867d6d84dc4e4ac90..f20d101bc9dbc1f131e5a7435751ed471626266a 100644 --- a/src/USER-OMP/npair_half_respa_bin_newton_tri_omp.cpp +++ b/src/USER-OMP/npair_half_respa_bin_newton_tri_omp.cpp @@ -44,12 +44,10 @@ void NPairHalfRespaBinNewtonTriOmp::build(NeighList *list) NPAIR_OMP_INIT; - NeighList *listinner = list->listinner; - NeighList *listmiddle = list->listmiddle; const int respamiddle = list->respamiddle; #if defined(_OPENMP) -#pragma omp parallel default(none) shared(list,listinner,listmiddle) +#pragma omp parallel default(none) shared(list) #endif NPAIR_OMP_SETUP(nlocal); @@ -76,26 +74,26 @@ void NPairHalfRespaBinNewtonTriOmp::build(NeighList *list) int *numneigh = list->numneigh; int **firstneigh = list->firstneigh; - int *ilist_inner = listinner->ilist; - int *numneigh_inner = listinner->numneigh; - int **firstneigh_inner = listinner->firstneigh; + int *ilist_inner = list->ilist_inner; + int *numneigh_inner = list->numneigh_inner; + int **firstneigh_inner = list->firstneigh_inner; int *ilist_middle,*numneigh_middle,**firstneigh_middle; if (respamiddle) { - ilist_middle = listmiddle->ilist; - numneigh_middle = listmiddle->numneigh; - firstneigh_middle = listmiddle->firstneigh; + ilist_middle = list->ilist_middle; + numneigh_middle = list->numneigh_middle; + firstneigh_middle = list->firstneigh_middle; } // each thread has its own page allocator MyPage<int> &ipage = list->ipage[tid]; - MyPage<int> &ipage_inner = listinner->ipage[tid]; + MyPage<int> &ipage_inner = list->ipage_inner[tid]; ipage.reset(); ipage_inner.reset(); MyPage<int> *ipage_middle; if (respamiddle) { - ipage_middle = listmiddle->ipage + tid; + ipage_middle = list->ipage_middle + tid; 
ipage_middle->reset(); } @@ -206,6 +204,6 @@ void NPairHalfRespaBinNewtonTriOmp::build(NeighList *list) } NPAIR_OMP_CLOSE; list->inum = nlocal; - listinner->inum = nlocal; - if (respamiddle) listmiddle->inum = nlocal; + list->inum_inner = nlocal; + if (respamiddle) list->inum_middle = nlocal; } diff --git a/src/USER-OMP/npair_half_respa_nsq_newtoff_omp.cpp b/src/USER-OMP/npair_half_respa_nsq_newtoff_omp.cpp index 5ee71bebad9e5668461dcc2b9702eccad7df774f..0f726cdd7f322dafa176884f1510134a2ce5e6a3 100644 --- a/src/USER-OMP/npair_half_respa_nsq_newtoff_omp.cpp +++ b/src/USER-OMP/npair_half_respa_nsq_newtoff_omp.cpp @@ -46,12 +46,10 @@ void NPairHalfRespaNsqNewtoffOmp::build(NeighList *list) NPAIR_OMP_INIT; - NeighList *listinner = list->listinner; - NeighList *listmiddle = list->listmiddle; const int respamiddle = list->respamiddle; #if defined(_OPENMP) -#pragma omp parallel default(none) shared(list,listinner,listmiddle) +#pragma omp parallel default(none) shared(list) #endif NPAIR_OMP_SETUP(nlocal); @@ -80,26 +78,26 @@ void NPairHalfRespaNsqNewtoffOmp::build(NeighList *list) int *numneigh = list->numneigh; int **firstneigh = list->firstneigh; - int *ilist_inner = listinner->ilist; - int *numneigh_inner = listinner->numneigh; - int **firstneigh_inner = listinner->firstneigh; + int *ilist_inner = list->ilist_inner; + int *numneigh_inner = list->numneigh_inner; + int **firstneigh_inner = list->firstneigh_inner; int *ilist_middle,*numneigh_middle,**firstneigh_middle; if (respamiddle) { - ilist_middle = listmiddle->ilist; - numneigh_middle = listmiddle->numneigh; - firstneigh_middle = listmiddle->firstneigh; + ilist_middle = list->ilist_middle; + numneigh_middle = list->numneigh_middle; + firstneigh_middle = list->firstneigh_middle; } // each thread has its own page allocator MyPage<int> &ipage = list->ipage[tid]; - MyPage<int> &ipage_inner = listinner->ipage[tid]; + MyPage<int> &ipage_inner = list->ipage_inner[tid]; ipage.reset(); ipage_inner.reset(); MyPage<int> 
*ipage_middle; if (respamiddle) { - ipage_middle = listmiddle->ipage + tid; + ipage_middle = list->ipage_middle + tid; ipage_middle->reset(); } @@ -193,6 +191,6 @@ void NPairHalfRespaNsqNewtoffOmp::build(NeighList *list) } NPAIR_OMP_CLOSE; list->inum = nlocal; - listinner->inum = nlocal; - if (respamiddle) listmiddle->inum = nlocal; + list->inum_inner = nlocal; + if (respamiddle) list->inum_middle = nlocal; } diff --git a/src/USER-OMP/npair_half_respa_nsq_newton_omp.cpp b/src/USER-OMP/npair_half_respa_nsq_newton_omp.cpp index 89cff732c950bffa515a7a691cd977dfdb26c378..2783e1255eb1147a15c5ebc04a41905c0a6ac27f 100644 --- a/src/USER-OMP/npair_half_respa_nsq_newton_omp.cpp +++ b/src/USER-OMP/npair_half_respa_nsq_newton_omp.cpp @@ -47,12 +47,10 @@ void NPairHalfRespaNsqNewtonOmp::build(NeighList *list) NPAIR_OMP_INIT; - NeighList *listinner = list->listinner; - NeighList *listmiddle = list->listmiddle; const int respamiddle = list->respamiddle; #if defined(_OPENMP) -#pragma omp parallel default(none) shared(list,listinner,listmiddle) +#pragma omp parallel default(none) shared(list) #endif NPAIR_OMP_SETUP(nlocal); @@ -81,26 +79,26 @@ void NPairHalfRespaNsqNewtonOmp::build(NeighList *list) int *numneigh = list->numneigh; int **firstneigh = list->firstneigh; - int *ilist_inner = listinner->ilist; - int *numneigh_inner = listinner->numneigh; - int **firstneigh_inner = listinner->firstneigh; + int *ilist_inner = list->ilist_inner; + int *numneigh_inner = list->numneigh_inner; + int **firstneigh_inner = list->firstneigh_inner; int *ilist_middle,*numneigh_middle,**firstneigh_middle; if (respamiddle) { - ilist_middle = listmiddle->ilist; - numneigh_middle = listmiddle->numneigh; - firstneigh_middle = listmiddle->firstneigh; + ilist_middle = list->ilist_middle; + numneigh_middle = list->numneigh_middle; + firstneigh_middle = list->firstneigh_middle; } // each thread has its own page allocator MyPage<int> &ipage = list->ipage[tid]; - MyPage<int> &ipage_inner = 
listinner->ipage[tid]; + MyPage<int> &ipage_inner = list->ipage_inner[tid]; ipage.reset(); ipage_inner.reset(); MyPage<int> *ipage_middle; if (respamiddle) { - ipage_middle = listmiddle->ipage + tid; + ipage_middle = list->ipage_middle + tid; ipage_middle->reset(); } @@ -212,6 +210,6 @@ void NPairHalfRespaNsqNewtonOmp::build(NeighList *list) } NPAIR_OMP_CLOSE; list->inum = nlocal; - listinner->inum = nlocal; - if (respamiddle) listmiddle->inum = nlocal; + list->inum_inner = nlocal; + if (respamiddle) list->inum_middle = nlocal; } diff --git a/src/USER-OMP/npair_half_size_bin_newtoff_omp.cpp b/src/USER-OMP/npair_half_size_bin_newtoff_omp.cpp index 120658b7143f5fe72a97e066e1972d7fdf836187..6a1cb46ea6272d9c312c8c555b74fdbf2eebdbd7 100644 --- a/src/USER-OMP/npair_half_size_bin_newtoff_omp.cpp +++ b/src/USER-OMP/npair_half_size_bin_newtoff_omp.cpp @@ -18,9 +18,6 @@ #include "neigh_list.h" #include "atom.h" #include "atom_vec.h" -#include "molecule.h" -#include "domain.h" -#include "fix_shear_history.h" #include "my_page.h" #include "error.h" @@ -34,7 +31,6 @@ NPairHalfSizeBinNewtoffOmp::NPairHalfSizeBinNewtoffOmp(LAMMPS *lmp) : /* ---------------------------------------------------------------------- size particles binned neighbor list construction with partial Newton's 3rd law - shear history must be accounted for when a neighbor pair is added each owned atom i checks own bin and surrounding bins in non-Newton stencil pair stored once if i,j are both owned and i < j pair stored by me if j is ghost (also stored by proc owning j) @@ -43,30 +39,20 @@ NPairHalfSizeBinNewtoffOmp::NPairHalfSizeBinNewtoffOmp(LAMMPS *lmp) : void NPairHalfSizeBinNewtoffOmp::build(NeighList *list) { const int nlocal = (includegroup) ? 
atom->nfirst : atom->nlocal; - - FixShearHistory * const fix_history = (FixShearHistory *) list->fix_history; - NeighList * listhistory = list->listhistory; + const int history = list->history; + const int mask_history = 3 << SBBITS; NPAIR_OMP_INIT; #if defined(_OPENMP) -#pragma omp parallel default(none) shared(list,listhistory) +#pragma omp parallel default(none) shared(list) #endif NPAIR_OMP_SETUP(nlocal); - int i,j,k,m,n,nn,ibin,dnum,dnumbytes; + int i,j,k,m,n,nn,ibin; double xtmp,ytmp,ztmp,delx,dely,delz,rsq; double radi,radsum,cutsq; - int *neighptr,*touchptr; - double *shearptr; - MyPage<int> *ipage_touch; - MyPage<double> *dpage_shear; - - int *npartner; - tagint **partner; - double **shearpartner; - int **firsttouch; - double **firstshear; + int *neighptr; // loop over each atom, storing neighbors @@ -85,29 +71,10 @@ void NPairHalfSizeBinNewtoffOmp::build(NeighList *list) MyPage<int> &ipage = list->ipage[tid]; ipage.reset(); - if (fix_history) { - npartner = fix_history->npartner; - partner = fix_history->partner; - shearpartner = fix_history->shearpartner; - firsttouch = listhistory->firstneigh; - firstshear = listhistory->firstdouble; - ipage_touch = listhistory->ipage+tid; - dpage_shear = listhistory->dpage+tid; - dnum = listhistory->dnum; - dnumbytes = dnum * sizeof(double); - ipage_touch->reset(); - dpage_shear->reset(); - } - for (i = ifrom; i < ito; i++) { n = 0; neighptr = ipage.vget(); - if (fix_history) { - nn = 0; - touchptr = ipage_touch->vget(); - shearptr = dpage_shear->vget(); - } xtmp = x[i][0]; ytmp = x[i][1]; @@ -133,29 +100,10 @@ void NPairHalfSizeBinNewtoffOmp::build(NeighList *list) cutsq = (radsum+skin) * (radsum+skin); if (rsq <= cutsq) { - neighptr[n] = j; - - if (fix_history) { - if (rsq < radsum*radsum) { - for (m = 0; m < npartner[i]; m++) - if (partner[i][m] == tag[j]) break; - if (m < npartner[i]) { - touchptr[n] = 1; - memcpy(&shearptr[nn],&shearpartner[i][dnum*m],dnumbytes); - nn += dnum; - } else { - touchptr[n] = 0; - 
memcpy(&shearptr[nn],zeroes,dnumbytes); - nn += dnum; - } - } else { - touchptr[n] = 0; - memcpy(&shearptr[nn],zeroes,dnumbytes); - nn += dnum; - } - } - - n++; + if (history && rsq < radsum*radsum) + neighptr[n++] = j ^ mask_history; + else + neighptr[n++] = j; } } } @@ -166,13 +114,6 @@ void NPairHalfSizeBinNewtoffOmp::build(NeighList *list) ipage.vgot(n); if (ipage.status()) error->one(FLERR,"Neighbor list overflow, boost neigh_modify one"); - - if (fix_history) { - firsttouch[i] = touchptr; - firstshear[i] = shearptr; - ipage_touch->vgot(n); - dpage_shear->vgot(nn); - } } NPAIR_OMP_CLOSE; list->inum = nlocal; diff --git a/src/USER-OMP/npair_half_size_bin_newton_omp.cpp b/src/USER-OMP/npair_half_size_bin_newton_omp.cpp index cf0c6d20fe565c4058dce2ac728854a4a36fcf2e..d8e1e6da449c9e04698dc79df919a802e907cec9 100644 --- a/src/USER-OMP/npair_half_size_bin_newton_omp.cpp +++ b/src/USER-OMP/npair_half_size_bin_newton_omp.cpp @@ -18,9 +18,6 @@ #include "neigh_list.h" #include "atom.h" #include "atom_vec.h" -#include "molecule.h" -#include "domain.h" -#include "fix_shear_history.h" #include "my_page.h" #include "error.h" @@ -34,7 +31,6 @@ NPairHalfSizeBinNewtonOmp::NPairHalfSizeBinNewtonOmp(LAMMPS *lmp) : /* ---------------------------------------------------------------------- size particles binned neighbor list construction with full Newton's 3rd law - shear history must be accounted for when a neighbor pair is added each owned atom i checks its own bin and other bins in Newton stencil every pair stored exactly once by some processor ------------------------------------------------------------------------- */ @@ -42,36 +38,20 @@ NPairHalfSizeBinNewtonOmp::NPairHalfSizeBinNewtonOmp(LAMMPS *lmp) : void NPairHalfSizeBinNewtonOmp::build(NeighList *list) { const int nlocal = (includegroup) ? 
atom->nfirst : atom->nlocal; - - FixShearHistory * const fix_history = (FixShearHistory *) list->fix_history; - NeighList * listhistory = list->listhistory; - if (fix_history) { - fix_history->nlocal_neigh = nlocal; - fix_history->nall_neigh = nlocal + atom->nghost; - } + const int history = list->history; + const int mask_history = 3 << SBBITS; NPAIR_OMP_INIT; #if defined(_OPENMP) -#pragma omp parallel default(none) shared(list,listhistory) +#pragma omp parallel default(none) shared(list) #endif NPAIR_OMP_SETUP(nlocal); - int i,j,k,m,n,nn,ibin,dnum,dnumbytes; + int i,j,k,m,n,nn,ibin; double xtmp,ytmp,ztmp,delx,dely,delz,rsq; double radi,radsum,cutsq; - int *neighptr,*touchptr; - double *shearptr; - MyPage<int> *ipage_touch; - MyPage<double> *dpage_shear; - - int *npartner; - tagint **partner; - double **shearpartner; - int **firsttouch; - double **firstshear; - - // loop over each atom, storing neighbors + int *neighptr; double **x = atom->x; double *radius = atom->radius; @@ -88,29 +68,10 @@ void NPairHalfSizeBinNewtonOmp::build(NeighList *list) MyPage<int> &ipage = list->ipage[tid]; ipage.reset(); - if (fix_history) { - npartner = fix_history->npartner; - partner = fix_history->partner; - shearpartner = fix_history->shearpartner; - firsttouch = listhistory->firstneigh; - firstshear = listhistory->firstdouble; - ipage_touch = listhistory->ipage+tid; - dpage_shear = listhistory->dpage+tid; - dnum = listhistory->dnum; - dnumbytes = dnum * sizeof(double); - ipage_touch->reset(); - dpage_shear->reset(); - } - for (i = ifrom; i < ito; i++) { n = 0; neighptr = ipage.vget(); - if (fix_history) { - nn = 0; - touchptr = ipage_touch->vget(); - shearptr = dpage_shear->vget(); - } xtmp = x[i][0]; ytmp = x[i][1]; @@ -140,29 +101,10 @@ void NPairHalfSizeBinNewtonOmp::build(NeighList *list) cutsq = (radsum+skin) * (radsum+skin); if (rsq <= cutsq) { - neighptr[n] = j; - - if (fix_history) { - if (rsq < radsum*radsum) { - for (m = 0; m < npartner[i]; m++) - if (partner[i][m] == 
tag[j]) break; - if (m < npartner[i]) { - touchptr[n] = 1; - memcpy(&shearptr[nn],&shearpartner[i][dnum*m],dnumbytes); - nn += dnum; - } else { - touchptr[n] = 0; - memcpy(&shearptr[nn],zeroes,dnumbytes); - nn += dnum; - } - } else { - touchptr[n] = 0; - memcpy(&shearptr[nn],zeroes,dnumbytes); - nn += dnum; - } - } - - n++; + if (history && rsq < radsum*radsum) + neighptr[n++] = j ^ mask_history; + else + neighptr[n++] = j; } } @@ -181,29 +123,10 @@ void NPairHalfSizeBinNewtonOmp::build(NeighList *list) cutsq = (radsum+skin) * (radsum+skin); if (rsq <= cutsq) { - neighptr[n] = j; - - if (fix_history) { - if (rsq < radsum*radsum) { - for (m = 0; m < npartner[i]; m++) - if (partner[i][m] == tag[j]) break; - if (m < npartner[i]) { - touchptr[n] = 1; - memcpy(&shearptr[nn],&shearpartner[i][dnum*m],dnumbytes); - nn += dnum; - } else { - touchptr[n] = 0; - memcpy(&shearptr[nn],zeroes,dnumbytes); - nn += dnum; - } - } else { - touchptr[n] = 0; - memcpy(&shearptr[nn],zeroes,dnumbytes); - nn += dnum; - } - } - - n++; + if (history && rsq < radsum*radsum) + neighptr[n++] = j ^ mask_history; + else + neighptr[n++] = j; } } } @@ -214,13 +137,6 @@ void NPairHalfSizeBinNewtonOmp::build(NeighList *list) ipage.vgot(n); if (ipage.status()) error->one(FLERR,"Neighbor list overflow, boost neigh_modify one"); - - if (fix_history) { - firsttouch[i] = touchptr; - firstshear[i] = shearptr; - ipage_touch->vgot(n); - dpage_shear->vgot(nn); - } } NPAIR_OMP_CLOSE; list->inum = nlocal; diff --git a/src/USER-OMP/npair_half_size_bin_newton_tri_omp.cpp b/src/USER-OMP/npair_half_size_bin_newton_tri_omp.cpp index da04eebd1eb70ac0422e35b1299bc9dd04b58c33..b02bfa345e8b1b9ccceae4e55ba0c482df8e46a2 100644 --- a/src/USER-OMP/npair_half_size_bin_newton_tri_omp.cpp +++ b/src/USER-OMP/npair_half_size_bin_newton_tri_omp.cpp @@ -17,8 +17,6 @@ #include "neigh_list.h" #include "atom.h" #include "atom_vec.h" -#include "molecule.h" -#include "domain.h" #include "my_page.h" #include "error.h" @@ -32,7 +30,6 @@ 
NPairHalfSizeBinNewtonTriOmp::NPairHalfSizeBinNewtonTriOmp(LAMMPS *lmp) : /* ---------------------------------------------------------------------- size particles binned neighbor list construction with Newton's 3rd law for triclinic - no shear history is allowed for this option each owned atom i checks its own bin and other bins in triclinic stencil every pair stored exactly once by some processor ------------------------------------------------------------------------- */ @@ -40,6 +37,8 @@ NPairHalfSizeBinNewtonTriOmp::NPairHalfSizeBinNewtonTriOmp(LAMMPS *lmp) : void NPairHalfSizeBinNewtonTriOmp::build(NeighList *list) { const int nlocal = (includegroup) ? atom->nfirst : atom->nlocal; + const int history = list->history; + const int mask_history = 3 << SBBITS; NPAIR_OMP_INIT; #if defined(_OPENMP) @@ -105,7 +104,12 @@ void NPairHalfSizeBinNewtonTriOmp::build(NeighList *list) radsum = radi + radius[j]; cutsq = (radsum+skin) * (radsum+skin); - if (rsq <= cutsq) neighptr[n++] = j; + if (rsq <= cutsq) { + if (history && rsq < radsum*radsum) + neighptr[n++] = j ^ mask_history; + else + neighptr[n++] = j; + } } } diff --git a/src/USER-OMP/npair_half_size_nsq_newtoff_omp.cpp b/src/USER-OMP/npair_half_size_nsq_newtoff_omp.cpp index f898ec3828b20db8f258a78c6d911c46b6f4b1b1..3c7b6b118f5f9a4da6f324acb9b217097bf69605 100644 --- a/src/USER-OMP/npair_half_size_nsq_newtoff_omp.cpp +++ b/src/USER-OMP/npair_half_size_nsq_newtoff_omp.cpp @@ -19,9 +19,6 @@ #include "atom.h" #include "atom_vec.h" #include "group.h" -#include "molecule.h" -#include "domain.h" -#include "fix_shear_history.h" #include "my_page.h" #include "error.h" @@ -44,34 +41,20 @@ void NPairHalfSizeNsqNewtoffOmp::build(NeighList *list) { const int nlocal = (includegroup) ? atom->nfirst : atom->nlocal; const int bitmask = (includegroup) ? 
group->bitmask[includegroup] : 0; - - FixShearHistory * const fix_history = (FixShearHistory *) list->fix_history; - NeighList * listhistory = list->listhistory; - if (fix_history) { - fix_history->nlocal_neigh = nlocal; - fix_history->nall_neigh = nlocal + atom->nghost; - } + const int history = list->history; + const int mask_history = 3 << SBBITS; NPAIR_OMP_INIT; #if defined(_OPENMP) -#pragma omp parallel default(none) shared(list,listhistory) +#pragma omp parallel default(none) shared(list) #endif NPAIR_OMP_SETUP(nlocal); - int i,j,m,n,nn,dnum,dnumbytes; + int i,j,m,n,nn; double xtmp,ytmp,ztmp,delx,dely,delz,rsq; double radi,radsum,cutsq; - int *neighptr,*touchptr; - double *shearptr; - - int *npartner; - tagint **partner; - double **shearpartner; - int **firsttouch; - double **firstshear; - MyPage<int> *ipage_touch; - MyPage<double> *dpage_shear; + int *neighptr; double **x = atom->x; double *radius = atom->radius; @@ -89,29 +72,10 @@ void NPairHalfSizeNsqNewtoffOmp::build(NeighList *list) MyPage<int> &ipage = list->ipage[tid]; ipage.reset(); - if (fix_history) { - npartner = fix_history->npartner; - partner = fix_history->partner; - shearpartner = fix_history->shearpartner; - firsttouch = listhistory->firstneigh; - firstshear = listhistory->firstdouble; - ipage_touch = listhistory->ipage+tid; - dpage_shear = listhistory->dpage+tid; - dnum = listhistory->dnum; - dnumbytes = dnum * sizeof(double); - ipage_touch->reset(); - dpage_shear->reset(); - } - for (i = ifrom; i < ito; i++) { n = 0; neighptr = ipage.vget(); - if (fix_history) { - nn = 0; - touchptr = ipage_touch->vget(); - shearptr = dpage_shear->vget(); - } xtmp = x[i][0]; ytmp = x[i][1]; @@ -132,29 +96,10 @@ void NPairHalfSizeNsqNewtoffOmp::build(NeighList *list) cutsq = (radsum+skin) * (radsum+skin); if (rsq <= cutsq) { - neighptr[n] = j; - - if (fix_history) { - if (rsq < radsum*radsum) { - for (m = 0; m < npartner[i]; m++) - if (partner[i][m] == tag[j]) break; - if (m < npartner[i]) { - touchptr[n] = 
1; - memcpy(&shearptr[nn],&shearpartner[i][dnum*m],dnumbytes); - nn += dnum; - } else { - touchptr[n] = 0; - memcpy(&shearptr[nn],zeroes,dnumbytes); - nn += dnum; - } - } else { - touchptr[n] = 0; - memcpy(&shearptr[nn],zeroes,dnumbytes); - nn += dnum; - } - } - - n++; + if (history && rsq < radsum*radsum) + neighptr[n++] = j ^ mask_history; + else + neighptr[n++] = j; } } @@ -164,13 +109,6 @@ void NPairHalfSizeNsqNewtoffOmp::build(NeighList *list) ipage.vgot(n); if (ipage.status()) error->one(FLERR,"Neighbor list overflow, boost neigh_modify one"); - - if (fix_history) { - firsttouch[i] = touchptr; - firstshear[i] = shearptr; - ipage_touch->vgot(n); - dpage_shear->vgot(nn); - } } NPAIR_OMP_CLOSE; list->inum = nlocal; diff --git a/src/USER-OMP/npair_half_size_nsq_newton_omp.cpp b/src/USER-OMP/npair_half_size_nsq_newton_omp.cpp index a7caac372acb40684b5306a026a6fb805b3ea39e..37a4181af73dd823a9a2eea1475d3c33eb06b15a 100644 --- a/src/USER-OMP/npair_half_size_nsq_newton_omp.cpp +++ b/src/USER-OMP/npair_half_size_nsq_newton_omp.cpp @@ -19,9 +19,6 @@ #include "atom.h" #include "atom_vec.h" #include "group.h" -#include "molecule.h" -#include "domain.h" -#include "fix_shear_history.h" #include "my_page.h" #include "error.h" @@ -45,34 +42,20 @@ void NPairHalfSizeNsqNewtonOmp::build(NeighList *list) { const int nlocal = (includegroup) ? atom->nfirst : atom->nlocal; const int bitmask = (includegroup) ? 
group->bitmask[includegroup] : 0;; - - FixShearHistory * const fix_history = (FixShearHistory *) list->fix_history; - NeighList * listhistory = list->listhistory; - if (fix_history) { - fix_history->nlocal_neigh = nlocal; - fix_history->nall_neigh = nlocal+atom->nghost; - } + const int history = list->history; + const int mask_history = 3 << SBBITS; NPAIR_OMP_INIT; #if defined(_OPENMP) -#pragma omp parallel default(none) shared(list,listhistory) +#pragma omp parallel default(none) shared(list) #endif NPAIR_OMP_SETUP(nlocal); - int i,j,m,n,nn,itag,jtag,dnum,dnumbytes; + int i,j,m,n,nn,itag,jtag; double xtmp,ytmp,ztmp,delx,dely,delz,rsq; double radi,radsum,cutsq; - int *neighptr,*touchptr; - double *shearptr; - - int *npartner; - tagint **partner; - double **shearpartner; - int **firsttouch; - double **firstshear; - MyPage<int> *ipage_touch; - MyPage<double> *dpage_shear; + int *neighptr; double **x = atom->x; double *radius = atom->radius; @@ -90,29 +73,10 @@ void NPairHalfSizeNsqNewtonOmp::build(NeighList *list) MyPage<int> &ipage = list->ipage[tid]; ipage.reset(); - if (fix_history) { - npartner = fix_history->npartner; - partner = fix_history->partner; - shearpartner = fix_history->shearpartner; - firsttouch = listhistory->firstneigh; - firstshear = listhistory->firstdouble; - ipage_touch = listhistory->ipage+tid; - dpage_shear = listhistory->dpage+tid; - dnum = listhistory->dnum; - dnumbytes = dnum * sizeof(double); - ipage_touch->reset(); - dpage_shear->reset(); - } - for (i = ifrom; i < ito; i++) { n = 0; neighptr = ipage.vget(); - if (fix_history) { - nn = 0; - touchptr = ipage_touch->vget(); - shearptr = dpage_shear->vget(); - } itag = tag[i]; xtmp = x[i][0]; @@ -150,29 +114,10 @@ void NPairHalfSizeNsqNewtonOmp::build(NeighList *list) cutsq = (radsum+skin) * (radsum+skin); if (rsq <= cutsq) { - neighptr[n] = j; - - if (fix_history) { - if (rsq < radsum*radsum) { - for (m = 0; m < npartner[i]; m++) - if (partner[i][m] == tag[j]) break; - if (m < npartner[i]) 
{ - touchptr[n] = 1; - memcpy(&shearptr[nn],&shearpartner[i][dnum*m],dnumbytes); - nn += dnum; - } else { - touchptr[n] = 0; - memcpy(&shearptr[nn],zeroes,dnumbytes); - nn += dnum; - } - } else { - touchptr[n] = 0; - memcpy(&shearptr[nn],zeroes,dnumbytes); - nn += dnum; - } - } - - n++; + if (history && rsq < radsum*radsum) + neighptr[n++] = j ^ mask_history; + else + neighptr[n++] = j; } } @@ -183,12 +128,6 @@ void NPairHalfSizeNsqNewtonOmp::build(NeighList *list) if (ipage.status()) error->one(FLERR,"Neighbor list overflow, boost neigh_modify one"); - if (fix_history) { - firsttouch[i] = touchptr; - firstshear[i] = shearptr; - ipage_touch->vgot(n); - dpage_shear->vgot(nn); - } } NPAIR_OMP_CLOSE; list->inum = nlocal; diff --git a/src/USER-OMP/pair_buck_long_coul_long_omp.cpp b/src/USER-OMP/pair_buck_long_coul_long_omp.cpp index 87f9e2e3216596e59fa753db42b51520c677c1e5..f99637240998739282d37d11286fbe09badc9c65 100644 --- a/src/USER-OMP/pair_buck_long_coul_long_omp.cpp +++ b/src/USER-OMP/pair_buck_long_coul_long_omp.cpp @@ -319,7 +319,7 @@ void PairBuckLongCoulLongOMP::compute_inner() const int nall = atom->nlocal + atom->nghost; const int nthreads = comm->nthreads; - const int inum = listinner->inum; + const int inum = list->inum_inner; #if defined(_OPENMP) #pragma omp parallel default(none) #endif @@ -343,7 +343,7 @@ void PairBuckLongCoulLongOMP::compute_middle() const int nall = atom->nlocal + atom->nghost; const int nthreads = comm->nthreads; - const int inum = listmiddle->inum; + const int inum = list->inum_middle; #if defined(_OPENMP) #pragma omp parallel default(none) @@ -373,7 +373,7 @@ void PairBuckLongCoulLongOMP::compute_outer(int eflag, int vflag) const int nall = atom->nlocal + atom->nghost; const int nthreads = comm->nthreads; - const int inum = listouter->inum; + const int inum = list->inum; #if defined(_OPENMP) #pragma omp parallel default(none) shared(eflag,vflag) @@ -811,7 +811,7 @@ void PairBuckLongCoulLongOMP::eval_inner(int iifrom, int iito, 
ThrData * const t const double *x0 = x[0]; double *f0 = f[0], *fi = 0; - int *ilist = listinner->ilist; + int *ilist = list->ilist_inner; const int newton_pair = force->newton_pair; @@ -835,7 +835,7 @@ void PairBuckLongCoulLongOMP::eval_inner(int iifrom, int iito, ThrData * const t memcpy(xi, x0+(i+(i<<1)), sizeof(vector)); cut_bucksqi = cut_bucksq[typei = type[i]]; buck1i = buck1[typei]; buck2i = buck2[typei]; rhoinvi = rhoinv[typei]; - jneighn = (jneigh = listinner->firstneigh[i])+listinner->numneigh[i]; + jneighn = (jneigh = list->firstneigh_inner[i])+list->numneigh_inner[i]; for (; jneigh<jneighn; ++jneigh) { // loop over neighbors j = *jneigh; @@ -904,7 +904,7 @@ void PairBuckLongCoulLongOMP::eval_middle(int iifrom, int iito, ThrData * const const double *x0 = x[0]; double *f0 = f[0], *fi = 0; - int *ilist = listmiddle->ilist; + int *ilist = list->ilist_middle; const int newton_pair = force->newton_pair; @@ -932,7 +932,7 @@ void PairBuckLongCoulLongOMP::eval_middle(int iifrom, int iito, ThrData * const memcpy(xi, x0+(i+(i<<1)), sizeof(vector)); cut_bucksqi = cut_bucksq[typei = type[i]]; buck1i = buck1[typei]; buck2i = buck2[typei]; rhoinvi = rhoinv[typei]; - jneighn = (jneigh = listmiddle->firstneigh[i])+listmiddle->numneigh[i]; + jneighn = (jneigh = list->firstneigh_middle[i])+list->numneigh_middle[i]; for (; jneigh<jneighn; ++jneigh) { // loop over neighbors j = *jneigh; @@ -1009,7 +1009,7 @@ void PairBuckLongCoulLongOMP::eval_outer(int iiform, int iito, ThrData * const t const double *x0 = x[0]; double *f0 = f[0], *fi = f0; - int *ilist = listouter->ilist; + int *ilist = list->ilist; int i, j, ii; int *jneigh, *jneighn, typei, typej, ni, respa_flag; @@ -1035,7 +1035,7 @@ void PairBuckLongCoulLongOMP::eval_outer(int iiform, int iito, ThrData * const t buckai = buck_a[typei]; buckci = buck_c[typei]; rhoinvi = rhoinv[typei]; cutsqi = cutsq[typei]; cut_bucksqi = cut_bucksq[typei]; memcpy(xi, x0+(i+(i<<1)), sizeof(vector)); - jneighn = (jneigh = 
listouter->firstneigh[i])+listouter->numneigh[i]; + jneighn = (jneigh = list->firstneigh[i])+list->numneigh[i]; for (; jneigh<jneighn; ++jneigh) { // loop over neighbors j = *jneigh; diff --git a/src/USER-OMP/pair_gran_hertz_history_omp.cpp b/src/USER-OMP/pair_gran_hertz_history_omp.cpp index ac40e29edf0ae4f615d799fe344bb87533d9e7d4..046b71a73aee49f91222b0b787f1c1ea4551f434 100644 --- a/src/USER-OMP/pair_gran_hertz_history_omp.cpp +++ b/src/USER-OMP/pair_gran_hertz_history_omp.cpp @@ -14,6 +14,7 @@ #include <math.h> #include "pair_gran_hertz_history_omp.h" +#include "fix_neigh_history.h" #include "atom.h" #include "comm.h" #include "fix.h" @@ -134,8 +135,8 @@ void PairGranHertzHistoryOMP::eval(int iifrom, int iito, ThrData * const thr) ilist = list->ilist; numneigh = list->numneigh; firstneigh = list->firstneigh; - firsttouch = list->listhistory->firstneigh; - firstshear = list->listhistory->firstdouble; + firsttouch = fix_history->firstflag; + firstshear = fix_history->firstvalue; // loop over neighbors of my atoms diff --git a/src/USER-OMP/pair_gran_hooke_history_omp.cpp b/src/USER-OMP/pair_gran_hooke_history_omp.cpp index e507a63f7cd425f04aa605a7056a4001e7b93372..2e7d55aff059c1e7867cb35edd03bd3d19cf7eb6 100644 --- a/src/USER-OMP/pair_gran_hooke_history_omp.cpp +++ b/src/USER-OMP/pair_gran_hooke_history_omp.cpp @@ -14,6 +14,7 @@ #include <math.h> #include "pair_gran_hooke_history_omp.h" +#include "fix_neigh_history.h" #include "atom.h" #include "comm.h" #include "fix.h" @@ -137,8 +138,8 @@ void PairGranHookeHistoryOMP::eval(int iifrom, int iito, ThrData * const thr) ilist = list->ilist; numneigh = list->numneigh; firstneigh = list->firstneigh; - firsttouch = listhistory->firstneigh; - firstshear = listhistory->firstdouble; + firsttouch = fix_history->firstflag; + firstshear = fix_history->firstvalue; // loop over neighbors of my atoms diff --git a/src/USER-OMP/pair_lj_long_coul_long_omp.cpp b/src/USER-OMP/pair_lj_long_coul_long_omp.cpp index 
28d4f229c8c25b0ced8b4d4578fd0468b2c7442f..c0c87e74815896d4282a556035464d0559129d40 100644 --- a/src/USER-OMP/pair_lj_long_coul_long_omp.cpp +++ b/src/USER-OMP/pair_lj_long_coul_long_omp.cpp @@ -317,7 +317,7 @@ void PairLJLongCoulLongOMP::compute_inner() const int nall = atom->nlocal + atom->nghost; const int nthreads = comm->nthreads; - const int inum = listinner->inum; + const int inum = list->inum_inner; #if defined(_OPENMP) #pragma omp parallel default(none) #endif @@ -341,7 +341,7 @@ void PairLJLongCoulLongOMP::compute_middle() const int nall = atom->nlocal + atom->nghost; const int nthreads = comm->nthreads; - const int inum = listmiddle->inum; + const int inum = list->inum_middle; #if defined(_OPENMP) #pragma omp parallel default(none) @@ -371,7 +371,7 @@ void PairLJLongCoulLongOMP::compute_outer(int eflag, int vflag) const int nall = atom->nlocal + atom->nghost; const int nthreads = comm->nthreads; - const int inum = listouter->inum; + const int inum = list->inum; #if defined(_OPENMP) #pragma omp parallel default(none) shared(eflag,vflag) @@ -805,7 +805,7 @@ void PairLJLongCoulLongOMP::eval_inner(int iifrom, int iito, ThrData * const thr const double *x0 = x[0]; double *f0 = f[0], *fi = 0; - int *ilist = listinner->ilist; + int *ilist = list->ilist_inner; const int newton_pair = force->newton_pair; @@ -828,7 +828,7 @@ void PairLJLongCoulLongOMP::eval_inner(int iifrom, int iito, ThrData * const thr memcpy(xi, x0+(i+(i<<1)), sizeof(vector)); cut_ljsqi = cut_ljsq[typei = type[i]]; lj1i = lj1[typei]; lj2i = lj2[typei]; - jneighn = (jneigh = listinner->firstneigh[i])+listinner->numneigh[i]; + jneighn = (jneigh = list->firstneigh_inner[i])+list->numneigh_inner[i]; for (; jneigh<jneighn; ++jneigh) { // loop over neighbors j = *jneigh; ni = sbmask(j); @@ -896,7 +896,7 @@ void PairLJLongCoulLongOMP::eval_middle(int iifrom, int iito, ThrData * const th const double *x0 = x[0]; double *f0 = f[0], *fi = 0; - int *ilist = listmiddle->ilist; + int *ilist = 
list->ilist_middle; const int newton_pair = force->newton_pair; @@ -925,7 +925,7 @@ void PairLJLongCoulLongOMP::eval_middle(int iifrom, int iito, ThrData * const th memcpy(xi, x0+(i+(i<<1)), sizeof(vector)); cut_ljsqi = cut_ljsq[typei = type[i]]; lj1i = lj1[typei]; lj2i = lj2[typei]; - jneighn = (jneigh = listmiddle->firstneigh[i])+listmiddle->numneigh[i]; + jneighn = (jneigh = list->firstneigh_middle[i])+list->numneigh_middle[i]; for (; jneigh<jneighn; ++jneigh) { j = *jneigh; @@ -1000,7 +1000,7 @@ void PairLJLongCoulLongOMP::eval_outer(int iiform, int iito, ThrData * const thr const double *x0 = x[0]; double *f0 = f[0], *fi = f0; - int *ilist = listouter->ilist; + int *ilist = list->ilist; int i, j, ii; int *jneigh, *jneighn, typei, typej, ni, respa_flag; @@ -1027,7 +1027,7 @@ void PairLJLongCoulLongOMP::eval_outer(int iiform, int iito, ThrData * const thr lj1i = lj1[typei]; lj2i = lj2[typei]; lj3i = lj3[typei]; lj4i = lj4[typei]; cutsqi = cutsq[typei]; cut_ljsqi = cut_ljsq[typei]; memcpy(xi, x0+(i+(i<<1)), sizeof(vector)); - jneighn = (jneigh = listouter->firstneigh[i])+listouter->numneigh[i]; + jneighn = (jneigh = list->firstneigh[i])+list->numneigh[i]; for (; jneigh<jneighn; ++jneigh) { // loop over neighbors j = *jneigh; diff --git a/src/USER-OMP/pair_lj_long_tip4p_long_omp.cpp b/src/USER-OMP/pair_lj_long_tip4p_long_omp.cpp index 1c8f60d7dcb1c141d69e370fdff8bf0f5aef09d4..c6490b08243254def01c983c0c223dd2fbfb347d 100644 --- a/src/USER-OMP/pair_lj_long_tip4p_long_omp.cpp +++ b/src/USER-OMP/pair_lj_long_tip4p_long_omp.cpp @@ -379,7 +379,7 @@ void PairLJLongTIP4PLongOMP::compute_inner() for (i = 0; i < nall; i++) hneigh_thr[i].t = 0; const int nthreads = comm->nthreads; - const int inum = listinner->inum; + const int inum = list->inum_inner; #if defined(_OPENMP) #pragma omp parallel default(none) #endif @@ -403,7 +403,7 @@ void PairLJLongTIP4PLongOMP::compute_middle() const int nall = atom->nlocal + atom->nghost; const int nthreads = comm->nthreads; - const int 
inum = listmiddle->inum; + const int inum = list->inum_middle; #if defined(_OPENMP) #pragma omp parallel default(none) @@ -457,7 +457,7 @@ void PairLJLongTIP4PLongOMP::compute_outer(int eflag, int vflag) } const int nthreads = comm->nthreads; - const int inum = listouter->inum; + const int inum = list->inum; #if defined(_OPENMP) #pragma omp parallel default(none) shared(eflag,vflag) @@ -1126,9 +1126,9 @@ void PairLJLongTIP4PLongOMP::eval_inner(int iifrom, int iito, ThrData * const th double *lj1i, *lj2i; - ilist = listinner->ilist; - numneigh = listinner->numneigh; - firstneigh = listinner->firstneigh; + ilist = list->ilist_inner; + numneigh = list->numneigh_inner; + firstneigh = list->firstneigh_inner; // loop over neighbors of my atoms @@ -1388,9 +1388,9 @@ void PairLJLongTIP4PLongOMP::eval_middle(int iifrom, int iito, ThrData * const t int ni; double *lj1i, *lj2i; - ilist = listmiddle->ilist; - numneigh = listmiddle->numneigh; - firstneigh = listmiddle->firstneigh; + ilist = list->ilist_middle; + numneigh = list->numneigh_middle; + firstneigh = list->firstneigh_middle; // loop over neighbors of my atoms @@ -1656,9 +1656,9 @@ void PairLJLongTIP4PLongOMP::eval_outer(int iifrom, int iito, ThrData * const th double fxtmp,fytmp,fztmp; - ilist = listouter->ilist; - numneigh = listouter->numneigh; - firstneigh = listouter->firstneigh; + ilist = list->ilist; + numneigh = list->numneigh; + firstneigh = list->firstneigh; // loop over neighbors of my atoms diff --git a/src/USER-OMP/respa_omp.cpp b/src/USER-OMP/respa_omp.cpp index 738538a20935ffee2d8077a7713c6bab34b5f0c5..aa4aa65a4fcefec8bc143367d63347306c1c3bc6 100644 --- a/src/USER-OMP/respa_omp.cpp +++ b/src/USER-OMP/respa_omp.cpp @@ -108,6 +108,7 @@ void RespaOMP::setup() domain->box_too_small_check(); modify->setup_pre_neighbor(); neighbor->build(); + modify->setup_post_neighbor(); neighbor->ncalls = 0; // compute all forces @@ -200,6 +201,7 @@ void RespaOMP::setup_minimal(int flag) domain->box_too_small_check(); 
modify->setup_pre_neighbor(); neighbor->build(); + modify->setup_post_neighbor(); neighbor->ncalls = 0; } @@ -311,6 +313,10 @@ void RespaOMP::recurse(int ilevel) } neighbor->build(); timer->stamp(Timer::NEIGH); + if (modify->n_post_neighbor) { + modify->post_neighbor(); + timer->stamp(Timer::MODIFY); + } } else if (ilevel == 0) { timer->stamp(); comm->forward_comm(); diff --git a/src/USER-REAXC/fix_qeq_reax.cpp b/src/USER-REAXC/fix_qeq_reax.cpp index 9d165f3fd3bbb7903cc1adaa2a592715d4d828f0..d1c4f907714ccce73e048d2b6611988a8a9f54dd 100644 --- a/src/USER-REAXC/fix_qeq_reax.cpp +++ b/src/USER-REAXC/fix_qeq_reax.cpp @@ -95,7 +95,7 @@ FixQEqReax::FixQEqReax(LAMMPS *lmp, int narg, char **arg) : pack_flag = 0; s = NULL; t = NULL; - nprev = 5; + nprev = 4; Hdia_inv = NULL; b_s = NULL; @@ -817,7 +817,7 @@ void FixQEqReax::calculate_Q() q[i] = s[i] - u * t[i]; /* backup s & t */ - for (k = 4; k > 0; --k) { + for (k = nprev-1; k > 0; --k) { s_hist[i][k] = s_hist[i][k-1]; t_hist[i][k] = t_hist[i][k-1]; } diff --git a/src/USER-UEF/README b/src/USER-UEF/README new file mode 100644 index 0000000000000000000000000000000000000000..92b2cee5e32877fbd41e03dbf675c4020690b384 --- /dev/null +++ b/src/USER-UEF/README @@ -0,0 +1,47 @@ +USER-UEF is a LAMMPS package for non-equilibrium molecular dynamics +(NEMD) under diagonal flow fields, including uniaxial and biaxial +flow. With this package, simulations under extensional flow may be +carried out for an indefinite amount of time. It is an implementation +of the boundary conditions developed by Matthew Dobson, and also uses +numerical lattice reduction as was proposed by Thomas Hunt. The +lattice reduction algorithm is from Igor Semaev. The package is +intended for simulations of homogeneous flows, and integrates the +SLLOD equations of motion. + +-- + +This package was created by David Nicholson (davidanich@gmail.com) +at the Massachusetts Institute of Technology. 
+ +-- + +The following commands are contained in this package: + +fix npt/uef and fix nvt/uef: + These commands perform time-integration of the SLLOD equations of + motion under constant temperature/pressure with the proper + boundary conditions for extensional flow fields. + +compute pressure/uef and compute temp/uef: + These commands allow for the evaluation of pressure/ke tensors in + the reference frame corresponding to the applied flow field. + +dump cfg/uef: + This command dumps coordinates in the reference frame corresponding + to the applied flow field. + +For more information, visit the documentation page for fix nvt/uef +and examine the example scripts in doc/USER/uef/. + +-- + +References: + +I. Semaev, Cryptography and Lattices, 181 (2001). + +M. Dobson, J. Chem. Phys., 141, 184103 (2014). + +T.A. Hunt, Mol. Simul., 42, 347 (2016). + +D.A. Nicholson, G.C. Rutledge, J. Chem. Phys., 145, 244903 (2016). + diff --git a/src/USER-UEF/compute_pressure_uef.cpp b/src/USER-UEF/compute_pressure_uef.cpp new file mode 100644 index 0000000000000000000000000000000000000000..43054d7d3f9652f1770361596fa8c62d954ff898 --- /dev/null +++ b/src/USER-UEF/compute_pressure_uef.cpp @@ -0,0 +1,195 @@ +/* ---------------------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. 
+ + Contributing author: David Nicholson (MIT) +------------------------------------------------------------------------- */ + +#include <mpi.h> +#include <string.h> +#include <stdlib.h> +#include "compute_pressure_uef.h" +#include "fix_nh_uef.h" +#include "update.h" +#include "domain.h" +#include "modify.h" +#include "fix.h" +#include "force.h" +#include "pair.h" +#include "bond.h" +#include "angle.h" +#include "dihedral.h" +#include "improper.h" +#include "kspace.h" +#include "error.h" + +using namespace LAMMPS_NS; + +/* ---------------------------------------------------------------------- + * Default values for the ext flags + * ----------------------------------------------------------------------*/ +ComputePressureUef::ComputePressureUef(LAMMPS *lmp, int narg, char **arg) : + ComputePressure(lmp, narg, arg) +{ + ext_flags[0] = true; + ext_flags[1] = true; + ext_flags[2] = true; + in_fix=false; +} + +/* ---------------------------------------------------------------------- + * Check for the uef fix + * ----------------------------------------------------------------------*/ +void ComputePressureUef::init() +{ + ComputePressure::init(); + // check to make sure the other uef fix is on + // borrowed from Pieter's nvt/sllod code + int i=0; + for (i=0; i<modify->nfix; i++) { + if (strcmp(modify->fix[i]->style,"nvt/uef")==0) + break; + if (strcmp(modify->fix[i]->style,"npt/uef")==0) + break; + } + if (i==modify->nfix) + error->all(FLERR,"Can't use compute pressure/uef without defining a fix nvt/npt/uef"); + ifix_uef=i; + ((FixNHUef*) modify->fix[ifix_uef])->get_ext_flags(ext_flags); + + if (strcmp(temperature->style,"temp/uef") != 0) + error->warning(FLERR,"The temperature used in compute pressure/uef is not of style temp/uef"); +} + +/* ---------------------------------------------------------------------- + * Compute pressure in the directions i corresponding to ext_flag[i]=true + * ----------------------------------------------------------------------*/ +double 
ComputePressureUef::compute_scalar() +{ + + temperature->compute_scalar(); +// if all pressures are external the scalar is found as normal + if (ext_flags[0] && ext_flags[1] && ext_flags[2]) + return ComputePressure::compute_scalar(); + +// otherwise compute the full tensor and average desired components + compute_vector(); + addstep(update->ntimestep+1); + + int k =0; + scalar = 0; + if (ext_flags[0]) { + scalar += vector[0]; + k++; + } + if (ext_flags[1]) { + scalar += vector[1]; + k++; + } + if (ext_flags[2]) { + scalar += vector[2]; + k++; + } + + scalar /= k; + return scalar; +} + +/* ---------------------------------------------------------------------- + Compute the pressure tensor in the rotated coordinate system +------------------------------------------------------------------------- */ +void ComputePressureUef::compute_vector() +{ + invoked_vector = update->ntimestep; + if (update->vflag_global != invoked_vector) + error->all(FLERR,"Virial was not tallied on needed timestep"); + + if (force->kspace && kspace_virial && force->kspace->scalar_pressure_flag) + error->all(FLERR,"Must use 'kspace_modify pressure/scalar no' for " + "tensor components with kspace_style msm"); + + // invoke temperature if it hasn't been already + + double *ke_tensor; + if (keflag) { + if (temperature->invoked_vector != update->ntimestep) + temperature->compute_vector(); + ke_tensor = temperature->vector; + } + + if (dimension == 3) { + inv_volume = 1.0 / (domain->xprd * domain->yprd * domain->zprd); + virial_compute(6,3); + if (in_fix) + virial_rot(virial,rot); + else + { + double r[3][3]; + ( (FixNHUef*) modify->fix[ifix_uef])->get_rot(r); + virial_rot(virial,r); + } + if (keflag) { + for (int i = 0; i < 6; i++) + vector[i] = (ke_tensor[i] + virial[i]) * inv_volume * nktv2p; + } else + for (int i = 0; i < 6; i++) + vector[i] = virial[i] * inv_volume * nktv2p; + } else { + inv_volume = 1.0 / (domain->xprd * domain->yprd); + virial_compute(4,2); + if (keflag) { + vector[0] = 
(ke_tensor[0] + virial[0]) * inv_volume * nktv2p; + vector[1] = (ke_tensor[1] + virial[1]) * inv_volume * nktv2p; + vector[3] = (ke_tensor[3] + virial[3]) * inv_volume * nktv2p; + vector[2] = vector[4] = vector[5] = 0.0; + } else { + vector[0] = virial[0] * inv_volume * nktv2p; + vector[1] = virial[1] * inv_volume * nktv2p; + vector[3] = virial[3] * inv_volume * nktv2p; + vector[2] = vector[4] = vector[5] = 0.0; + } + } +} + +/* ---------------------------------------------------------------------- + * get the current rotation matrix and store it +------------------------------------------------------------------------- */ +void ComputePressureUef::update_rot() +{ + ( (FixNHUef*) modify->fix[ifix_uef])->get_rot(rot); +} + +/* ---------------------------------------------------------------------- + Transform the pressure tensor to the rotated coordinate system + [P]rot = Q.[P].Q^t +------------------------------------------------------------------------- */ +void ComputePressureUef::virial_rot(double *x, const double r[3][3]) +{ + + double t[3][3]; + + // [00 10 20 ] [ 0 3 4 ] [00 01 02 ] + // [01 11 21 ] [ 3 1 5 ] [10 11 12 ] + // [02 12 22 ] [ 4 5 2 ] [20 21 22 ] + + for (int k = 0; k<3; ++k) + { + t[0][k] = x[0]*r[0][k] + x[3]*r[1][k] + x[4]*r[2][k]; + t[1][k] = x[3]*r[0][k] + x[1]*r[1][k] + x[5]*r[2][k]; + t[2][k] = x[4]*r[0][k] + x[5]*r[1][k] + x[2]*r[2][k]; + } + x[0] = r[0][0]*t[0][0] + r[1][0]*t[1][0] + r[2][0]*t[2][0]; + x[3] = r[0][0]*t[0][1] + r[1][0]*t[1][1] + r[2][0]*t[2][1]; + x[4] = r[0][0]*t[0][2] + r[1][0]*t[1][2] + r[2][0]*t[2][2]; + x[1] = r[0][1]*t[0][1] + r[1][1]*t[1][1] + r[2][1]*t[2][1]; + x[5] = r[0][1]*t[0][2] + r[1][1]*t[1][2] + r[2][1]*t[2][2]; + x[2] = r[0][2]*t[0][2] + r[1][2]*t[1][2] + r[2][2]*t[2][2]; +} diff --git a/src/USER-UEF/compute_pressure_uef.h b/src/USER-UEF/compute_pressure_uef.h new file mode 100644 index 0000000000000000000000000000000000000000..d3a4d3195cdaa6845c880cf0d216488f482c25e5 --- /dev/null +++ 
b/src/USER-UEF/compute_pressure_uef.h @@ -0,0 +1,66 @@ +/* -*- c++ -*- ---------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. + + Contributing author: David Nicholson (MIT) +------------------------------------------------------------------------- */ + +#ifdef COMPUTE_CLASS + +ComputeStyle(pressure/uef,ComputePressureUef) + +#else + +#ifndef LMP_COMPUTE_PRESSURE_UEF_H +#define LMP_COMPUTE_PRESSURE_UEF_H + +#include "compute_pressure.h" + +namespace LAMMPS_NS { + +class ComputePressureUef : public ComputePressure { + public: + ComputePressureUef(class LAMMPS *, int, char **); + virtual ~ComputePressureUef(){} + virtual void init(); + virtual void compute_vector(); + virtual double compute_scalar(); + void update_rot(); + bool in_fix; //true if this compute is used in fix/nvt/npt + + + protected: + bool ext_flags[3]; // true if used in average output pressure + void virial_rot(double*,const double[3][3]); + int ifix_uef; + double rot[3][3]; +}; + + +} + +#endif +#endif + +/* ERROR/WARNING messages: + +This class inherits most of the warnings from ComputePressure. The +only additions are: + +E: Can't use compute pressure/uef without defining a fix nvt/npt/uef + +Self-explanatory. + +W: The temperature used in compute pressure/uef is not of style temp/uef + +Self-explanatory. 
+ +*/ diff --git a/src/USER-UEF/compute_temp_uef.cpp b/src/USER-UEF/compute_temp_uef.cpp new file mode 100644 index 0000000000000000000000000000000000000000..605552405611711327b9fd0a7409b7edbec70eca --- /dev/null +++ b/src/USER-UEF/compute_temp_uef.cpp @@ -0,0 +1,106 @@ +/* ---------------------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. + + Contributing author: David Nicholson (MIT) +------------------------------------------------------------------------- */ + +#include <string.h> +#include <stdlib.h> +#include "compute_temp_uef.h" +#include "fix_nh_uef.h" +#include "update.h" +#include "modify.h" +#include "fix.h" +#include "error.h" + +using namespace LAMMPS_NS; + +/* ---------------------------------------------------------------------- + * Base constructor initialized to use rotation matrix + * ----------------------------------------------------------------------*/ +ComputeTempUef::ComputeTempUef(LAMMPS *lmp, int narg, char **arg) : + ComputeTemp(lmp, narg, arg) +{ + rot_flag=true; +} + +/* ---------------------------------------------------------------------- + * Check for the uef fix + * ----------------------------------------------------------------------*/ +void ComputeTempUef::init() +{ + ComputeTemp::init(); + // check to make sure the other uef fix is on + // borrowed from Pieter's nvt/sllod code + int i=0; + for (i=0; i<modify->nfix; i++) { + if (strcmp(modify->fix[i]->style,"nvt/uef")==0) + break; + if (strcmp(modify->fix[i]->style,"npt/uef")==0) + break; + } + if (i==modify->nfix) + 
error->all(FLERR,"Can't use compute temp/uef without defining a fix nvt/npt/uef"); + ifix_uef=i; +} + + +/* ---------------------------------------------------------------------- + Compute the ke tensor in the proper coordinate system +------------------------------------------------------------------------- */ +void ComputeTempUef::compute_vector() +{ + ComputeTemp::compute_vector(); + if (rot_flag) { + double rot[3][3]; + ( (FixNHUef*) modify->fix[ifix_uef])->get_rot(rot); + virial_rot(vector,rot); + } + +} + +/* ---------------------------------------------------------------------- + * turn the rotation matrix on or off to properly account for the + * coordinate system of the velocities +------------------------------------------------------------------------- */ +void ComputeTempUef::yes_rot() +{ + rot_flag =true; +} +void ComputeTempUef::no_rot() +{ + rot_flag =false; +} + +/* ---------------------------------------------------------------------- + Transform the pressure tensor to the rotated coordinate system + [P]rot = Q.[P].Q^t +------------------------------------------------------------------------- */ +void ComputeTempUef::virial_rot(double *x, const double r[3][3]) +{ + + double t[3][3]; + // [00 10 20 ] [ 0 3 4 ] [00 01 02 ] + // [01 11 21 ] [ 3 1 5 ] [10 11 12 ] + // [02 12 22 ] [ 4 5 2 ] [20 21 22 ] + for (int k = 0; k<3; ++k) { + t[0][k] = x[0]*r[0][k] + x[3]*r[1][k] + x[4]*r[2][k]; + t[1][k] = x[3]*r[0][k] + x[1]*r[1][k] + x[5]*r[2][k]; + t[2][k] = x[4]*r[0][k] + x[5]*r[1][k] + x[2]*r[2][k]; + } + x[0] = r[0][0]*t[0][0] + r[1][0]*t[1][0] + r[2][0]*t[2][0]; + x[3] = r[0][0]*t[0][1] + r[1][0]*t[1][1] + r[2][0]*t[2][1]; + x[4] = r[0][0]*t[0][2] + r[1][0]*t[1][2] + r[2][0]*t[2][2]; + x[1] = r[0][1]*t[0][1] + r[1][1]*t[1][1] + r[2][1]*t[2][1]; + x[5] = r[0][1]*t[0][2] + r[1][1]*t[1][2] + r[2][1]*t[2][2]; + x[2] = r[0][2]*t[0][2] + r[1][2]*t[1][2] + r[2][2]*t[2][2]; +} diff --git a/src/USER-UEF/compute_temp_uef.h b/src/USER-UEF/compute_temp_uef.h new 
file mode 100644 index 0000000000000000000000000000000000000000..460e2b18c0fc9f1348b654692d7c24515cda5fa8 --- /dev/null +++ b/src/USER-UEF/compute_temp_uef.h @@ -0,0 +1,60 @@ +/* -*- c++ -*- ---------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. + + Contributing author: David Nicholson (MIT) +------------------------------------------------------------------------- */ + +#ifdef COMPUTE_CLASS + +ComputeStyle(temp/uef,ComputeTempUef) + +#else + +#ifndef LMP_COMPUTE_TEMP_UEF_H +#define LMP_COMPUTE_TEMP_UEF_H + +#include "compute_temp.h" + +namespace LAMMPS_NS { + +class ComputeTempUef : public ComputeTemp { + public: + ComputeTempUef(class LAMMPS *, int, char **); + virtual ~ComputeTempUef(){} + virtual void init(); + virtual void compute_vector(); + void yes_rot(); + void no_rot(); + + + protected: + bool rot_flag; + void virial_rot(double*,const double[3][3]); + int ifix_uef; +}; + + +} + +#endif +#endif + +/* ERROR/WARNING messages: + +This class inherits most of the warnings from ComputeTemp. The +only addition is: + +E: Can't use compute temp/uef without defining a fix nvt/npt/uef + +Self-explanatory. 
+ +*/ diff --git a/src/USER-UEF/dump_cfg_uef.cpp b/src/USER-UEF/dump_cfg_uef.cpp new file mode 100644 index 0000000000000000000000000000000000000000..4b0c08275a62a7bb78132fd2ab6a138a0670d11d --- /dev/null +++ b/src/USER-UEF/dump_cfg_uef.cpp @@ -0,0 +1,114 @@ +/* ---------------------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. + + Contributing Author: David Nicholson (MIT) +------------------------------------------------------------------------- */ + + +#include <math.h> +#include <stdlib.h> +#include <string.h> +#include "dump_cfg.h" +#include "atom.h" +#include "domain.h" +#include "modify.h" +#include "compute.h" +#include "fix.h" +#include "error.h" +#include "uef_utils.h" +#include "dump_cfg_uef.h" +#include "fix_nh_uef.h" + +using namespace LAMMPS_NS; + +enum{INT,DOUBLE,STRING,BIGINT}; // same as in DumpCustom + +#define UNWRAPEXPAND 10.0 +#define ONEFIELD 32 +#define DELTA 1048576 + +/* ---------------------------------------------------------------------- + * base method is mostly fine, just need to find the FixNHUef + * ----------------------------------------------------------------------*/ +void DumpCFGUef::init_style() +{ + DumpCFG::init_style(); + + // check to make sure the other uef fix is on + // borrowed from Pieter's nvt/sllod code + int i=0; + for (i=0; i<modify->nfix; i++) + { + if (strcmp(modify->fix[i]->style,"nvt/uef")==0) + break; + if (strcmp(modify->fix[i]->style,"npt/uef")==0) + break; + } + if (i==modify->nfix) + error->all(FLERR,"Can't use dump cfg/uef without defining a 
fix nvt/npt/uef"); + ifix_uef=i; +} + +/* ---------------------------------------------------------------------- + * this is really the only difference between the base class and this one. + * since the output is in scaled coordinates, changing the simulation box + * edges to the flow frame will put coordinates in the flow frame too. + * ----------------------------------------------------------------------*/ + +void DumpCFGUef::write_header(bigint n) +{ + // set scale factor used by AtomEye for CFG viz + // default = 1.0 + // for peridynamics, set to pre-computed PD scale factor + // so PD particles mimic C atoms + // for unwrapped coords, set to UNWRAPEXPAND (10.0) + // so molecules are not split across periodic box boundaries + + double box[3][3],rot[3][3]; + ((FixNHUef*) modify->fix[ifix_uef])->get_box(box); + ((FixNHUef*) modify->fix[ifix_uef])->get_rot(rot); + // rot goes from "lab frame" to "upper triangular frame" + // it's transpose takes the simulation box to the flow frame + for (int i=0;i<3;i++) + for(int j=i+1;j<3;j++) + { + double t=rot[i][j]; + rot[i][j]=rot[j][i]; + rot[j][i]=t; + } + UEF_utils::mul_m2(rot,box); + + + double scale = 1.0; + if (atom->peri_flag) scale = atom->pdscale; + else if (unwrapflag == 1) scale = UNWRAPEXPAND; + + char str[64]; + sprintf(str,"Number of particles = %s\n",BIGINT_FORMAT); + fprintf(fp,str,n); + fprintf(fp,"A = %g Angstrom (basic length-scale)\n",scale); + // in box[][] columns are cell edges + // in H0, rows are cell edges + fprintf(fp,"H0(1,1) = %g A\n",box[0][0]); + fprintf(fp,"H0(1,2) = %g A\n",box[1][0]); + fprintf(fp,"H0(1,3) = %g A\n",box[2][0]); + fprintf(fp,"H0(2,1) = %g A\n",box[0][1]); + fprintf(fp,"H0(2,2) = %g A\n",box[1][1]); + fprintf(fp,"H0(2,3) = %g A\n",box[2][1]); + fprintf(fp,"H0(3,1) = %g A\n",box[0][2]); + fprintf(fp,"H0(3,2) = %g A\n",box[1][2]); + fprintf(fp,"H0(3,3) = %g A\n",box[2][2]); + fprintf(fp,".NO_VELOCITY.\n"); + fprintf(fp,"entry_count = %d\n",nfield-2); + for (int i = 0; i < 
nfield-5; i++) + fprintf(fp,"auxiliary[%d] = %s\n",i,auxname[i]); +} diff --git a/src/USER-UEF/dump_cfg_uef.h b/src/USER-UEF/dump_cfg_uef.h new file mode 100644 index 0000000000000000000000000000000000000000..d2881136ad38a4fc363cd9228315cd69fbc987d3 --- /dev/null +++ b/src/USER-UEF/dump_cfg_uef.h @@ -0,0 +1,51 @@ +/* -*- c++ -*- ---------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. + + Contributing Author: David Nicholson (MIT) +------------------------------------------------------------------------- */ + +#ifdef DUMP_CLASS + +DumpStyle(cfg/uef,DumpCFGUef) + +#else + +#ifndef LMP_DUMP_CFG_UEF_H +#define LMP_DUMP_CFG_UEF_H + +#include "dump_cfg.h" + +namespace LAMMPS_NS { + +class DumpCFGUef : public DumpCFG { + public: + DumpCFGUef(LAMMPS *lmp, int narg, char **arg) : + DumpCFG(lmp, narg, arg){} + void init_style(); + void write_header(bigint); + + protected: + int ifix_uef; +}; + +} + +#endif +#endif + +/* ERROR/WARNING messages: + +E: Can't use dump cfg/uef without defining a fix nvt/npt/uef + +Self-explanatory. 
+ +*/ diff --git a/src/USER-UEF/fix_nh_uef.cpp b/src/USER-UEF/fix_nh_uef.cpp new file mode 100644 index 0000000000000000000000000000000000000000..dd639c34dbc3e4442a16c60875d01587f085ff1d --- /dev/null +++ b/src/USER-UEF/fix_nh_uef.cpp @@ -0,0 +1,820 @@ +/* ---------------------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + www.cs.sandia.gov/~sjplimp/lammps.html + Steve Plimpton, sjplimp@sandia.gov, Sandia National Laboratories + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. + + Contributing author: David Nicholson (MIT) +------------------------------------------------------------------------- */ + +#include <string.h> +#include <stdlib.h> +#include <math.h> +#include "fix_nh_uef.h" +#include "atom.h" +#include "force.h" +#include "group.h" +#include "comm.h" +#include "citeme.h" +#include "irregular.h" +#include "modify.h" +#include "compute.h" +#include "kspace.h" +#include "update.h" +#include "domain.h" +#include "error.h" +#include "output.h" +#include "timer.h" +#include "neighbor.h" +#include "compute_pressure_uef.h" +#include "compute_temp_uef.h" +#include "uef_utils.h" + +using namespace LAMMPS_NS; +using namespace FixConst; + +enum{ISO,ANISO,TRICLINIC}; + +// citation info + +static const char cite_user_uef_package[] = + "USER-UEF package:\n\n" + "@Article{NicholsonRutledge16,\n" + "author = {David A. Nicholson and Gregory C. 
Rutledge},\n" + "title = {Molecular simulation of flow-enhanced nucleation in n-eicosane melts under steady shear and uniaxial extension},\n" + "journal = {The Journal of Chemical Physics},\n" + "volume = {145},\n" + "number = {24},\n" + "pages = {244903},\n" + "year = {2016}\n" + "}\n\n"; + +/* ---------------------------------------------------------------------- + * Parse fix specific keywords, do some error checking, and initialize + * temp/pressure fixes + ---------------------------------------------------------------------- */ +FixNHUef::FixNHUef(LAMMPS *lmp, int narg, char **arg) : + FixNH(lmp, narg, arg), uefbox(NULL) +{ + if (lmp->citeme) lmp->citeme->add(cite_user_uef_package); + + //initialization + + erate[0] = erate[1] = 0; + + // default values + + strain[0]=strain[1]= 0; + ext_flags[0]=ext_flags[1]=ext_flags[2] = true; + + // need to initialize these + + omega_dot[0]=omega_dot[1]=omega_dot[2]=0; + + // parse fix nh/uef specific options + + bool erate_flag = false; + int iarg = 3; + + while (iarg <narg) { + if (strcmp(arg[iarg],"erate")==0) { + if (iarg+3 > narg) error->all(FLERR,"Illegal fix nvt/npt/uef command"); + erate[0] = force->numeric(FLERR,arg[iarg+1]); + erate[1] = force->numeric(FLERR,arg[iarg+2]); + erate_flag = true; + iarg += 3; + } else if (strcmp(arg[iarg],"strain")==0) { + if (iarg+3 > narg) error->all(FLERR,"Illegal fix nvt/npt/uef command"); + strain[0] = force->numeric(FLERR,arg[iarg+1]); + strain[1] = force->numeric(FLERR,arg[iarg+2]); + iarg += 3; + } else if (strcmp(arg[iarg],"ext")==0) { + if (iarg+2 > narg) error->all(FLERR,"Illegal fix nvt/npt/uef command"); + if (strcmp(arg[iarg+1],"x")==0) + ext_flags[1] = ext_flags[2] = false; + else if (strcmp(arg[iarg+1],"y")==0) + ext_flags[0] = ext_flags[2] = false; + else if (strcmp(arg[iarg+1],"z")==0) + ext_flags[0] = ext_flags[1] = false; + else if (strcmp(arg[iarg+1],"xy")==0) + ext_flags[2] = false; + else if (strcmp(arg[iarg+1],"xz")==0) + ext_flags[1] = false; + else if 
(strcmp(arg[iarg+1],"yz")==0) + ext_flags[0] = false; + else if (strcmp(arg[iarg+1],"xyz")!=0) + error->all(FLERR,"Illegal fix nvt/npt/uef command"); + + iarg += 2; + } else { + + // skip to next argument; argument check for unknown keywords is done in FixNH + + ++iarg; + } + } + + if (!erate_flag) + error->all(FLERR,"Keyword erate must be set for fix nvt/npt/uef command"); + + if (mtchain_default_flag) mtchain=1; + + if (!domain->triclinic) + error->all(FLERR,"Simulation box must be triclinic for fix/nvt/npt/uef"); + + // check for conditions that impose a deviatoric stress + + if (pstyle == TRICLINIC) + error->all(FLERR,"Only normal stresses can be controlled with fix/nvt/npt/uef"); + double erate_tmp[3]; + erate_tmp[0]=erate[0]; + erate_tmp[1]=erate[1]; + erate_tmp[2]=-erate[0]-erate[1]; + + if (pstyle == ANISO) { + if (!(ext_flags[0] && ext_flags[1] && ext_flags[2])) + error->all(FLERR,"The ext keyword may only be used with iso pressure control"); + for (int k=0;k<3;k++) + for (int j=0;j<3;j++) + if (p_flag[k] && p_flag[j]) { + double tol = 1e-6; + if ( !nearly_equal(p_start[k],p_start[j],tol) + || !nearly_equal(p_stop[k],p_stop[j],tol)) + error->all(FLERR,"All controlled stresses must have the same " + "value in fix/nvt/npt/uef"); + if ( !nearly_equal(erate_tmp[k],erate_tmp[j],tol) + || !nearly_equal(erate_tmp[k],erate_tmp[j],tol)) + error->all(FLERR,"Dimensions with controlled stresses must have"\ + " same strain rate in fix/nvt/npt/uef"); + } + } + + // conditions that produce a deviatoric stress have already been eliminated. + + deviatoric_flag=0; + + // need pre_exchange and irregular migration + + pre_exchange_flag = 1; + irregular = new Irregular(lmp); + + // flag that I change the box here (in case of nvt) + + box_change_shape = 1; + + // initialize the UEFBox class which computes the box at each step + + uefbox = new UEF_utils::UEFBox(); + uefbox->set_strain(strain[0],strain[1]); + + // reset fixedpoint to the stagnation point. 
I don't allow fixedpoint + // to be set by the user. + + fixedpoint[0] = domain->boxlo[0]; + fixedpoint[1] = domain->boxlo[1]; + fixedpoint[2] = domain->boxlo[2]; + + // Create temp and pressure computes for nh/uef + + int n = strlen(id) + 6; + id_temp = new char[n]; + strcpy(id_temp,id); + strcat(id_temp,"_temp"); + char **newarg = new char*[3]; + newarg[0] = id_temp; + newarg[1] = (char *) "all"; + newarg[2] = (char *) "temp/uef"; + modify->add_compute(3,newarg); + delete [] newarg; + tcomputeflag = 1; + + n = strlen(id) + 7; + id_press = new char[n]; + strcpy(id_press,id); + strcat(id_press,"_press"); + newarg = new char*[4]; + newarg[0] = id_press; + newarg[1] = (char *) "all"; + newarg[2] = (char *) "pressure/uef"; + newarg[3] = id_temp; + modify->add_compute(4,newarg); + delete [] newarg; + pcomputeflag = 1; + + nevery = 1; +} + +/* ---------------------------------------------------------------------- + * Erase the UEFBox object and get rid of the pressure compute if the nvt + * version is being used. Everything else will be done in base destructor + * ---------------------------------------------------------------------- */ +FixNHUef::~FixNHUef() +{ + delete uefbox; + if (pcomputeflag && !pstat_flag) + { + modify->delete_compute(id_press); + delete [] id_press; + } +} + +/* ---------------------------------------------------------------------- + * Make the end_of_step() routine callable + * ---------------------------------------------------------------------- */ +int FixNHUef::setmask() +{ + int mask = FixNH::setmask(); + mask |= END_OF_STEP; + return mask; +} + +/* ---------------------------------------------------------------------- + * Run FixNH::init() and do more error checking. 
Set the pressure + * pointer in the case that the nvt version is used + * ---------------------------------------------------------------------- */ +void FixNHUef::init() +{ + FixNH::init(); + + + // find conflict with fix/deform or other box chaging fixes + for (int i=0; i < modify->nfix; i++) + { + if (strcmp(modify->fix[i]->id,id) != 0) + if (modify->fix[i]->box_change_shape != 0) + error->all(FLERR,"Can't use another fix which changes box shape with fix/nvt/npt/uef"); + } + + + // this will make the pressure compute for nvt + if (!pstat_flag) + if (pcomputeflag) { + int icomp = modify->find_compute(id_press); + if (icomp<0) + error->all(FLERR,"Pressure ID for fix/nvt/uef doesn't exist"); + pressure = modify->compute[icomp]; + + if (strcmp(pressure->style,"pressure/uef") != 0) + error->all(FLERR,"Using fix nvt/npt/uef without a compute pressure/uef"); + } + + if (strcmp(temperature->style,"temp/uef") != 0) + error->all(FLERR,"Using fix nvt/npt/uef without a compute temp/uef"); +} + +/* ---------------------------------------------------------------------- + * Run FixNH::setup() make sure the box is OK and set the rotation matrix + * for the first step + * ---------------------------------------------------------------------- */ +void FixNHUef::setup(int j) +{ + double box[3][3]; + double vol = domain->xprd * domain->yprd * domain->zprd; + uefbox->get_box(box,vol); + double tol = 1e-4; + // ensure the box is ok for uef + bool isok = true; + isok &= nearly_equal(domain->h[0],box[0][0],tol); + isok &= nearly_equal(domain->h[1],box[1][1],tol); + isok &= nearly_equal(domain->h[2],box[2][2],tol); + isok &= nearly_equal(domain->xy,box[0][1],tol); + isok &= nearly_equal(domain->yz,box[1][2],tol); + isok &= nearly_equal(domain->xz,box[0][2],tol); + if (!isok) + error->all(FLERR,"Initial box is not close enough to the expected uef box"); + + uefbox->get_rot(rot); + ((ComputeTempUef*) temperature)->yes_rot(); + ((ComputePressureUef*) pressure)->in_fix = true; + 
((ComputePressureUef*) pressure)->update_rot(); + FixNH::setup(j); +} + +/* ---------------------------------------------------------------------- + * rotate -> initial integration step -> rotate back + * ---------------------------------------------------------------------- */ +void FixNHUef::initial_integrate(int vflag) +{ + inv_rotate_x(rot); + inv_rotate_v(rot); + inv_rotate_f(rot); + ((ComputeTempUef*) temperature)->no_rot(); + FixNH::initial_integrate(vflag); + rotate_x(rot); + rotate_v(rot); + rotate_f(rot); + ((ComputeTempUef*) temperature)->yes_rot(); +} + +/* ---------------------------------------------------------------------- + * rotate -> initial integration step -> rotate back (RESPA) + * ---------------------------------------------------------------------- */ +void FixNHUef::initial_integrate_respa(int vflag, int ilevel, int iloop) +{ + inv_rotate_x(rot); + inv_rotate_v(rot); + inv_rotate_f(rot); + ((ComputeTempUef*) temperature)->no_rot(); + FixNH::initial_integrate_respa(vflag,ilevel,iloop); + rotate_x(rot); + rotate_v(rot); + rotate_f(rot); + ((ComputeTempUef*) temperature)->yes_rot(); +} + +/* ---------------------------------------------------------------------- + * rotate -> final integration step -> rotate back + * ---------------------------------------------------------------------- */ +void FixNHUef::final_integrate() +{ + // update rot here since it must directly follow the virial calculation + ((ComputePressureUef*) pressure)->update_rot(); + inv_rotate_v(rot); + inv_rotate_f(rot); + ((ComputeTempUef*) temperature)->no_rot(); + FixNH::final_integrate(); + rotate_v(rot); + rotate_f(rot); + ((ComputeTempUef*) temperature)->yes_rot(); +} + +/* ---------------------------------------------------------------------- + * at outer level: call this->final_integrate() + * at other levels: rotate -> 2nd verlet step -> rotate back + * ---------------------------------------------------------------------- */ +void FixNHUef::final_integrate_respa(int 
ilevel, int iloop) +{ + // set timesteps by level + dtf = 0.5 * step_respa[ilevel] * force->ftm2v; + dthalf = 0.5 * step_respa[ilevel]; + // outermost level - update eta_dot and omega_dot, apply via final_integrate + // all other levels - NVE update of v + if (ilevel == nlevels_respa-1) final_integrate(); + else + { + inv_rotate_v(rot); + inv_rotate_f(rot); + nve_v(); + rotate_v(rot); + rotate_f(rot); + } +} + +/* ---------------------------------------------------------------------- + SLLOD velocity update in time-reversible (i think) increments + v -> exp(-edot*dt/2)*v + v -> v +f/m*dt + v -> exp(-edot*dt/2)*v +-----------------------------------------------------------------------*/ +void FixNHUef::nve_v() +{ + double dtfm; + double **v = atom->v; + double **f = atom->f; + double *rmass = atom->rmass; + double *mass = atom->mass; + int *type = atom->type; + int *mask = atom->mask; + int nlocal = atom->nlocal; + double ex = erate[0]*dtf/2; + double ey = erate[1]*dtf/2; + double ez = -ex-ey; + double e0 = exp(-ex); + double e1 = exp(-ey); + double e2 = exp(-ez); + if (igroup == atom->firstgroup) nlocal = atom->nfirst; + + if (rmass) { + for (int i = 0; i < nlocal; i++) { + if (mask[i] & groupbit) { + dtfm = dtf / rmass[i]; + v[i][0] *= e0; + v[i][1] *= e1; + v[i][2] *= e2; + v[i][0] += dtfm*f[i][0]; + v[i][1] += dtfm*f[i][1]; + v[i][2] += dtfm*f[i][2]; + v[i][0] *= e0; + v[i][1] *= e1; + v[i][2] *= e2; + } + } + } else { + for (int i = 0; i < nlocal; i++) { + if (mask[i] & groupbit) { + dtfm = dtf / mass[type[i]]; + v[i][0] *= e0; + v[i][1] *= e1; + v[i][2] *= e2; + v[i][0] += dtfm*f[i][0]; + v[i][1] += dtfm*f[i][1]; + v[i][2] += dtfm*f[i][2]; + v[i][0] *= e0; + v[i][1] *= e1; + v[i][2] *= e2; + } + } + } +} + +/* ---------------------------------------------------------------------- + Don't actually move atoms in remap(), just change the box +-----------------------------------------------------------------------*/ +void FixNHUef::remap() +{ + double vol = 
domain->xprd * domain->yprd * domain->zprd; + double domega = dto*(omega_dot[0]+omega_dot[1]+omega_dot[2])/3.; + + // constant volume strain associated with barostat + // box scaling + double ex = dto*omega_dot[0]-domega; + double ey = dto*omega_dot[1]-domega; + uefbox->step_deform(ex,ey); + strain[0] += ex; + strain[1] += ey; + + // volume change + vol = vol*exp(3*domega); + double box[3][3]; + uefbox->get_box(box,vol); + domain->boxhi[0] = domain->boxlo[0]+box[0][0]; + domain->boxhi[1] = domain->boxlo[1]+box[1][1]; + domain->boxhi[2] = domain->boxlo[2]+box[2][2]; + domain->xy = box[0][1]; + domain->xz = box[0][2]; + domain->yz = box[1][2]; + domain->set_global_box(); + domain->set_local_box(); + uefbox->get_rot(rot); +} + +/* ---------------------------------------------------------------------- + SLLOD position update in time-reversible (i think) increments + x -> exp(edot*dt/2)*x + x -> x + v*dt + x -> exp(edot*dt/2)*x +-----------------------------------------------------------------------*/ +void FixNHUef::nve_x() +{ + double **x = atom->x; + double **v = atom->v; + int *mask = atom->mask; + int nlocal = atom->nlocal; + double ex = erate[0]*dtv; + strain[0] += ex; + double e0 = exp((ex+omega_dot[0]*dtv)/2); + double ey = erate[1]*dtv; + strain[1] += ey; + double e1 = exp((ey+omega_dot[1]*dtv)/2.); + double ez = -ex -ey; + double e2 = exp((ez+omega_dot[2]*dtv)/2.); + if (igroup == atom->firstgroup) nlocal = atom->nfirst; + + // x update by full step only for atoms in group + for (int i = 0; i < nlocal; i++) { + if (mask[i] & groupbit) { + x[i][0] *= e0; + x[i][1] *= e1; + x[i][2] *= e2; + x[i][0] += dtv * v[i][0]; + x[i][1] += dtv * v[i][1]; + x[i][2] += dtv * v[i][2]; + x[i][0] *= e0; + x[i][1] *= e1; + x[i][2] *= e2; + } + } + uefbox->step_deform(ex,ey); + double box[3][3]; + double vol = domain->xprd * domain->yprd * domain->zprd; + uefbox->get_box(box,vol); + domain->boxhi[0] = domain->boxlo[0]+box[0][0]; + domain->boxhi[1] = domain->boxlo[1]+box[1][1]; + 
domain->boxhi[2] = domain->boxlo[2]+box[2][2]; + domain->xy = box[0][1]; + domain->xz = box[0][2]; + domain->yz = box[1][2]; + domain->set_global_box(); + domain->set_local_box(); + uefbox->get_rot(rot); +} + +/* ---------------------------------------------------------------------- + * Do the lattice reduction if necessary. +-----------------------------------------------------------------------*/ +void FixNHUef::pre_exchange() +{ + // only need to reset things if the lattice needs to be reduced + if (uefbox->reduce()) + { + // go to lab frame + inv_rotate_x(rot); + inv_rotate_v(rot); + inv_rotate_f(rot); + // get & set the new box and rotation matrix + double vol = domain->xprd * domain->yprd * domain->zprd; + double box[3][3]; + uefbox->get_box(box,vol); + domain->boxhi[0] = domain->boxlo[0]+box[0][0]; + domain->boxhi[1] = domain->boxlo[1]+box[1][1]; + domain->boxhi[2] = domain->boxlo[2]+box[2][2]; + domain->xy = box[0][1]; + domain->xz = box[0][2]; + domain->yz = box[1][2]; + domain->set_global_box(); + domain->set_local_box(); + uefbox->get_rot(rot); + + // rotate to the new upper triangular frame + rotate_v(rot); + rotate_x(rot); + rotate_f(rot); + + // put all atoms in the new box + double **x = atom->x; + imageint *image = atom->image; + int nlocal = atom->nlocal; + for (int i=0; i<nlocal; i++) domain->remap(x[i],image[i]); + + // move atoms to the right processors + domain->x2lamda(atom->nlocal); + irregular->migrate_atoms(); + domain->lamda2x(atom->nlocal); + } +} + +/* ---------------------------------------------------------------------- + * The following are routines to rotate between the lab and upper triangular + * (UT) frames. For most of the time the simulation is in the UT frame. + * To get to the lab frame, apply the inv_rotate_[..](rot) and to + * get back to the UT frame apply rotate_[..](rot). + * + * Note: the rotate_x() functions also apply a shift to/from the fixedpoint + * to make the integration a little simpler. 
+ * ---------------------------------------------------------------------- */ +void FixNHUef::rotate_x(double r[3][3]) +{ + double **x = atom->x; + int *mask = atom->mask; + int nlocal = atom->nlocal; + if (igroup == atom->firstgroup) nlocal = atom->nfirst; + + double xn[3]; + for (int i=0;i<nlocal;i++) + { + if (mask[i] & groupbit) + { + xn[0]=r[0][0]*x[i][0]+r[0][1]*x[i][1]+r[0][2]*x[i][2]; + xn[1]=r[1][0]*x[i][0]+r[1][1]*x[i][1]+r[1][2]*x[i][2]; + xn[2]=r[2][0]*x[i][0]+r[2][1]*x[i][1]+r[2][2]*x[i][2]; + x[i][0]=xn[0]+domain->boxlo[0]; + x[i][1]=xn[1]+domain->boxlo[1]; + x[i][2]=xn[2]+domain->boxlo[2]; + } + } +} + +void FixNHUef::inv_rotate_x(double r[3][3]) +{ + double **x = atom->x; + int *mask = atom->mask; + int nlocal = atom->nlocal; + if (igroup == atom->firstgroup) nlocal = atom->nfirst; + + double xn[3]; + for (int i=0;i<nlocal;i++) + { + if (mask[i] & groupbit) + { + x[i][0] -= domain->boxlo[0]; + x[i][1] -= domain->boxlo[1]; + x[i][2] -= domain->boxlo[2]; + xn[0]=r[0][0]*x[i][0]+r[1][0]*x[i][1]+r[2][0]*x[i][2]; + xn[1]=r[0][1]*x[i][0]+r[1][1]*x[i][1]+r[2][1]*x[i][2]; + xn[2]=r[0][2]*x[i][0]+r[1][2]*x[i][1]+r[2][2]*x[i][2]; + x[i][0]=xn[0]; + x[i][1]=xn[1]; + x[i][2]=xn[2]; + } + } +} + +void FixNHUef::rotate_v(double r[3][3]) +{ + double **v = atom->v; + int *mask = atom->mask; + int nlocal = atom->nlocal; + if (igroup == atom->firstgroup) nlocal = atom->nfirst; + + double vn[3]; + for (int i=0;i<nlocal;i++) + { + if (mask[i] & groupbit) + { + vn[0]=r[0][0]*v[i][0]+r[0][1]*v[i][1]+r[0][2]*v[i][2]; + vn[1]=r[1][0]*v[i][0]+r[1][1]*v[i][1]+r[1][2]*v[i][2]; + vn[2]=r[2][0]*v[i][0]+r[2][1]*v[i][1]+r[2][2]*v[i][2]; + v[i][0]=vn[0]; v[i][1]=vn[1]; v[i][2]=vn[2]; + } + } +} + +void FixNHUef::inv_rotate_v(double r[3][3]) +{ + double **v = atom->v; + int *mask = atom->mask; + int nlocal = atom->nlocal; + if (igroup == atom->firstgroup) nlocal = atom->nfirst; + + double vn[3]; + for (int i=0;i<nlocal;i++) + { + if (mask[i] & groupbit) + { + 
vn[0]=r[0][0]*v[i][0]+r[1][0]*v[i][1]+r[2][0]*v[i][2]; + vn[1]=r[0][1]*v[i][0]+r[1][1]*v[i][1]+r[2][1]*v[i][2]; + vn[2]=r[0][2]*v[i][0]+r[1][2]*v[i][1]+r[2][2]*v[i][2]; + v[i][0]=vn[0]; v[i][1]=vn[1]; v[i][2]=vn[2]; + } + } +} + +void FixNHUef::rotate_f(double r[3][3]) +{ + double **f = atom->f; + int *mask = atom->mask; + int nlocal = atom->nlocal; + if (igroup == atom->firstgroup) nlocal = atom->nfirst; + + double fn[3]; + for (int i=0;i<nlocal;i++) + { + if (mask[i] & groupbit) + { + fn[0]=r[0][0]*f[i][0]+r[0][1]*f[i][1]+r[0][2]*f[i][2]; + fn[1]=r[1][0]*f[i][0]+r[1][1]*f[i][1]+r[1][2]*f[i][2]; + fn[2]=r[2][0]*f[i][0]+r[2][1]*f[i][1]+r[2][2]*f[i][2]; + f[i][0]=fn[0]; f[i][1]=fn[1]; f[i][2]=fn[2]; + } + } +} + +void FixNHUef::inv_rotate_f(double r[3][3]) +{ + double **f = atom->f; + int *mask = atom->mask; + int nlocal = atom->nlocal; + if (igroup == atom->firstgroup) nlocal = atom->nfirst; + double fn[3]; + for (int i=0;i<nlocal;i++) + { + if (mask[i] & groupbit) + { + fn[0]=r[0][0]*f[i][0]+r[1][0]*f[i][1]+r[2][0]*f[i][2]; + fn[1]=r[0][1]*f[i][0]+r[1][1]*f[i][1]+r[2][1]*f[i][2]; + fn[2]=r[0][2]*f[i][0]+r[1][2]*f[i][1]+r[2][2]*f[i][2]; + f[i][0]=fn[0]; f[i][1]=fn[1]; f[i][2]=fn[2]; + } + } +} + +/* ---------------------------------------------------------------------- + * Increase the size of the restart list to add in the strains + * ---------------------------------------------------------------------- */ +int FixNHUef::size_restart_global() +{ + return FixNH::size_restart_global() +2; +} + +/* ---------------------------------------------------------------------- + * Pack the strains after packing the default FixNH values + * ---------------------------------------------------------------------- */ +int FixNHUef::pack_restart_data(double *list) +{ + int n = FixNH::pack_restart_data(list); + list[n++] = strain[0]; + list[n++] = strain[1]; + return n; +} + +/* ---------------------------------------------------------------------- + * read and set the strains 
after the default FixNH values + * ---------------------------------------------------------------------- */ +void FixNHUef::restart(char *buf) +{ + int n = size_restart_global(); + FixNH::restart(buf); + double *list = (double *) buf; + strain[0] = list[n-2]; + strain[1] = list[n-1]; + uefbox->set_strain(strain[0],strain[1]); +} + +/* ---------------------------------------------------------------------- + * If the step writes a restart, reduce the box beforehand. This makes sure + * the unique box shape can be found once the restart is read and that + * all of the atoms lie within the box. + * This may only be necessary for RESPA runs, but I'm leaving it in anyway. + * ---------------------------------------------------------------------- */ +void FixNHUef::end_of_step() +{ + if (update->ntimestep==output->next_restart) + { + pre_exchange(); + domain->x2lamda(atom->nlocal); + domain->pbc(); + timer->stamp(); + comm->exchange(); + comm->borders(); + domain->lamda2x(atom->nlocal+atom->nghost); + timer->stamp(Timer::COMM); + neighbor->build(); + timer->stamp(Timer::NEIGH); + } +} + +/* ---------------------------------------------------------------------- + * reduce the simulation box after a run is complete. 
otherwise it won't + * be possible to resume from a write_restart since the initialization of + * the simulation box requires reduced simulation box + * ---------------------------------------------------------------------- */ +void FixNHUef::post_run() +{ + pre_exchange(); + domain->x2lamda(atom->nlocal); + domain->pbc(); + timer->stamp(); + comm->exchange(); + comm->borders(); + domain->lamda2x(atom->nlocal+atom->nghost); + timer->stamp(Timer::COMM); + neighbor->build(); + timer->stamp(Timer::NEIGH); +} + +/* ---------------------------------------------------------------------- + * public read for rotation matrix + * ---------------------------------------------------------------------- */ +void FixNHUef::get_rot(double r[3][3]) +{ + r[0][0] = rot[0][0]; + r[0][1] = rot[0][1]; + r[0][2] = rot[0][2]; + r[1][0] = rot[1][0]; + r[1][1] = rot[1][1]; + r[1][2] = rot[1][2]; + r[2][0] = rot[2][0]; + r[2][1] = rot[2][1]; + r[2][2] = rot[2][2]; +} + +/* ---------------------------------------------------------------------- + * public read for ext flags + * ---------------------------------------------------------------------- */ +void FixNHUef::get_ext_flags(bool* e) +{ + e[0] = ext_flags[0]; + e[1] = ext_flags[1]; + e[2] = ext_flags[2]; +} + +/* ---------------------------------------------------------------------- + * public read for simulation box + * ---------------------------------------------------------------------- */ +void FixNHUef::get_box(double b[3][3]) +{ + double box[3][3]; + double vol = domain->xprd * domain->yprd * domain->zprd; + uefbox->get_box(box,vol); + b[0][0] = box[0][0]; + b[0][1] = box[0][1]; + b[0][2] = box[0][2]; + b[1][0] = box[1][0]; + b[1][1] = box[1][1]; + b[1][2] = box[1][2]; + b[2][0] = box[2][0]; + b[2][1] = box[2][1]; + b[2][2] = box[2][2]; +} + +/* ---------------------------------------------------------------------- + * comparing floats + * it's imperfect, but should work provided no infinities + * 
---------------------------------------------------------------------- */ +bool FixNHUef::nearly_equal(double a, double b, double epsilon) +{ + double absa = fabs(a); + double absb = fabs(b); + double diff = fabs(a-b); + if (a == b) return true; + else if ( (absa+absb) < epsilon) + return diff < epsilon*epsilon; + else + return diff/(absa+absb) < epsilon; +} diff --git a/src/USER-UEF/fix_nh_uef.h b/src/USER-UEF/fix_nh_uef.h new file mode 100644 index 0000000000000000000000000000000000000000..43f5bb46a9f2c97c6f3a5f784506d3c9839aed56 --- /dev/null +++ b/src/USER-UEF/fix_nh_uef.h @@ -0,0 +1,128 @@ +/* -*- c++ -*- ---------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + www.cs.sandia.gov/~sjplimp/lammps.html + Steve Plimpton, sjplimp@sandia.gov, Sandia National Laboratories + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. 
+ + Contributing Author: David Nicholson (MIT) +------------------------------------------------------------------------- */ + + +#ifndef LMP_FIX_NH_UEF_H +#define LMP_FIX_NH_UEF_H + +#include "fix_nh.h" + +namespace LAMMPS_NS { + // forward declaration + namespace UEF_utils { + class UEFBox; + }; + +class FixNHUef : public FixNH { + public: + FixNHUef(class LAMMPS *, int, char **); + virtual ~FixNHUef(); + virtual int setmask(); + virtual void init(); + virtual void setup(int); + virtual void pre_exchange(); + virtual int pack_restart_data(double*); + virtual void restart(char *); + virtual void end_of_step(); + virtual void initial_integrate(int); + virtual void final_integrate(); + virtual void initial_integrate_respa(int, int, int); + virtual void final_integrate_respa(int, int); + virtual void post_run(); + void get_rot(double[3][3]); + void get_ext_flags(bool*); + void get_box(double[3][3]); + + protected: + virtual void remap(); + virtual int size_restart_global(); + virtual void nve_x(); + virtual void nve_v(); + void rotate_x(double [3][3]); + void inv_rotate_x(double[3][3]); + void rotate_v(double[3][3]); + void inv_rotate_v(double[3][3]); + void rotate_f(double[3][3]); + void inv_rotate_f(double[3][3]); + double strain[2],erate[2]; // strain/strain rate : [e_x, e_y] + // always assume traceless e_z = -e_x-e_y + + int rem; //this is for the narg kluge + + UEF_utils::UEFBox *uefbox; // interface for the special simulation box + + double rot[3][3]; // rotation matrix + bool ext_flags[3]; // flags for external "free surfaces" + bool nearly_equal(double,double,double); + //bool rotate_output; // experimental feature. Too many issues for now +}; + +} + +#endif + +/* ERROR/WARNING messages: + +This class is derived from FixNH, so it inherits most of that class's error/warning messages along with the following: + +E: Illegal fix nvt/npt/uef command + +Self-explanatory + +E: Keyword erate must be set for fix nvt/npt/uef command + +Self-explanatory. 
+ +E: Simulation box must be triclinic for fix/nvt/npt/uef + +Self-explanatory. + +E: Only normal stresses can be controlled with fix/nvt/npt/uef + +The keywords xy xz and yz cannot be used for pressure control + +E: The ext keyword may only be used with iso pressure control + +Self-explanatory + +E: All controlled stresses must have the same value in fix/nvt/npt/uef + +Stress control is only possible when the stress specified for each dimension is the same + +E: Dimensions with controlled stresses must have same strain rate in fix/nvt/npt/uef + +Stress-controlled dimensions with the same strain rate must have the same target stress. + +E: Can't use another fix which changes box shape with fix/nvt/npt/uef + +The fix npt/nvt/uef command must have full control over the box shape. You cannot use a simultaneous fix deform command, for example. + +E: Pressure ID for fix/nvt/uef doesn't exist + +The compute pressure introduced via fix_modify does not exist + +E: Using fix nvt/npt/uef without a compute pressure/uef + +Self-explanatory. + +E: Using fix nvt/npt/uef without a compute temp/uef + +Self-explanatory. + +E: Initial box is not close enough to the expected uef box + +The initial box does not correspond to the shape required by the value of the strain keyword. If the default strain value of zero was used, the initial box is not cubic. + +*/ diff --git a/src/USER-UEF/fix_npt_uef.cpp b/src/USER-UEF/fix_npt_uef.cpp new file mode 100644 index 0000000000000000000000000000000000000000..b8ab1eccf92c6b044e9d8274d2ebf1d6eece8f38 --- /dev/null +++ b/src/USER-UEF/fix_npt_uef.cpp @@ -0,0 +1,28 @@ +/* ---------------------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. 
Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. + + Contributing author: David Nicholson (MIT) +------------------------------------------------------------------------- */ + +#include "fix_npt_uef.h" +#include "error.h" + +using namespace LAMMPS_NS; + +FixNPTUef::FixNPTUef(LAMMPS *lmp, int narg, char **arg) : + FixNHUef(lmp, narg, arg) +{ + if (!tstat_flag) + error->all(FLERR,"Temperature control must be used with fix npt/uef"); + if (!pstat_flag) + error->all(FLERR,"Pressure control must be used with fix npt/uef"); +} diff --git a/src/USER-UEF/fix_npt_uef.h b/src/USER-UEF/fix_npt_uef.h new file mode 100644 index 0000000000000000000000000000000000000000..4c0cd28ae02236e11d678c6fd5cd076dcbf7febc --- /dev/null +++ b/src/USER-UEF/fix_npt_uef.h @@ -0,0 +1,50 @@ +/* -*- c++ -*- ---------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. + + Contributing author: David Nicholson (MIT) +------------------------------------------------------------------------- */ + +#ifdef FIX_CLASS + +FixStyle(npt/uef,FixNPTUef) + +#else + +#ifndef LMP_FIX_NPT_UEF_H +#define LMP_FIX_NPT_UEF_H + +#include "fix_nh_uef.h" + +namespace LAMMPS_NS { + +class FixNPTUef : public FixNHUef { + public: + FixNPTUef(class LAMMPS *, int, char **); + ~FixNPTUef() {} +}; + +} + +#endif +#endif + +/* ERROR/WARNING messages: + +E: Temperature control must be used with fix npt uef + +Self-explanatory. 
+ +E: Pressure control must be used with fix npt uef + +Self-explanatory. + +*/ diff --git a/src/USER-UEF/fix_nvt_uef.cpp b/src/USER-UEF/fix_nvt_uef.cpp new file mode 100644 index 0000000000000000000000000000000000000000..2fd4a2854571969fb49c19d64c77c0ef901787d1 --- /dev/null +++ b/src/USER-UEF/fix_nvt_uef.cpp @@ -0,0 +1,30 @@ +/* ---------------------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. + + Contributing author: David Nicholson (MIT) +------------------------------------------------------------------------- */ + +#include "fix_nvt_uef.h" +#include "error.h" + +using namespace LAMMPS_NS; + +FixNVTUef::FixNVTUef(LAMMPS *lmp, int narg, char **arg) : + FixNHUef(lmp, narg, arg) +{ + if (!tstat_flag) + error->all(FLERR,"Temperature control must be used with fix nvt/uef"); + if (pstat_flag) + error->all(FLERR,"Pressure control can't be used with fix nvt/uef"); +} + + diff --git a/src/USER-UEF/fix_nvt_uef.h b/src/USER-UEF/fix_nvt_uef.h new file mode 100644 index 0000000000000000000000000000000000000000..718e36e756780086884c4c451a5f5faa67c29573 --- /dev/null +++ b/src/USER-UEF/fix_nvt_uef.h @@ -0,0 +1,50 @@ +/* -*- c++ -*- ---------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. 
Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. + + Contributing author: David Nicholson (MIT) +------------------------------------------------------------------------- */ + +#ifdef FIX_CLASS + +FixStyle(nvt/uef,FixNVTUef) + +#else + +#ifndef LMP_FIX_NVT_UEF_H +#define LMP_FIX_NVT_UEF_H + +#include "fix_nh_uef.h" + +namespace LAMMPS_NS { + +class FixNVTUef : public FixNHUef { + public: + FixNVTUef(class LAMMPS *, int, char **); + ~FixNVTUef(){} +}; + +} + +#endif +#endif + +/* ERROR/WARNING messages: + +E: Temperature control must be used with fix nvt/uef + +Self-explanatory. + +E: Pressure control can't be used with fix nvt/uef + +Self-explanatory. + +*/ diff --git a/src/USER-UEF/uef_utils.cpp b/src/USER-UEF/uef_utils.cpp new file mode 100644 index 0000000000000000000000000000000000000000..f270fe1d8b41760aa1236c1e4b478f79a24203ca --- /dev/null +++ b/src/USER-UEF/uef_utils.cpp @@ -0,0 +1,366 @@ +/* ---------------------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. 
+ + Contributing author: David Nicholson (MIT) +------------------------------------------------------------------------- + + This class contains functions to calculate the evolution of the periodic + simulation box under elongational flow as described by Matthew Dobson + in the arXiv preprint at http://arxiv.org/abs/1408.7078 + + Additionally, there are methods to do a lattice reduction to further + reduce the simulation box using the method of Igor Semaev at + http://link.springer.com/chapter/10.1007%2F3-540-44670-2_13 +*/ + +#include <math.h> +#include "uef_utils.h" + +namespace LAMMPS_NS { + namespace UEF_utils{ + +UEFBox::UEFBox() +{ + // initial box (also an inverse eigenvector matrix of automorphisms) + double x = 0.327985277605681; + double y = 0.591009048506103; + double z = 0.736976229099578; + l0[0][0]= z; l0[0][1]= y; l0[0][2]= x; + l0[1][0]=-x; l0[1][1]= z; l0[1][2]=-y; + l0[2][0]=-y; l0[2][1]= x; l0[2][2]= z; + // spectra of the two automorpisms (log of eigenvalues) + w1[0]=-1.177725211523360; + w1[1]=-0.441448620566067; + w1[2]= 1.619173832089425; + w2[0]= w1[1]; + w2[1]= w1[2]; + w2[2]= w1[0]; + // initialize theta + // strain = w1 * theta1 + w2 * theta2 + theta[0]=theta[1]=0; + + + //set up the initial box l and change of basis matrix r + for (int k=0;k<3;k++) + for (int j=0;j<3;j++) + { + l[k][j] = l0[k][j]; + r[j][k]=(j==k); + } + + // get the initial rotation and upper triangular matrix + rotation_matrix(rot, lrot ,l); + + // this is just a way to calculate the automorphisms + // themselves, which play a minor role in the calculations + // it's overkill, but only called once + double t1[3][3]; + double t1i[3][3]; + double t2[3][3]; + double t2i[3][3]; + double l0t[3][3]; + for (int k=0; k<3; ++k) + for (int j=0; j<3; ++j) + { + t1[k][j] = exp(w1[k])*l0[k][j]; + t1i[k][j] = exp(-w1[k])*l0[k][j]; + t2[k][j] = exp(w2[k])*l0[k][j]; + t2i[k][j] = exp(-w2[k])*l0[k][j]; + l0t[k][j] = l0[j][k]; + } + mul_m2(l0t,t1); + mul_m2(l0t,t1i); + mul_m2(l0t,t2); 
+ mul_m2(l0t,t2i); + for (int k=0; k<3; ++k) + for (int j=0; j<3; ++j) + { + a1[k][j] = round(t1[k][j]); + a1i[k][j] = round(t1i[k][j]); + a2[k][j] = round(t2[k][j]); + a2i[k][j] = round(t2i[k][j]); + } + + // winv used to transform between + // strain increments and theta increments + winv[0][0] = w2[1]; + winv[0][1] = -w2[0]; + winv[1][0] = -w1[1]; + winv[1][1] = w1[0]; + double d = w1[0]*w2[1] - w2[0]*w1[1]; + for (int k=0;k<2;k++) + for (int j=0;j<2;j++) + winv[k][j] /= d; +} + +// get volume-correct r basis in: basis*cbrt(vol) = q*r +// x receives the upper triangular basis lrot scaled by cbrt(v); +// double-precision cbrt is used so v is not rounded through float +void UEFBox::get_box(double x[3][3], double v) +{ + v = cbrt(v); + for (int k=0;k<3;k++) + for (int j=0;j<3;j++) + x[k][j] = lrot[k][j]*v; +} + +// get rotation matrix q in: basis = q*r +void UEFBox::get_rot(double x[3][3]) +{ + for (int k=0;k<3;k++) + for (int j=0;j<3;j++) + x[k][j]=rot[k][j]; +} + +// diagonal, incompressible deformation +void UEFBox::step_deform(const double ex, const double ey) +{ + // increment theta values used in the reduction + theta[0] +=winv[0][0]*ex + winv[0][1]*ey; + theta[1] +=winv[1][0]*ex + winv[1][1]*ey; + + // deformation of the box. 
reduce() needs to + // be called regularly or calculation will become + // unstable + double eps[3]; + eps[0]=ex; eps[1] = ey; eps[2] = -ex-ey; + for (int k=0;k<3;k++) + { + eps[k] = exp(eps[k]); + l[k][0] = eps[k]*l[k][0]; + l[k][1] = eps[k]*l[k][1]; + l[k][2] = eps[k]*l[k][2]; + } + rotation_matrix(rot,lrot, l); +} +// reduce the current basis +bool UEFBox::reduce() +{ + // determine how many times to apply the automorphisms + // and find new theta values + int f1 = round(theta[0]); + int f2 = round(theta[1]); + theta[0] -= f1; + theta[1] -= f2; + + // store old change of basis matrix to determine if it + // changes + int r0[3][3]; + for (int k=0;k<3;k++) + for (int j=0;j<3;j++) + r0[k][j]=r[k][j]; + + // this modifies the old change of basis matrix to + // handle the case where the automorphism transforms + // the box but the reduced basis doesn't change + // (r0 should still equal r at the end) + if (f1 > 0) for (int k=0;k<f1;k++) mul_m2 (a1,r0); + if (f1 < 0) for (int k=0;k<-f1;k++) mul_m2 (a1i,r0); + if (f2 > 0) for (int k=0;k<f2;k++) mul_m2 (a2,r0); + if (f2 < 0) for (int k=0;k<-f2;k++) mul_m2 (a2i,r0); + + // robust reduction to the box defined by Dobson + for (int k=0;k<3;k++) + { + double eps = exp(theta[0]*w1[k]+theta[1]*w2[k]); + l[k][0] = eps*l0[k][0]; + l[k][1] = eps*l0[k][1]; + l[k][2] = eps*l0[k][2]; + } + // further reduce the box using greedy reduction and check + // if it changed from the last step using the change of basis + // matrices r and r0 + greedy(l,r); + rotation_matrix(rot,lrot, l); + return !mat_same(r,r0); +} +// set theta from a total strain (ex,ey), drop its integer part, +// and rebuild the reduced basis and rotation from l0 +void UEFBox::set_strain(const double ex, const double ey) +{ + theta[0] =winv[0][0]*ex + winv[0][1]*ey; + theta[1] =winv[1][0]*ex + winv[1][1]*ey; + theta[0] -= round(theta[0]); + theta[1] -= round(theta[1]); + + for (int k=0;k<3;k++) + { + double eps = exp(theta[0]*w1[k]+theta[1]*w2[k]); + l[k][0] = eps*l0[k][0]; + l[k][1] = eps*l0[k][1]; + l[k][2] = eps*l0[k][2]; + } + greedy(l,r); + rotation_matrix(rot,lrot, l); +} + +// this 
is just qr reduction using householder reflections +// m is input matrix, q is a rotation, r is upper triangular +// q*m = r +void rotation_matrix(double q[3][3], double r[3][3], const double m[3][3]) +{ + for (int k=0;k<3;k++) + for (int j=0;j<3;j++) + r[k][j] = m[k][j]; + + double a = -sqrt(col_prod(r,0,0))*r[0][0]/fabs(r[0][0]); + double v[3]; + v[0] = r[0][0]-a; + v[1] = r[1][0]; + v[2] = r[2][0]; + a = sqrt(v[0]*v[0]+v[1]*v[1]+v[2]*v[2]); + v[0] /= a; v[1] /= a; v[2] /= a; + double qt[3][3]; + for (int k=0;k<3;k++) + for (int j=0;j<3;j++) + { + qt[k][j] = (k==j) - 2*v[k]*v[j]; + q[k][j]= qt[k][j]; + } + mul_m2(qt,r); + a = -sqrt(r[1][1]*r[1][1] + r[2][1]*r[2][1])*r[1][1]/fabs(r[1][1]); + v[0] = 0; + v[1] = r[1][1] - a; + v[2] = r[2][1]; + a = sqrt(v[1]*v[1]+v[2]*v[2]); + v[1] /= a; + v[2] /= a; + for (int k=0;k<3;k++) + for (int j=0;j<3;j++) + qt[k][j] = (k==j) - 2*v[k]*v[j]; + mul_m2(qt,r); + mul_m2(qt,q); + // this makes r have positive diagonals + // q*m = r <==> (-q)*m = (-r) will hold row-wise + if (r[0][0] < 0){ neg_row(q,0); neg_row(r,0); } + if (r[1][1] < 0){ neg_row(q,1); neg_row(r,1); } + if (r[2][2] < 0){ neg_row(q,2); neg_row(r,2); } +} + + + +//sort columns in order of increasing length +void col_sort(double b[3][3],int r[3][3]) +{ + if (col_prod(b,0,0)>col_prod(b,1,1)) + { + col_swap(b,0,1); + col_swap(r,0,1); + } + if (col_prod(b,0,0)>col_prod(b,2,2)) + { + col_swap(b,0,2); + col_swap(r,0,2); + } + if (col_prod(b,1,1)>col_prod(b,2,2)) + { + col_swap(b,1,2); + col_swap(r,1,2); + } +} + + +// 1-2 reduction (Graham-Schmidt) +void red12(double b[3][3],int r[3][3]) +{ + int y = round(col_prod(b,0,1)/col_prod(b,0,0)); + b[0][1] -= y*b[0][0]; + b[1][1] -= y*b[1][0]; + b[2][1] -= y*b[2][0]; + + r[0][1] -= y*r[0][0]; + r[1][1] -= y*r[1][0]; + r[2][1] -= y*r[2][0]; + if (col_prod(b,1,1) < col_prod(b,0,0)) + { + col_swap(b,0,1); + col_swap(r,0,1); + red12(b,r); + } +} + +// The Semaev condition for a 3-reduced basis +void red3(double b[3][3], int r[3][3]) 
+{ + double b11 = col_prod(b,0,0); + double b22 = col_prod(b,1,1); + double b12 = col_prod(b,0,1); + double b13 = col_prod(b,0,2); + double b23 = col_prod(b,1,2); + + double y2 =-(b23/b22-b12/b22*b13/b11)/(1-b12/b11*b12/b22); + double y1 =-(b13/b11-b12/b11*b23/b22)/(1-b12/b11*b12/b22); + + int x1=0; + int x2=0; + double min = col_prod(b,2,2); + int x1v[2]; + int x2v[2]; + x1v[0] = floor(y1); x1v[1] = x1v[0]+1; + x2v[0] = floor(y2); x2v[1] = x2v[0]+1; + for (int k=0;k<2;k++) + for (int j=0;j<2;j++) + { + double a[3]; + a[0] = b[0][2] + x1v[k]*b[0][0] + x2v[j]*b[0][1]; + a[1] = b[1][2] + x1v[k]*b[1][0] + x2v[j]*b[1][1]; + a[2] = b[2][2] + x1v[k]*b[2][0] + x2v[j]*b[2][1]; + double val=a[0]*a[0]+a[1]*a[1]+a[2]*a[2]; + if (val<min) + { + min = val; + x1 = x1v[k]; + x2 = x2v[j]; + } + } + if (x1 || x2) + { + b[0][2] += x1*b[0][0] + x2*b[0][1]; + b[1][2] += x1*b[1][0] + x2*b[1][1]; + b[2][2] += x1*b[2][0] + x2*b[2][1]; + r[0][2] += x1*r[0][0] + x2*r[0][1]; + r[1][2] += x1*r[1][0] + x2*r[1][1]; + r[2][2] += x1*r[2][0] + x2*r[2][1]; + greedy_recurse(b,r); // note the recursion step is here + } +} + +// the meat of the greedy reduction algorithm +void greedy_recurse(double b[3][3], int r[3][3]) +{ + col_sort(b,r); + red12(b,r); + red3(b,r); // recursive caller +} + +// set r (change of basis) to be identity then reduce basis and make it unique +void greedy(double b[3][3],int r[3][3]) +{ + r[0][1]=r[0][2]=r[1][0]=r[1][2]=r[2][0]=r[2][1]=0; + r[0][0]=r[1][1]=r[2][2]=1; + greedy_recurse(b,r); + make_unique(b,r); +} + +// A reduced basis isn't unique. This procedure will make it +// "more" unique. Degenerate cases are possible, but unlikely +// with floating point math. 
+void make_unique(double b[3][3], int r[3][3]) +{ + if (fabs(b[0][0]) < fabs(b[0][1])) + { col_swap(b,0,1); col_swap(r,0,1); } + if (fabs(b[0][0]) < fabs(b[0][2])) + { col_swap(b,0,2); col_swap(r,0,2); } + if (fabs(b[1][1]) < fabs(b[1][2])) + { col_swap(b,1,2); col_swap(r,1,2); } + + if (b[0][0] < 0){ neg_col(b,0); neg_col(r,0); } + if (b[1][1] < 0){ neg_col(b,1); neg_col(r,1); } + if (det(b) < 0){ neg_col(b,2); neg_col(r,2); } +} +}} diff --git a/src/USER-UEF/uef_utils.h b/src/USER-UEF/uef_utils.h new file mode 100644 index 0000000000000000000000000000000000000000..a16f6fff1a70f1e5e15b3f993efc702deaaf034f --- /dev/null +++ b/src/USER-UEF/uef_utils.h @@ -0,0 +1,131 @@ +/* -*- c++ -*- ---------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. 
+ + Contributing author: David Nicholson (MIT) +------------------------------------------------------------------------- */ + +#ifndef LMP_UEF_UTILS_H +#define LMP_UEF_UTILS_H + +namespace LAMMPS_NS{ namespace UEF_utils { + +class UEFBox +{ + public: + UEFBox(); + void set_strain(const double, const double); + void step_deform(const double,const double); + bool reduce(); + void get_box(double[3][3], double); + void get_rot(double[3][3]); + private: + double l0[3][3]; // initial basis + double w1[3],w2[3], winv[3][3]; // omega1 and omega2 (spectra of automorphisms) + //double edot[3], delta[2]; + double theta[2]; + double l[3][3], rot[3][3], lrot[3][3]; + int r[3][3],a1[3][3],a2[3][3],a1i[3][3],a2i[3][3]; +}; + + +// lattice reduction routines +void greedy(double[3][3],int[3][3]); +void col_sort(double[3][3],int[3][3]); +void red12(double[3][3],int[3][3]); +void greedy_recurse(double[3][3],int[3][3]); +void red3(double [3][3],int r[3][3]); +void make_unique(double[3][3],int[3][3]); +void rotation_matrix(double[3][3],double[3][3],const double [3][3]); + +// A few utility functions for 3x3 arrays +template<typename T> +T col_prod(T x[3][3], int c1, int c2) +{ + return x[0][c1]*x[0][c2]+x[1][c1]*x[1][c2]+x[2][c1]*x[2][c2]; +} + +template<typename T> +void col_swap(T x[3][3], int c1, int c2) +{ + for (int k=0;k<3;k++) + { + T t = x[k][c2]; + x[k][c2]=x[k][c1]; + x[k][c1]=t; + } +} + +template<typename T> +void neg_col(T x[3][3], int c1) +{ + x[0][c1] = -x[0][c1]; + x[1][c1] = -x[1][c1]; + x[2][c1] = -x[2][c1]; +} + +template<typename T> +void neg_row(T x[3][3], int c1) +{ + x[c1][0] = -x[c1][0]; + x[c1][1] = -x[c1][1]; + x[c1][2] = -x[c1][2]; +} + +template<typename T> +T det(T x[3][3]) +{ + double val; + val = x[0][0]*(x[1][1]*x[2][2] - x[1][2]*x[2][1]); + val -= x[0][1]*(x[1][0]*x[2][2] - x[1][2]*x[2][0]); + val += x[0][2]*(x[1][0]*x[2][1] - x[1][1]*x[2][0]); + return val; +} + +template<typename T> +bool mat_same(T x1[3][3], T x2[3][3]) +{ + bool v = true; + for 
(int k=0;k<3;k++) + for (int j=0;j<3;j++) + v &= (x1[k][j]==x2[k][j]); + return v; +} + +template<typename T> +void mul_m1(T m1[3][3], const T m2[3][3]) +{ + T t[3][3]; + for (int k=0;k<3;k++) + for (int j=0;j<3;j++) + t[k][j]=m1[k][j]; + + for (int k=0;k<3;k++) + for (int j=0;j<3;j++) + m1[k][j] = t[k][0]*m2[0][j] + t[k][1]*m2[1][j] + t[k][2]*m2[2][j]; +} + +template<typename T> +void mul_m2(const T m1[3][3], T m2[3][3]) +{ + T t[3][3]; + for (int k=0;k<3;k++) + for (int j=0;j<3;j++) + t[k][j]=m2[k][j]; + + for (int k=0;k<3;k++) + for (int j=0;j<3;j++) + m2[k][j] = m1[k][0]*t[0][j] + m1[k][1]*t[1][j] + m1[k][2]*t[2][j]; +} + +} +} +#endif diff --git a/src/atom.cpp b/src/atom.cpp index 1191f0f2b557ca769c07cb7971712b2ec33dba3b..7d343a0807798418edec5725ff50b6168a6658d4 100644 --- a/src/atom.cpp +++ b/src/atom.cpp @@ -453,12 +453,12 @@ void Atom::create_avec(const char *style, int narg, char **arg, int trysuffix) // if molecular system: // atom IDs must be defined // force atom map to be created - // map style may be reset by map_init() and its call to map_style_set() + // map style will be reset to array vs hash to by map_init() molecular = avec->molecular; if (molecular && tag_enable == 0) error->all(FLERR,"Atom IDs must be used for molecular systems"); - if (molecular) map_style = 1; + if (molecular) map_style = 3; } /* ---------------------------------------------------------------------- @@ -593,6 +593,7 @@ void Atom::modify_params(int narg, char **arg) "Atom_modify map command after simulation box is defined"); if (strcmp(arg[iarg+1],"array") == 0) map_user = 1; else if (strcmp(arg[iarg+1],"hash") == 0) map_user = 2; + else if (strcmp(arg[iarg+1],"yes") == 0) map_user = 3; else error->all(FLERR,"Illegal atom_modify command"); map_style = map_user; iarg += 2; diff --git a/src/atom_map.cpp b/src/atom_map.cpp index bbfe014dec3ae6032ffe973fbb0fd282ef4af1b8..9d257d99de871f599afe709a399d83b08bfdda18 100644 --- a/src/atom_map.cpp +++ b/src/atom_map.cpp @@ -298,12 
+298,12 @@ int Atom::map_style_set() MPI_Allreduce(&max,&map_tag_max,1,MPI_LMP_TAGINT,MPI_MAX,world); // set map_style for new map - // if user-selected, use that setting + // if user-selected to array/hash, use that setting // else if map_tag_max > 1M, use hash // else use array int map_style_old = map_style; - if (map_user) map_style = map_user; + if (map_user == 1 || map_user == 2) map_style = map_user; else if (map_tag_max > 1000000) map_style = 2; else map_style = 1; diff --git a/src/comm_brick.cpp b/src/comm_brick.cpp index 3c972b82449377dcdd6c9dbe7c929ca2a584080c..06227b7a84cfeb2e7e230218778a4b60fca411cb 100644 --- a/src/comm_brick.cpp +++ b/src/comm_brick.cpp @@ -476,8 +476,7 @@ void CommBrick::forward_comm(int dummy) if (sendproc[iswap] != me) { if (comm_x_only) { if (size_forward_recv[iswap]) { - if (size_forward_recv[iswap]) buf = x[firstrecv[iswap]]; - else buf = NULL; + buf = x[firstrecv[iswap]]; MPI_Irecv(buf,size_forward_recv[iswap],MPI_DOUBLE, recvproc[iswap],0,world,&request); } @@ -547,8 +546,7 @@ void CommBrick::reverse_comm() MPI_Irecv(buf_recv,size_reverse_recv[iswap],MPI_DOUBLE, sendproc[iswap],0,world,&request); if (size_reverse_send[iswap]) { - if (size_reverse_send[iswap]) buf = f[firstrecv[iswap]]; - else buf = NULL; + buf = f[firstrecv[iswap]]; MPI_Send(buf,size_reverse_send[iswap],MPI_DOUBLE, recvproc[iswap],0,world); } diff --git a/src/create_atoms.cpp b/src/create_atoms.cpp index 04a2df91f8b252586705d8418cc774afb4cc448c..992049a81f0d73d8182e393d2d1a2ac4343c6d35 100644 --- a/src/create_atoms.cpp +++ b/src/create_atoms.cpp @@ -343,6 +343,11 @@ void CreateAtoms::command(int narg, char **arg) } } + // Record wall time for atom creation + + MPI_Barrier(world); + double time1 = MPI_Wtime(); + // clear ghost count and any ghost bonus data internal to AtomVec // same logic as beginning of Comm::exchange() // do it now b/c creating atoms will overwrite ghost atoms @@ -509,6 +514,9 @@ void CreateAtoms::command(int narg, char **arg) if 
(domain->triclinic) domain->lamda2x(atom->nlocal); } + MPI_Barrier(world); + double time2 = MPI_Wtime(); + // clean up delete ranmol; @@ -521,12 +529,16 @@ void CreateAtoms::command(int narg, char **arg) // print status if (comm->me == 0) { - if (screen) + if (screen) { fprintf(screen,"Created " BIGINT_FORMAT " atoms\n", atom->natoms-natoms_previous); - if (logfile) + fprintf(screen," Time spent = %g secs\n",time2-time1); + } + if (logfile) { fprintf(logfile,"Created " BIGINT_FORMAT " atoms\n", atom->natoms-natoms_previous); + fprintf(logfile," Time spent = %g secs\n",time2-time1); + } } // for MOLECULE mode: diff --git a/src/domain.cpp b/src/domain.cpp index 427f7785e816c19d8d309abd35b8149113994691..bad503037322e1a51b2cd1a62eeb9290e943a54c 100644 --- a/src/domain.cpp +++ b/src/domain.cpp @@ -772,7 +772,7 @@ void Domain::image_check() delz = unwrap[i][2] - unwrap[k][2]; if (xperiodic && delx > xprd_half) flag = 1; - if (xperiodic && dely > yprd_half) flag = 1; + if (yperiodic && dely > yprd_half) flag = 1; if (dimension == 3 && zperiodic && delz > zprd_half) flag = 1; if (!xperiodic && delx > xprd) flag = 1; if (!yperiodic && dely > yprd) flag = 1; diff --git a/src/dump.cpp b/src/dump.cpp index 44098298ba53918e9fb38b61a12ae97e46682eda..ddd958c25c6715c5f41512fb542280341839b8c1 100644 --- a/src/dump.cpp +++ b/src/dump.cpp @@ -238,7 +238,7 @@ void Dump::init() int gcmcflag = 0; for (int i = 0; i < modify->nfix; i++) if ((strcmp(modify->fix[i]->style,"gcmc") == 0)) - gcmcflag = 1; + gcmcflag = 1; if (sortcol == 0 && atom->tag_consecutive() && !gcmcflag) { tagint *tag = atom->tag; @@ -898,7 +898,7 @@ void Dump::modify_params(int narg, char **arg) } else if (strcmp(arg[iarg],"fileper") == 0) { if (iarg+2 > narg) error->all(FLERR,"Illegal dump_modify command"); if (!multiproc) - error->all(FLERR,"Cannot use dump_modify fileper " + error->all(FLERR,"Cannot use dump_modify fileper " "without % in dump file name"); int nper = force->inumeric(FLERR,arg[iarg+1]); if (nper <= 
0) error->all(FLERR,"Illegal dump_modify command"); @@ -973,7 +973,7 @@ void Dump::modify_params(int narg, char **arg) } else if (strcmp(arg[iarg],"nfile") == 0) { if (iarg+2 > narg) error->all(FLERR,"Illegal dump_modify command"); if (!multiproc) - error->all(FLERR,"Cannot use dump_modify nfile " + error->all(FLERR,"Cannot use dump_modify nfile " "without % in dump file name"); int nfile = force->inumeric(FLERR,arg[iarg+1]); if (nfile <= 0) error->all(FLERR,"Illegal dump_modify command"); diff --git a/src/finish.cpp b/src/finish.cpp index 45e9226388d30c4a6cd33fc1f91d75500e323d99..c22ecaae6053c828f896b6ebfae2b8ef471ceb90 100644 --- a/src/finish.cpp +++ b/src/finish.cpp @@ -130,7 +130,7 @@ void Finish::end(int flag) atom->natoms); if (logfile) fprintf(logfile,fmt1,time_loop,ntasks,update->nsteps, atom->natoms); - + // Gromacs/NAMD-style performance metric for suitable unit settings if ( timeflag && !minflag && !prdflag && !tadflag && @@ -144,7 +144,7 @@ void Finish::end(int flag) double one_fs = force->femtosecond; double t_step = ((double) time_loop) / ((double) update->nsteps); double step_t = 1.0/t_step; - + if (strcmp(update->unit_style,"lj") == 0) { double tau_day = 24.0*3600.0 / t_step * update->dt / one_fs; const char perf[] = "Performance: %.3f tau/day, %.3f timesteps/s\n"; @@ -161,7 +161,7 @@ void Finish::end(int flag) } // CPU use on MPI tasks and OpenMP threads - + if (timeflag) { if (lmp->kokkos) { const char fmt2[] = diff --git a/src/fix.h b/src/fix.h index 3f32895309ac1cacf3d0e76ed2cebd5e59246179..21dfc955a87b85bed87c4703e2b44d4c6a252d5f 100644 --- a/src/fix.h +++ b/src/fix.h @@ -113,6 +113,7 @@ class Fix : protected Pointers { virtual void setup(int) {} virtual void setup_pre_exchange() {} virtual void setup_pre_neighbor() {} + virtual void setup_post_neighbor() {} virtual void setup_pre_force(int) {} virtual void setup_pre_reverse(int, int) {} virtual void min_setup(int) {} @@ -120,6 +121,7 @@ class Fix : protected Pointers { virtual void 
post_integrate() {} virtual void pre_exchange() {} virtual void pre_neighbor() {} + virtual void post_neighbor() {} virtual void pre_force(int) {} virtual void pre_reverse(int,int) {} virtual void post_force(int) {} @@ -155,6 +157,7 @@ class Fix : protected Pointers { virtual void min_pre_exchange() {} virtual void min_pre_neighbor() {} + virtual void min_post_neighbor() {} virtual void min_pre_force(int) {} virtual void min_pre_reverse(int,int) {} virtual void min_post_force(int) {} @@ -244,25 +247,27 @@ namespace FixConst { static const int POST_INTEGRATE = 1<<1; static const int PRE_EXCHANGE = 1<<2; static const int PRE_NEIGHBOR = 1<<3; - static const int PRE_FORCE = 1<<4; - static const int PRE_REVERSE = 1<<5; - static const int POST_FORCE = 1<<6; - static const int FINAL_INTEGRATE = 1<<7; - static const int END_OF_STEP = 1<<8; - static const int POST_RUN = 1<<9; - static const int THERMO_ENERGY = 1<<10; - static const int INITIAL_INTEGRATE_RESPA = 1<<11; - static const int POST_INTEGRATE_RESPA = 1<<12; - static const int PRE_FORCE_RESPA = 1<<13; - static const int POST_FORCE_RESPA = 1<<14; - static const int FINAL_INTEGRATE_RESPA = 1<<15; - static const int MIN_PRE_EXCHANGE = 1<<16; - static const int MIN_PRE_NEIGHBOR = 1<<17; - static const int MIN_PRE_FORCE = 1<<18; - static const int MIN_PRE_REVERSE = 1<<19; - static const int MIN_POST_FORCE = 1<<20; - static const int MIN_ENERGY = 1<<21; - static const int FIX_CONST_LAST = 1<<22; + static const int POST_NEIGHBOR = 1<<4; + static const int PRE_FORCE = 1<<5; + static const int PRE_REVERSE = 1<<6; + static const int POST_FORCE = 1<<7; + static const int FINAL_INTEGRATE = 1<<8; + static const int END_OF_STEP = 1<<9; + static const int POST_RUN = 1<<10; + static const int THERMO_ENERGY = 1<<11; + static const int INITIAL_INTEGRATE_RESPA = 1<<12; + static const int POST_INTEGRATE_RESPA = 1<<13; + static const int PRE_FORCE_RESPA = 1<<14; + static const int POST_FORCE_RESPA = 1<<15; + static const int 
FINAL_INTEGRATE_RESPA = 1<<16; + static const int MIN_PRE_EXCHANGE = 1<<17; + static const int MIN_PRE_NEIGHBOR = 1<<18; + static const int MIN_POST_NEIGHBOR = 1<<19; + static const int MIN_PRE_FORCE = 1<<20; + static const int MIN_PRE_REVERSE = 1<<21; + static const int MIN_POST_FORCE = 1<<22; + static const int MIN_ENERGY = 1<<23; + static const int FIX_CONST_LAST = 1<<24; } } diff --git a/src/fix_heat.cpp b/src/fix_heat.cpp index 97e0ed6a7fd76662087d4db5415d9808ec535c39..846531dbb987eaa3c1870c4c5c20e1ba1448f82c 100644 --- a/src/fix_heat.cpp +++ b/src/fix_heat.cpp @@ -127,7 +127,7 @@ void FixHeat::init() } // check for rigid bodies in region (done here for performance reasons) - if (modify->check_rigid_region_overlap(groupbit,domain->regions[iregion])) + if (iregion >= 0 && modify->check_rigid_region_overlap(groupbit,domain->regions[iregion])) error->warning(FLERR,"Cannot apply fix heat to atoms in rigid bodies"); // cannot have 0 atoms in group diff --git a/src/fix_shear_history.cpp b/src/fix_neigh_history.cpp similarity index 55% rename from src/fix_shear_history.cpp rename to src/fix_neigh_history.cpp index 17e78830f4e1a7205ca10631792d66eefe0e4d12..322c8d55619abd0ef17b0a7788523f978261e53b 100644 --- a/src/fix_shear_history.cpp +++ b/src/fix_neigh_history.cpp @@ -14,7 +14,7 @@ #include <mpi.h> #include <string.h> #include <stdio.h> -#include "fix_shear_history.h" +#include "fix_neigh_history.h" #include "atom.h" #include "comm.h" #include "neighbor.h" @@ -29,16 +29,16 @@ using namespace LAMMPS_NS; using namespace FixConst; -enum{DEFAULT,NPARTNER,PERPARTNER}; +enum{DEFAULT,NPARTNER,PERPARTNER}; // also set in fix neigh/history/omp /* ---------------------------------------------------------------------- */ -FixShearHistory::FixShearHistory(LAMMPS *lmp, int narg, char **arg) : +FixNeighHistory::FixNeighHistory(LAMMPS *lmp, int narg, char **arg) : Fix(lmp, narg, arg), - npartner(NULL), partner(NULL), shearpartner(NULL), pair(NULL), - ipage(NULL), dpage(NULL) + 
npartner(NULL), partner(NULL), valuepartner(NULL), pair(NULL), + ipage_atom(NULL), dpage_atom(NULL), ipage_neigh(NULL), dpage_neigh(NULL) { - if (narg != 4) error->all(FLERR,"Illegal fix SHEAR_HISTORY command"); + if (narg != 4) error->all(FLERR,"Illegal fix NEIGH_HISTORY command"); restart_peratom = 1; create_attribute = 1; @@ -48,9 +48,12 @@ FixShearHistory::FixShearHistory(LAMMPS *lmp, int narg, char **arg) : dnum = force->inumeric(FLERR,arg[3]); dnumbytes = dnum * sizeof(double); + zeroes = new double[dnum]; + for (int i = 0; i < dnum; i++) zeroes[i] = 0.0; + onesided = 0; - if (strcmp(id,"LINE_SHEAR_HISTORY") == 0) onesided = 1; - if (strcmp(id,"TRI_SHEAR_HISTORY") == 0) onesided = 1; + if (strcmp(id,"LINE_NEIGH_HISTORY") == 0) onesided = 1; + if (strcmp(id,"TRI_NEIGH_HISTORY") == 0) onesided = 1; if (newton_pair) comm_reverse = 1; // just for single npartner value // variable-size history communicated via @@ -65,11 +68,24 @@ FixShearHistory::FixShearHistory(LAMMPS *lmp, int narg, char **arg) : pgsize = oneatom = 0; + // other per-atom vectors + + firstflag = NULL; + firstvalue = NULL; + maxatom = 0; + + // per-atom and per-neighbor data structs + + ipage_atom = NULL; + dpage_atom = NULL; + ipage_neigh = NULL; + dpage_neigh = NULL; + // initialize npartner to 0 so neighbor list creation is OK the 1st time int nlocal = atom->nlocal; for (int i = 0; i < nlocal; i++) npartner[i] = 0; - maxtouch = 0; + maxpartner = 0; nlocal_neigh = nall_neigh = 0; commflag = DEFAULT; @@ -77,7 +93,7 @@ FixShearHistory::FixShearHistory(LAMMPS *lmp, int narg, char **arg) : /* ---------------------------------------------------------------------- */ -FixShearHistory::~FixShearHistory() +FixNeighHistory::~FixNeighHistory() { // unregister this fix so atom class doesn't invoke it any more @@ -86,86 +102,111 @@ FixShearHistory::~FixShearHistory() // delete locally stored arrays + delete [] zeroes; + + memory->sfree(firstflag); + memory->sfree(firstvalue); + memory->destroy(npartner); 
memory->sfree(partner); - memory->sfree(shearpartner); + memory->sfree(valuepartner); + + delete [] ipage_atom; + delete [] dpage_atom; + delete [] ipage_neigh; + delete [] dpage_neigh; // to better detect use-after-delete errors + firstflag = NULL; + firstvalue = NULL; + pair = NULL; npartner = NULL; partner = NULL; - shearpartner = NULL; - - delete [] ipage; - delete [] dpage; + valuepartner = NULL; } /* ---------------------------------------------------------------------- */ -int FixShearHistory::setmask() +int FixNeighHistory::setmask() { int mask = 0; mask |= PRE_EXCHANGE; mask |= MIN_PRE_EXCHANGE; + mask |= POST_NEIGHBOR; + mask |= MIN_POST_NEIGHBOR; return mask; } /* ---------------------------------------------------------------------- */ -void FixShearHistory::init() +void FixNeighHistory::init() { if (atom->tag_enable == 0) - error->all(FLERR,"Granular shear history requires atoms have IDs"); + error->all(FLERR,"Neighbor history requires atoms have IDs"); allocate_pages(); } /* ---------------------------------------------------------------------- create pages if first time or if neighbor pgsize/oneatom has changed - note that latter could cause shear history info to be discarded + note that latter could cause neighbor history info to be discarded ------------------------------------------------------------------------- */ -void FixShearHistory::allocate_pages() +void FixNeighHistory::allocate_pages() { int create = 0; - if (ipage == NULL) create = 1; + if (ipage_atom == NULL) create = 1; if (pgsize != neighbor->pgsize) create = 1; if (oneatom != neighbor->oneatom) create = 1; if (create) { - delete [] ipage; - delete [] dpage; + delete [] ipage_atom; + delete [] dpage_atom; + delete [] ipage_neigh; + delete [] dpage_neigh; pgsize = neighbor->pgsize; oneatom = neighbor->oneatom; int nmypage = comm->nthreads; - ipage = new MyPage<tagint>[nmypage]; - dpage = new MyPage<double>[nmypage]; + ipage_atom = new MyPage<tagint>[nmypage]; + dpage_atom = new 
MyPage<double>[nmypage]; + ipage_neigh = new MyPage<int>[nmypage]; + dpage_neigh = new MyPage<double>[nmypage]; for (int i = 0; i < nmypage; i++) { - ipage[i].init(oneatom,pgsize); - dpage[i].init(dnum*oneatom,dnum*pgsize); + ipage_atom[i].init(oneatom,pgsize); + dpage_atom[i].init(dnum*oneatom,dnum*pgsize); + ipage_neigh[i].init(oneatom,pgsize); + dpage_neigh[i].init(dnum*oneatom,dnum*pgsize); } } } +/* ---------------------------------------------------------------------- */ + +void FixNeighHistory::setup_post_neighbor() +{ + post_neighbor(); +} + /* ---------------------------------------------------------------------- - copy shear partner info from neighbor lists to atom arrays - should be called whenever neighbor list stores current history info - and need to store the info with owned atoms - e.g. so atoms can migrate to new procs or between runs - when atoms may be added or deleted (neighbor list becomes out-of-date) - the next granular neigh list build will put this info back into neigh list + copy partner info from neighbor data structs (NDS) to atom arrays + should be called whenever NDS store current history info + and need to transfer the info to owned atoms + e.g. 
when atoms migrate to new procs, new neigh list built, or between runs + when atoms may be added or deleted (NDS becomes out-of-date) + the next post_neighbor() will put this info back into new NDS called during run before atom exchanges, including for restart files called at end of run via post_run() do not call during setup of run (setup_pre_exchange) - b/c there is no guarantee of a current neigh list (even on continued run) + b/c there is no guarantee of a current NDS (even on continued run) if run command does a 2nd run with pre = no, then no neigh list will be built, but old neigh list will still have the info onesided and newton on and newton off versions ------------------------------------------------------------------------- */ -void FixShearHistory::pre_exchange() +void FixNeighHistory::pre_exchange() { if (onesided) pre_exchange_onesided(); else if (newton_pair) pre_exchange_newton(); @@ -178,60 +219,57 @@ void FixShearHistory::pre_exchange() only store history info with spheres ------------------------------------------------------------------------- */ -void FixShearHistory::pre_exchange_onesided() +void FixNeighHistory::pre_exchange_onesided() { int i,j,ii,jj,m,n,inum,jnum; int *ilist,*jlist,*numneigh,**firstneigh; - int *touch,**firsttouch; - double *shear,*allshear,**firstshear; + int *allflags; + double *allvalues,*onevalues; // NOTE: all operations until very end are with nlocal_neigh <= current nlocal // b/c previous neigh list was built with nlocal_neigh // nlocal can be larger if other fixes added atoms at this pre_exchange() - // zero npartner for owned atoms - // clear 2 page data structures - - for (i = 0; i < nlocal_neigh; i++) npartner[i] = 0; + // clear two paged data structures - ipage->reset(); - dpage->reset(); + ipage_atom->reset(); + dpage_atom->reset(); // 1st loop over neighbor list, I = sphere, J = tri // only calculate npartner for owned spheres + for (i = 0; i < nlocal_neigh; i++) npartner[i] = 0; + tagint *tag = atom->tag; 
NeighList *list = pair->list; inum = list->inum; ilist = list->ilist; numneigh = list->numneigh; firstneigh = list->firstneigh; - firsttouch = list->listhistory->firstneigh; - firstshear = list->listhistory->firstdouble; for (ii = 0; ii < inum; ii++) { i = ilist[ii]; jlist = firstneigh[i]; jnum = numneigh[i]; - touch = firsttouch[i]; + allflags = firstflag[i]; for (jj = 0; jj < jnum; jj++) - if (touch[jj]) npartner[i]++; + if (allflags[jj]) npartner[i]++; } - // get page chunks to store atom IDs and shear history for owned atoms + // get page chunks to store partner IDs and values for owned atoms for (ii = 0; ii < inum; ii++) { i = ilist[ii]; n = npartner[i]; - partner[i] = ipage->get(n); - shearpartner[i] = dpage->get(dnum*n); - if (partner[i] == NULL || shearpartner[i] == NULL) - error->one(FLERR,"Shear history overflow, boost neigh_modify one"); + partner[i] = ipage_atom->get(n); + valuepartner[i] = dpage_atom->get(dnum*n); + if (partner[i] == NULL || valuepartner[i] == NULL) + error->one(FLERR,"Neighbor history overflow, boost neigh_modify one"); } // 2nd loop over neighbor list, I = sphere, J = tri - // store atom IDs and shear history for owned spheres + // store partner IDs and values for owned+ghost atoms // re-zero npartner to use as counter for (i = 0; i < nlocal_neigh; i++) npartner[i] = 0; @@ -239,28 +277,28 @@ void FixShearHistory::pre_exchange_onesided() for (ii = 0; ii < inum; ii++) { i = ilist[ii]; jlist = firstneigh[i]; - allshear = firstshear[i]; jnum = numneigh[i]; - touch = firsttouch[i]; + allflags = firstflag[i]; + allvalues = firstvalue[i]; for (jj = 0; jj < jnum; jj++) { - if (touch[jj]) { - shear = &allshear[dnum*jj]; + if (allflags[jj]) { + onevalues = &allvalues[dnum*jj]; j = jlist[jj]; j &= NEIGHMASK; m = npartner[i]++; partner[i][m] = tag[j]; - memcpy(&shearpartner[i][dnum*m],shear,dnumbytes); + memcpy(&valuepartner[i][dnum*m],onevalues,dnumbytes); } } } - // set maxtouch = max # of partners of any owned atom + // set maxpartner = max # 
of partners of any owned atom // bump up comm->maxexchange_fix if necessary - maxtouch = 0; - for (i = 0; i < nlocal_neigh; i++) maxtouch = MAX(maxtouch,npartner[i]); - comm->maxexchange_fix = MAX(comm->maxexchange_fix,(dnum+1)*maxtouch+1); + maxpartner = 0; + for (i = 0; i < nlocal_neigh; i++) maxpartner = MAX(maxpartner,npartner[i]); + comm->maxexchange_fix = MAX(comm->maxexchange_fix,(dnum+1)*maxpartner+1); // zero npartner values from previous nlocal_neigh to current nlocal @@ -269,50 +307,47 @@ void FixShearHistory::pre_exchange_onesided() } /* ---------------------------------------------------------------------- - newton on version, for sphere/sphere contacts - performs reverse comm to acquire shear partner info from ghost atoms + newton ON version + performs reverse comm to acquire partner values from ghost atoms ------------------------------------------------------------------------- */ -void FixShearHistory::pre_exchange_newton() +void FixNeighHistory::pre_exchange_newton() { int i,j,ii,jj,m,n,inum,jnum; int *ilist,*jlist,*numneigh,**firstneigh; - int *touch,**firsttouch; - double *shear,*shearj,*allshear,**firstshear; + int *allflags; + double *allvalues,*onevalues,*jvalues; // NOTE: all operations until very end are with // nlocal_neigh <= current nlocal and nall_neigh - // b/c previous neigh list was built with nlocal_neigh,nghost_neigh + // b/c previous neigh list was built with nlocal_neigh & nghost_neigh // nlocal can be larger if other fixes added atoms at this pre_exchange() - // zero npartner for owned+ghost atoms - // clear 2 page data structures - - for (i = 0; i < nall_neigh; i++) npartner[i] = 0; + // clear two paged data structures - ipage->reset(); - dpage->reset(); + ipage_atom->reset(); + dpage_atom->reset(); // 1st loop over neighbor list // calculate npartner for owned+ghost atoms + for (i = 0; i < nall_neigh; i++) npartner[i] = 0; + tagint *tag = atom->tag; NeighList *list = pair->list; inum = list->inum; ilist = list->ilist; numneigh 
= list->numneigh; firstneigh = list->firstneigh; - firsttouch = list->listhistory->firstneigh; - firstshear = list->listhistory->firstdouble; for (ii = 0; ii < inum; ii++) { i = ilist[ii]; jlist = firstneigh[i]; jnum = numneigh[i]; - touch = firsttouch[i]; + allflags = firstflag[i]; for (jj = 0; jj < jnum; jj++) { - if (touch[jj]) { + if (allflags[jj]) { npartner[i]++; j = jlist[jj]; j &= NEIGHMASK; @@ -326,29 +361,29 @@ void FixShearHistory::pre_exchange_newton() commflag = NPARTNER; comm->reverse_comm_fix(this,0); - // get page chunks to store atom IDs and shear history for owned+ghost atoms + // get page chunks to store partner IDs and values for owned+ghost atoms for (ii = 0; ii < inum; ii++) { i = ilist[ii]; n = npartner[i]; - partner[i] = ipage->get(n); - shearpartner[i] = dpage->get(dnum*n); - if (partner[i] == NULL || shearpartner[i] == NULL) { - error->one(FLERR,"Shear history overflow, boost neigh_modify one"); + partner[i] = ipage_atom->get(n); + valuepartner[i] = dpage_atom->get(dnum*n); + if (partner[i] == NULL || valuepartner[i] == NULL) { + error->one(FLERR,"Neighbor history overflow, boost neigh_modify one"); } } for (i = nlocal_neigh; i < nall_neigh; i++) { n = npartner[i]; - partner[i] = ipage->get(n); - shearpartner[i] = dpage->get(dnum*n); - if (partner[i] == NULL || shearpartner[i] == NULL) { - error->one(FLERR,"Shear history overflow, boost neigh_modify one"); + partner[i] = ipage_atom->get(n); + valuepartner[i] = dpage_atom->get(dnum*n); + if (partner[i] == NULL || valuepartner[i] == NULL) { + error->one(FLERR,"Neighbor history overflow, boost neigh_modify one"); } } // 2nd loop over neighbor list - // store atom IDs and shear history for owned+ghost atoms + // store partner IDs and values for owned+ghost atoms // re-zero npartner to use as counter for (i = 0; i < nall_neigh; i++) npartner[i] = 0; @@ -356,40 +391,40 @@ void FixShearHistory::pre_exchange_newton() for (ii = 0; ii < inum; ii++) { i = ilist[ii]; jlist = firstneigh[i]; - allshear 
= firstshear[i]; jnum = numneigh[i]; - touch = firsttouch[i]; + allflags = firstflag[i]; + allvalues = firstvalue[i]; for (jj = 0; jj < jnum; jj++) { - if (touch[jj]) { - shear = &allshear[dnum*jj]; + if (allflags[jj]) { + onevalues = &allvalues[dnum*jj]; j = jlist[jj]; j &= NEIGHMASK; m = npartner[i]++; partner[i][m] = tag[j]; - memcpy(&shearpartner[i][dnum*m],shear,dnumbytes); + memcpy(&valuepartner[i][dnum*m],onevalues,dnumbytes); m = npartner[j]++; partner[j][m] = tag[i]; - shearj = &shearpartner[j][dnum*m]; - for (n = 0; n < dnum; n++) shearj[n] = -shear[n]; + jvalues = &valuepartner[j][dnum*m]; + for (n = 0; n < dnum; n++) jvalues[n] = -onevalues[n]; } } } // perform reverse comm to augment - // owned atom partner/shearpartner with ghost info + // owned atom partner/valuepartner with ghost info // use variable variant b/c size of packed data can be arbitrarily large // if many touching neighbors for large particle commflag = PERPARTNER; comm->reverse_comm_fix_variable(this); - // set maxtouch = max # of partners of any owned atom + // set maxpartner = max # of partners of any owned atom // bump up comm->maxexchange_fix if necessary - maxtouch = 0; - for (i = 0; i < nlocal_neigh; i++) maxtouch = MAX(maxtouch,npartner[i]); - comm->maxexchange_fix = MAX(comm->maxexchange_fix,(dnum+1)*maxtouch+1); + maxpartner = 0; + for (i = 0; i < nlocal_neigh; i++) maxpartner = MAX(maxpartner,npartner[i]); + comm->maxexchange_fix = MAX(comm->maxexchange_fix,4*maxpartner+1); // zero npartner values from previous nlocal_neigh to current nlocal @@ -398,49 +433,47 @@ void FixShearHistory::pre_exchange_newton() } /* ---------------------------------------------------------------------- - newton off version, for sphere/sphere contacts - newton OFF works with smaller vectors that don't include ghost info + newton OFF version + do not need partner values from ghost atoms + assume J values are negative of I values 
------------------------------------------------------------------------- */ -void FixShearHistory::pre_exchange_no_newton() +void FixNeighHistory::pre_exchange_no_newton() { int i,j,ii,jj,m,n,inum,jnum; int *ilist,*jlist,*numneigh,**firstneigh; - int *touch,**firsttouch; - double *shear,*shearj,*allshear,**firstshear; + int *allflags; + double *allvalues,*onevalues,*jvalues; // NOTE: all operations until very end are with nlocal_neigh <= current nlocal // b/c previous neigh list was built with nlocal_neigh // nlocal can be larger if other fixes added atoms at this pre_exchange() - // zero npartner for owned atoms - // clear 2 page data structures - - for (i = 0; i < nlocal_neigh; i++) npartner[i] = 0; + // clear two paged data structures - ipage->reset(); - dpage->reset(); + ipage_atom->reset(); + dpage_atom->reset(); // 1st loop over neighbor list // calculate npartner for owned atoms + for (i = 0; i < nlocal_neigh; i++) npartner[i] = 0; + tagint *tag = atom->tag; NeighList *list = pair->list; inum = list->inum; ilist = list->ilist; numneigh = list->numneigh; firstneigh = list->firstneigh; - firsttouch = list->listhistory->firstneigh; - firstshear = list->listhistory->firstdouble; for (ii = 0; ii < inum; ii++) { i = ilist[ii]; jlist = firstneigh[i]; jnum = numneigh[i]; - touch = firsttouch[i]; + allflags = firstflag[i]; for (jj = 0; jj < jnum; jj++) { - if (touch[jj]) { + if (allflags[jj]) { npartner[i]++; j = jlist[jj]; j &= NEIGHMASK; @@ -449,19 +482,19 @@ void FixShearHistory::pre_exchange_no_newton() } } - // get page chunks to store atom IDs and shear history for owned atoms + // get page chunks to store partner IDs and values for owned atoms for (ii = 0; ii < inum; ii++) { i = ilist[ii]; n = npartner[i]; - partner[i] = ipage->get(n); - shearpartner[i] = dpage->get(dnum*n); - if (partner[i] == NULL || shearpartner[i] == NULL) - error->one(FLERR,"Shear history overflow, boost neigh_modify one"); + partner[i] = ipage_atom->get(n); + valuepartner[i] = 
dpage_atom->get(dnum*n); + if (partner[i] == NULL || valuepartner[i] == NULL) + error->one(FLERR,"Neighbor history overflow, boost neigh_modify one"); } // 2nd loop over neighbor list - // store atom IDs and shear history for owned atoms + // store partner IDs and values for owned+ghost atoms // re-zero npartner to use as counter for (i = 0; i < nlocal_neigh; i++) npartner[i] = 0; @@ -469,34 +502,34 @@ void FixShearHistory::pre_exchange_no_newton() for (ii = 0; ii < inum; ii++) { i = ilist[ii]; jlist = firstneigh[i]; - allshear = firstshear[i]; jnum = numneigh[i]; - touch = firsttouch[i]; + allflags = firstflag[i]; + allvalues = firstvalue[i]; for (jj = 0; jj < jnum; jj++) { - if (touch[jj]) { - shear = &allshear[dnum*jj]; + if (allflags[jj]) { + onevalues = &allvalues[dnum*jj]; j = jlist[jj]; j &= NEIGHMASK; m = npartner[i]++; partner[i][m] = tag[j]; - memcpy(&shearpartner[i][dnum*m],shear,dnumbytes); + memcpy(&valuepartner[i][dnum*m],onevalues,dnumbytes); if (j < nlocal_neigh) { m = npartner[j]++; partner[j][m] = tag[i]; - shearj = &shearpartner[j][dnum*m]; - for (n = 0; n < dnum; n++) shearj[n] = -shear[n]; + jvalues = &valuepartner[j][dnum*m]; + for (n = 0; n < dnum; n++) jvalues[n] = -onevalues[n]; } } } } - // set maxtouch = max # of partners of any owned atom + // set maxpartner = max # of partners of any owned atom // bump up comm->maxexchange_fix if necessary - maxtouch = 0; - for (i = 0; i < nlocal_neigh; i++) maxtouch = MAX(maxtouch,npartner[i]); - comm->maxexchange_fix = MAX(comm->maxexchange_fix,(dnum+1)*maxtouch+1); + maxpartner = 0; + for (i = 0; i < nlocal_neigh; i++) maxpartner = MAX(maxpartner,npartner[i]); + comm->maxexchange_fix = MAX(comm->maxexchange_fix,(dnum+1)*maxpartner+1); // zero npartner values from previous nlocal_neigh to current nlocal @@ -506,14 +539,107 @@ void FixShearHistory::pre_exchange_no_newton() /* ---------------------------------------------------------------------- */ -void FixShearHistory::min_pre_exchange() +void 
FixNeighHistory::min_pre_exchange() { pre_exchange(); } +/* ---------------------------------------------------------------------- + called after neighbor list is build + recover history info stored temporarily in per-atom partner lists + and store afresh in per-neighbor firstflag and firstvalue lists +------------------------------------------------------------------------- */ + +void FixNeighHistory::post_neighbor() +{ + int i,j,m,ii,jj,nn,np,inum,jnum,rflag; + tagint jtag; + int *ilist,*jlist,*numneigh,**firstneigh; + int *allflags; + double *allvalues; + + // store atom counts used for new neighbor list which was just built + + int nlocal = atom->nlocal; + int nall = nlocal + atom->nghost; + nlocal_neigh = nlocal; + nall_neigh = nall; + + // realloc firstflag and firstvalue if needed + + if (maxatom < nlocal) { + memory->sfree(firstflag); + memory->sfree(firstvalue); + maxatom = nall; + firstflag = (int **) + memory->smalloc(maxatom*sizeof(int *),"neighbor_history:firstflag"); + firstvalue = (double **) + memory->smalloc(maxatom*sizeof(double *),"neighbor_history:firstvalue"); + } + + // loop over newly built neighbor list + // repopulate entire per-neighbor data structs + // whether with old-neigh partner info or zeroes + + ipage_neigh->reset(); + dpage_neigh->reset(); + + tagint *tag = atom->tag; + NeighList *list = pair->list; + inum = list->inum; + ilist = list->ilist; + numneigh = list->numneigh; + firstneigh = list->firstneigh; + + for (ii = 0; ii < inum; ii++) { + i = ilist[ii]; + jlist = firstneigh[i]; + jnum = numneigh[i]; + firstflag[i] = allflags = ipage_neigh->get(jnum); + firstvalue[i] = allvalues = dpage_neigh->get(jnum*dnum); + np = npartner[i]; + nn = 0; + + for (jj = 0; jj < jnum; jj++) { + j = jlist[jj]; + rflag = sbmask(j); + j &= NEIGHMASK; + jlist[jj] = j; + + // rflag = 1 if r < radsum in npair_size() method + // preserve neigh history info if tag[j] is in old-neigh partner list + // this test could be more geometrically precise for two 
sphere/line/tri + + if (rflag) { + jtag = tag[j]; + for (m = 0; m < np; m++) + if (partner[i][m] == jtag) break; + if (m < np) { + allflags[jj] = 1; + memcpy(&allvalues[nn],&valuepartner[i][dnum*m],dnumbytes); + } else { + allflags[jj] = 0; + memcpy(&allvalues[nn],zeroes,dnumbytes); + } + } else { + allflags[jj] = 0; + memcpy(&allvalues[nn],zeroes,dnumbytes); + } + nn += dnum; + } + } +} + +/* ---------------------------------------------------------------------- */ + +void FixNeighHistory::min_post_neighbor() +{ + post_neighbor(); +} + /* ---------------------------------------------------------------------- */ -void FixShearHistory::post_run() +void FixNeighHistory::post_run() { pre_exchange(); } @@ -522,17 +648,21 @@ void FixShearHistory::post_run() memory usage of local atom-based arrays ------------------------------------------------------------------------- */ -double FixShearHistory::memory_usage() +double FixNeighHistory::memory_usage() { int nmax = atom->nmax; - double bytes = nmax * sizeof(int); - bytes += nmax * sizeof(tagint *); - bytes += nmax * sizeof(double *); + double bytes = nmax * sizeof(int); // npartner + bytes += nmax * sizeof(tagint *); // partner + bytes += nmax * sizeof(double *); // valuepartner + bytes += maxatom * sizeof(int *); // firstflag + bytes += maxatom * sizeof(double *); // firstvalue int nmypage = comm->nthreads; for (int i = 0; i < nmypage; i++) { - bytes += ipage[i].size(); - bytes += dpage[i].size(); + bytes += ipage_atom[i].size(); + bytes += dpage_atom[i].size(); + bytes += ipage_neigh[i].size(); + bytes += dpage_neigh[i].size(); } return bytes; @@ -542,38 +672,38 @@ double FixShearHistory::memory_usage() allocate local atom-based arrays ------------------------------------------------------------------------- */ -void FixShearHistory::grow_arrays(int nmax) +void FixNeighHistory::grow_arrays(int nmax) { - memory->grow(npartner,nmax,"shear_history:npartner"); + memory->grow(npartner,nmax,"neighbor_history:npartner"); 
partner = (tagint **) memory->srealloc(partner,nmax*sizeof(tagint *), - "shear_history:partner"); - shearpartner = (double **) memory->srealloc(shearpartner, + "neighbor_history:partner"); + valuepartner = (double **) memory->srealloc(valuepartner, nmax*sizeof(double *), - "shear_history:shearpartner"); + "neighbor_history:valuepartner"); } /* ---------------------------------------------------------------------- copy values within local atom-based arrays ------------------------------------------------------------------------- */ -void FixShearHistory::copy_arrays(int i, int j, int delflag) +void FixNeighHistory::copy_arrays(int i, int j, int delflag) { - // just copy pointers for partner and shearpartner - // b/c can't overwrite chunk allocation inside ipage,dpage + // just copy pointers for partner and valuepartner + // b/c can't overwrite chunk allocation inside ipage_atom,dpage_atom // incoming atoms in unpack_exchange just grab new chunks // so are orphaning chunks for migrating atoms - // OK, b/c will reset ipage,dpage on next reneighboring + // OK, b/c will reset ipage_atom,dpage_atom on next reneighboring npartner[j] = npartner[i]; partner[j] = partner[i]; - shearpartner[j] = shearpartner[i]; + valuepartner[j] = valuepartner[i]; } /* ---------------------------------------------------------------------- initialize one atom's array values, called when atom is created ------------------------------------------------------------------------- */ -void FixShearHistory::set_arrays(int i) +void FixNeighHistory::set_arrays(int i) { npartner[i] = 0; } @@ -582,7 +712,7 @@ void FixShearHistory::set_arrays(int i) only called by Comm::reverse_comm_fix_variable for PERPARTNER mode ------------------------------------------------------------------------- */ -int FixShearHistory::pack_reverse_comm_size(int n, int first) +int FixNeighHistory::pack_reverse_comm_size(int n, int first) { int i,last; @@ -590,7 +720,7 @@ int FixShearHistory::pack_reverse_comm_size(int n, int 
first) last = first + n; for (i = first; i < last; i++) - m += 1 + (dnum+1)*npartner[i]; + m += 1 + (dnum+1)*npartner[i]; // 1 count + (ID + dnum values) per partner, must match pack_reverse_comm return m; } @@ -599,7 +729,7 @@ int FixShearHistory::pack_reverse_comm_size(int n, int first) two modes: NPARTNER and PERPARTNER ------------------------------------------------------------------------- */ -int FixShearHistory::pack_reverse_comm(int n, int first, double *buf) +int FixNeighHistory::pack_reverse_comm(int n, int first, double *buf) { int i,k,last; @@ -615,11 +745,11 @@ int FixShearHistory::pack_reverse_comm(int n, int first, double *buf) buf[m++] = npartner[i]; for (k = 0; k < npartner[i]; k++) { buf[m++] = partner[i][k]; - memcpy(&buf[m],&shearpartner[i][dnum*k],dnumbytes); + memcpy(&buf[m],&valuepartner[i][dnum*k],dnumbytes); m += dnum; } } - } else error->all(FLERR,"Unsupported comm mode in shear history"); + } else error->all(FLERR,"Unsupported comm mode in neighbor history"); return m; } @@ -628,7 +758,7 @@ int FixShearHistory::pack_reverse_comm(int n, int first, double *buf) two modes: NPARTNER and PERPARTNER ------------------------------------------------------------------------- */ -void FixShearHistory::unpack_reverse_comm(int n, int *list, double *buf) +void FixNeighHistory::unpack_reverse_comm(int n, int *list, double *buf) { int i,j,k,kk,ncount; @@ -646,18 +776,18 @@ void FixShearHistory::unpack_reverse_comm(int n, int *list, double *buf) for (k = 0; k < ncount; k++) { kk = npartner[j]++; partner[j][kk] = static_cast<tagint> (buf[m++]); - memcpy(&shearpartner[j][dnum*kk],&buf[m],dnumbytes); + memcpy(&valuepartner[j][dnum*kk],&buf[m],dnumbytes); m += dnum; } } - } else error->all(FLERR,"Unsupported comm mode in shear history"); + } else error->all(FLERR,"Unsupported comm mode in neighbor history"); } /* ---------------------------------------------------------------------- pack values in local atom-based arrays for exchange with another proc ------------------------------------------------------------------------- */ -int 
FixShearHistory::pack_exchange(int i, double *buf) +int FixNeighHistory::pack_exchange(int i, double *buf) { // NOTE: how do I know comm buf is big enough if extreme # of touching neighs // Comm::BUFEXTRA may need to be increased @@ -666,7 +796,7 @@ int FixShearHistory::pack_exchange(int i, double *buf) buf[m++] = npartner[i]; for (int n = 0; n < npartner[i]; n++) { buf[m++] = partner[i][n]; - memcpy(&buf[m],&shearpartner[i][dnum*n],dnumbytes); + memcpy(&buf[m],&valuepartner[i][dnum*n],dnumbytes); m += dnum; } return m; @@ -676,18 +806,18 @@ int FixShearHistory::pack_exchange(int i, double *buf) unpack values in local atom-based arrays from exchange with another proc ------------------------------------------------------------------------- */ -int FixShearHistory::unpack_exchange(int nlocal, double *buf) +int FixNeighHistory::unpack_exchange(int nlocal, double *buf) { - // allocate new chunks from ipage,dpage for incoming values + // allocate new chunks from ipage_atom,dpage_atom for incoming values int m = 0; npartner[nlocal] = static_cast<int> (buf[m++]); - maxtouch = MAX(maxtouch,npartner[nlocal]); - partner[nlocal] = ipage->get(npartner[nlocal]); - shearpartner[nlocal] = dpage->get(dnum*npartner[nlocal]); + maxpartner = MAX(maxpartner,npartner[nlocal]); + partner[nlocal] = ipage_atom->get(npartner[nlocal]); + valuepartner[nlocal] = dpage_atom->get(dnum*npartner[nlocal]); for (int n = 0; n < npartner[nlocal]; n++) { partner[nlocal][n] = static_cast<tagint> (buf[m++]); - memcpy(&shearpartner[nlocal][dnum*n],&buf[m],dnumbytes); + memcpy(&valuepartner[nlocal][dnum*n],&buf[m],dnumbytes); m += dnum; } return m; @@ -697,13 +827,13 @@ int FixShearHistory::unpack_exchange(int nlocal, double *buf) pack values in local atom-based arrays for restart file ------------------------------------------------------------------------- */ -int FixShearHistory::pack_restart(int i, double *buf) +int FixNeighHistory::pack_restart(int i, double *buf) { int m = 1; buf[m++] = 
npartner[i]; for (int n = 0; n < npartner[i]; n++) { buf[m++] = partner[i][n]; - memcpy(&buf[m],&shearpartner[i][dnum*n],dnumbytes); + memcpy(&buf[m],&valuepartner[i][dnum*n],dnumbytes); m += dnum; } buf[0] = m; @@ -714,11 +844,11 @@ int FixShearHistory::pack_restart(int i, double *buf) unpack values from atom->extra array to restart the fix ------------------------------------------------------------------------- */ -void FixShearHistory::unpack_restart(int nlocal, int nth) +void FixNeighHistory::unpack_restart(int nlocal, int nth) { - // ipage = NULL if being called from granular pair style init() + // ipage_atom = NULL if being called from granular pair style init() - if (ipage == NULL) allocate_pages(); + if (ipage_atom == NULL) allocate_pages(); // skip to Nth set of extra values @@ -728,15 +858,15 @@ void FixShearHistory::unpack_restart(int nlocal, int nth) for (int i = 0; i < nth; i++) m += static_cast<int> (extra[nlocal][m]); m++; - // allocate new chunks from ipage,dpage for incoming values + // allocate new chunks from ipage_atom,dpage_atom for incoming values npartner[nlocal] = static_cast<int> (extra[nlocal][m++]); - maxtouch = MAX(maxtouch,npartner[nlocal]); - partner[nlocal] = ipage->get(npartner[nlocal]); - shearpartner[nlocal] = dpage->get(dnum*npartner[nlocal]); + maxpartner = MAX(maxpartner,npartner[nlocal]); + partner[nlocal] = ipage_atom->get(npartner[nlocal]); + valuepartner[nlocal] = dpage_atom->get(dnum*npartner[nlocal]); for (int n = 0; n < npartner[nlocal]; n++) { partner[nlocal][n] = static_cast<tagint> (extra[nlocal][m++]); - memcpy(&shearpartner[nlocal][dnum*n],&extra[nlocal][m],dnumbytes); + memcpy(&valuepartner[nlocal][dnum*n],&extra[nlocal][m],dnumbytes); m += dnum; } } @@ -745,20 +875,20 @@ void FixShearHistory::unpack_restart(int nlocal, int nth) maxsize of any atom's restart data ------------------------------------------------------------------------- */ -int FixShearHistory::maxsize_restart() +int 
FixNeighHistory::maxsize_restart() { - // maxtouch_all = max # of touching partners across all procs + // maxpartner_all = max # of touching partners across all procs - int maxtouch_all; - MPI_Allreduce(&maxtouch,&maxtouch_all,1,MPI_INT,MPI_MAX,world); - return (dnum+1)*maxtouch_all + 2; + int maxpartner_all; + MPI_Allreduce(&maxpartner,&maxpartner_all,1,MPI_INT,MPI_MAX,world); + return (dnum+1)*maxpartner_all + 2; } /* ---------------------------------------------------------------------- size of atom nlocal's restart data ------------------------------------------------------------------------- */ -int FixShearHistory::size_restart(int nlocal) +int FixNeighHistory::size_restart(int nlocal) { return (dnum+1)*npartner[nlocal] + 2; } diff --git a/src/fix_shear_history.h b/src/fix_neigh_history.h similarity index 59% rename from src/fix_shear_history.h rename to src/fix_neigh_history.h index 00f219f0340cd7f0422c3876bc32b1edeab2ec7d..7aed2d60357dfb81c0ff13ee357214f0c5dda4dc 100644 --- a/src/fix_shear_history.h +++ b/src/fix_neigh_history.h @@ -13,38 +13,35 @@ #ifdef FIX_CLASS -FixStyle(SHEAR_HISTORY,FixShearHistory) +FixStyle(NEIGH_HISTORY,FixNeighHistory) #else -#ifndef LMP_FIX_SHEAR_HISTORY_H -#define LMP_FIX_SHEAR_HISTORY_H +#ifndef LMP_FIX_NEIGH_HISTORY_H +#define LMP_FIX_NEIGH_HISTORY_H #include "fix.h" #include "my_page.h" namespace LAMMPS_NS { -class FixShearHistory : public Fix { - //friend class Neighbor; - //friend class PairGranHookeHistory; - friend class PairLineGranHookeHistory; - friend class PairTriGranHookeHistory; - +class FixNeighHistory : public Fix { public: int nlocal_neigh; // nlocal at last time neigh list was built int nall_neigh; // ditto for nlocal+nghost - int *npartner; // # of touching partners of each atom - tagint **partner; // global atom IDs for the partners - double **shearpartner; // shear values with the partner - class Pair *pair; // ptr to pair style that uses shear history + int **firstflag; // ptr to each atom's neighbor flags + 
double **firstvalue; // ptr to each atom's values + class Pair *pair; // ptr to pair style that uses neighbor history - FixShearHistory(class LAMMPS *, int, char **); - ~FixShearHistory(); + FixNeighHistory(class LAMMPS *, int, char **); + ~FixNeighHistory(); int setmask(); void init(); - virtual void pre_exchange(); + void setup_post_neighbor(); + void pre_exchange(); void min_pre_exchange(); + virtual void post_neighbor(); + void min_post_neighbor(); void post_run(); double memory_usage(); @@ -64,20 +61,40 @@ class FixShearHistory : public Fix { protected: int newton_pair; // same as force setting - int dnum,dnumbytes; // dnum = # of shear history values + int dnum,dnumbytes; // dnum = # of values per neighbor int onesided; // 1 for line/tri history, else 0 - int maxtouch; // max # of touching partners for my atoms + int maxatom; // max size of firstflag and firstvalue int commflag; // mode of reverse comm to get ghost info + double *zeroes; + + // per-atom data structures + // partners = flagged neighbors of an atom + + int *npartner; // # of partners of each atom + tagint **partner; // global atom IDs for the partners + double **valuepartner; // values for the partners + int maxpartner; // max # of partners for any of my atoms + + // per-atom data structs pointed to by partner & valuepartner int pgsize,oneatom; // copy of settings in Neighbor - MyPage<tagint> *ipage; // pages of partner atom IDs - MyPage<double> *dpage; // pages of shear history with partners + MyPage<tagint> *ipage_atom; // pages of partner atom IDs + MyPage<double> *dpage_atom; // pages of partner values + + // per-neighbor data structs pointed to by firstflag & firstvalue - void pre_exchange_onesided(); - void pre_exchange_newton(); - void pre_exchange_no_newton(); + MyPage<int> *ipage_neigh; // pages of local atom indices + MyPage<double> *dpage_neigh; // pages of partner values + + virtual void pre_exchange_onesided(); + virtual void pre_exchange_newton(); + virtual void 
pre_exchange_no_newton(); void allocate_pages(); + + inline int sbmask(int j) const { + return j >> SBBITS & 3; + } }; } diff --git a/src/fix_nh.cpp b/src/fix_nh.cpp index c99c7a34bdf66ce9a40041689a8a9f30a5686518..f95bde95d06fff004d53988eef5f9da7e2ffe6e3 100644 --- a/src/fix_nh.cpp +++ b/src/fix_nh.cpp @@ -353,6 +353,15 @@ FixNH::FixNH(LAMMPS *lmp, int narg, char **arg) : } else if (strcmp(arg[iarg],"disc") == 0) { iarg++; + // keywords erate, strain, and ext are also parsed in fix/nh/uef + + } else if (strcmp(arg[iarg],"erate") == 0) { + iarg += 3; + } else if (strcmp(arg[iarg],"strain") == 0) { + iarg += 3; + } else if (strcmp(arg[iarg],"ext") == 0) { + iarg += 2; + } else error->all(FLERR,"Illegal fix nvt/npt/nph command"); } diff --git a/src/fix_nve.cpp b/src/fix_nve.cpp index 8dd016024fd760fcf795e0394dc29c346da6a732..64ec3373f12c81137a7884c25f358bcc2e609174 100644 --- a/src/fix_nve.cpp +++ b/src/fix_nve.cpp @@ -58,7 +58,6 @@ void FixNVE::init() step_respa = ((Respa *) update->integrate)->step; } - /* ---------------------------------------------------------------------- allow for both per-type and per-atom mass ------------------------------------------------------------------------- */ diff --git a/src/input.cpp b/src/input.cpp index 7d11b8741b976ba47678b8742ded9f1e06ef8bdd..23b89d3040d7816546d5d701609bf0cf87bcac05 100644 --- a/src/input.cpp +++ b/src/input.cpp @@ -18,7 +18,7 @@ #include <errno.h> #include <ctype.h> #include <unistd.h> -#include "sys/stat.h" +#include <sys/stat.h> #include "input.h" #include "style_command.h" #include "universe.h" diff --git a/src/main.cpp b/src/main.cpp index 7401183fea1f79a2d2a0cbd3425f20d763c0f7c4..82dac5af6d3338afc7703b1f32974be4cd5e99f8 100644 --- a/src/main.cpp +++ b/src/main.cpp @@ -22,6 +22,10 @@ #include <fenv.h> #endif +#ifdef FFT_FFTW3 +#include <fftw3.h> +#endif + using namespace LAMMPS_NS; /* ---------------------------------------------------------------------- @@ -62,4 +66,10 @@ int main(int argc, char **argv) 
#endif MPI_Barrier(MPI_COMM_WORLD); MPI_Finalize(); + +#ifdef FFT_FFTW3 + // tell fftw3 to delete its global memory pool + // and thus avoid bogus valgrind memory leak reports + fftw_cleanup(); +#endif } diff --git a/src/min.cpp b/src/min.cpp index af23629cad8eafefcaab22f5752ee8d87ea9014a..653cac71e6af5b3a52a00d808a47319242593def 100644 --- a/src/min.cpp +++ b/src/min.cpp @@ -246,6 +246,7 @@ void Min::setup(int flag) domain->box_too_small_check(); modify->setup_pre_neighbor(); neighbor->build(); + modify->setup_post_neighbor(); neighbor->ncalls = 0; // remove these restriction eventually @@ -345,6 +346,7 @@ void Min::setup_minimal(int flag) domain->box_too_small_check(); modify->setup_pre_neighbor(); neighbor->build(); + modify->setup_post_neighbor(); neighbor->ncalls = 0; } @@ -503,12 +505,15 @@ double Min::energy_force(int resetflag) if (triclinic) domain->lamda2x(atom->nlocal+atom->nghost); timer->stamp(Timer::COMM); if (modify->n_min_pre_neighbor) { - timer->stamp(); modify->min_pre_neighbor(); timer->stamp(Timer::MODIFY); } neighbor->build(); timer->stamp(Timer::NEIGH); + if (modify->n_min_post_neighbor) { + modify->min_post_neighbor(); + timer->stamp(Timer::MODIFY); + } } ev_set(update->ntimestep); diff --git a/src/modify.cpp b/src/modify.cpp index 4516788aa940df4c7e4b431c7a7fe1f0208f8dc8..64970f2cf98b10cbf9ad9f7042392d5e2bf827b6 100644 --- a/src/modify.cpp +++ b/src/modify.cpp @@ -42,7 +42,7 @@ Modify::Modify(LAMMPS *lmp) : Pointers(lmp) { nfix = maxfix = 0; n_initial_integrate = n_post_integrate = 0; - n_pre_exchange = n_pre_neighbor = 0; + n_pre_exchange = n_pre_neighbor = n_post_neighbor = 0; n_pre_force = n_pre_reverse = n_post_force = 0; n_final_integrate = n_end_of_step = n_thermo_energy = 0; n_thermo_energy_atom = 0; @@ -54,14 +54,14 @@ Modify::Modify(LAMMPS *lmp) : Pointers(lmp) fix = NULL; fmask = NULL; list_initial_integrate = list_post_integrate = NULL; - list_pre_exchange = list_pre_neighbor = NULL; + list_pre_exchange = list_pre_neighbor = 
list_post_neighbor = NULL; list_pre_force = list_pre_reverse = list_post_force = NULL; list_final_integrate = list_end_of_step = NULL; list_thermo_energy = list_thermo_energy_atom = NULL; list_initial_integrate_respa = list_post_integrate_respa = NULL; list_pre_force_respa = list_post_force_respa = NULL; list_final_integrate_respa = NULL; - list_min_pre_exchange = list_min_pre_neighbor = NULL; + list_min_pre_exchange = list_min_pre_neighbor = list_min_post_neighbor = NULL; list_min_pre_force = list_min_pre_reverse = list_min_post_force = NULL; list_min_energy = NULL; @@ -110,7 +110,7 @@ Modify::~Modify() // delete all fixes // do it via delete_fix() so callbacks in Atom are also updated correctly - while (nfix) delete_fix(fix[0]->id); + while (nfix) delete_fix(0); memory->sfree(fix); memory->destroy(fmask); @@ -123,6 +123,7 @@ Modify::~Modify() delete [] list_post_integrate; delete [] list_pre_exchange; delete [] list_pre_neighbor; + delete [] list_post_neighbor; delete [] list_pre_force; delete [] list_pre_reverse; delete [] list_post_force; @@ -137,6 +138,7 @@ Modify::~Modify() delete [] list_final_integrate_respa; delete [] list_min_pre_exchange; delete [] list_min_pre_neighbor; + delete [] list_min_post_neighbor; delete [] list_min_pre_force; delete [] list_min_pre_reverse; delete [] list_min_post_force; @@ -169,6 +171,7 @@ void Modify::init() list_init(POST_INTEGRATE,n_post_integrate,list_post_integrate); list_init(PRE_EXCHANGE,n_pre_exchange,list_pre_exchange); list_init(PRE_NEIGHBOR,n_pre_neighbor,list_pre_neighbor); + list_init(POST_NEIGHBOR,n_post_neighbor,list_post_neighbor); list_init(PRE_FORCE,n_pre_force,list_pre_force); list_init(PRE_REVERSE,n_pre_reverse,list_pre_reverse); list_init(POST_FORCE,n_post_force,list_post_force); @@ -190,6 +193,7 @@ void Modify::init() list_init(MIN_PRE_EXCHANGE,n_min_pre_exchange,list_min_pre_exchange); list_init(MIN_PRE_NEIGHBOR,n_min_pre_neighbor,list_min_pre_neighbor); + 
list_init(MIN_POST_NEIGHBOR,n_min_post_neighbor,list_min_post_neighbor); list_init(MIN_PRE_FORCE,n_min_pre_force,list_min_pre_force); list_init(MIN_PRE_REVERSE,n_min_pre_reverse,list_min_pre_reverse); list_init(MIN_POST_FORCE,n_min_post_force,list_min_post_force); @@ -329,6 +333,21 @@ void Modify::setup_pre_neighbor() fix[list_min_pre_neighbor[i]]->setup_pre_neighbor(); } +/* ---------------------------------------------------------------------- + setup post_neighbor call, only for fixes that define post_neighbor + called from Verlet, RESPA +------------------------------------------------------------------------- */ + +void Modify::setup_post_neighbor() +{ + if (update->whichflag == 1) + for (int i = 0; i < n_post_neighbor; i++) + fix[list_post_neighbor[i]]->setup_post_neighbor(); + else if (update->whichflag == 2) + for (int i = 0; i < n_min_post_neighbor; i++) + fix[list_min_post_neighbor[i]]->setup_post_neighbor(); +} + /* ---------------------------------------------------------------------- setup pre_force call, only for fixes that define pre_force called from Verlet, RESPA, Min @@ -399,6 +418,16 @@ void Modify::pre_neighbor() fix[list_pre_neighbor[i]]->pre_neighbor(); } +/* ---------------------------------------------------------------------- + post_neighbor call, only for relevant fixes +------------------------------------------------------------------------- */ + +void Modify::post_neighbor() +{ + for (int i = 0; i < n_post_neighbor; i++) + fix[list_post_neighbor[i]]->post_neighbor(); +} + /* ---------------------------------------------------------------------- pre_force call, only for relevant fixes ------------------------------------------------------------------------- */ @@ -589,6 +618,16 @@ void Modify::min_pre_neighbor() fix[list_min_pre_neighbor[i]]->min_pre_neighbor(); } +/* ---------------------------------------------------------------------- + minimizer post-neighbor call, only for relevant fixes 
+------------------------------------------------------------------------- */ + +void Modify::min_post_neighbor() +{ + for (int i = 0; i < n_min_post_neighbor; i++) + fix[list_min_post_neighbor[i]]->min_post_neighbor(); +} + /* ---------------------------------------------------------------------- minimizer pre-force call, only for relevant fixes ------------------------------------------------------------------------- */ @@ -863,9 +902,9 @@ void Modify::add_fix(int narg, char **arg, int trysuffix) fix[ifix]->restart(state_restart_global[i]); used_restart_global[i] = 1; if (comm->me == 0) { - if (screen) + if (screen) fprintf(screen,"Resetting global fix info from restart file:\n"); - if (logfile) + if (logfile) fprintf(logfile,"Resetting global fix info from restart file:\n"); if (screen) fprintf(screen," fix style: %s, fix ID: %s\n", fix[ifix]->style,fix[ifix]->id); @@ -885,9 +924,9 @@ void Modify::add_fix(int narg, char **arg, int trysuffix) fix[ifix]->unpack_restart(j,index_restart_peratom[i]); fix[ifix]->restart_reset = 1; if (comm->me == 0) { - if (screen) + if (screen) fprintf(screen,"Resetting peratom fix info from restart file:\n"); - if (logfile) + if (logfile) fprintf(logfile,"Resetting peratom fix info from restart file:\n"); if (screen) fprintf(screen," fix style: %s, fix ID: %s\n", fix[ifix]->style,fix[ifix]->id); @@ -944,7 +983,12 @@ void Modify::delete_fix(const char *id) { int ifix = find_fix(id); if (ifix < 0) error->all(FLERR,"Could not find fix ID to delete"); - delete fix[ifix]; + delete_fix(ifix); +} + +void Modify::delete_fix(int ifix) +{ + if (fix[ifix]) delete fix[ifix]; atom->update_callback(ifix); // move other Fixes and fmask down in list one slot @@ -1409,24 +1453,24 @@ void Modify::restart_deallocate(int flag) if (flag && comm->me == 0) { int i; for (i = 0; i < nfix_restart_global; i++) - if (used_restart_global[i] == 0) break; + if (used_restart_global[i] == 0) break; if (i == nfix_restart_global) { - if (screen) + if (screen) 
fprintf(screen,"All restart file global fix info " "was re-assigned\n"); - if (logfile) + if (logfile) fprintf(logfile,"All restart file global fix info " "was re-assigned\n"); } else { - if (screen) fprintf(screen,"Unused restart file global fix info:\n"); - if (logfile) fprintf(logfile,"Unused restart file global fix info:\n"); - for (i = 0; i < nfix_restart_global; i++) { - if (used_restart_global[i]) continue; - if (screen) fprintf(screen," fix style: %s, fix ID: %s\n", - style_restart_global[i],id_restart_global[i]); - if (logfile) fprintf(logfile," fix style: %s, fix ID: %s\n", - style_restart_global[i],id_restart_global[i]); - } + if (screen) fprintf(screen,"Unused restart file global fix info:\n"); + if (logfile) fprintf(logfile,"Unused restart file global fix info:\n"); + for (i = 0; i < nfix_restart_global; i++) { + if (used_restart_global[i]) continue; + if (screen) fprintf(screen," fix style: %s, fix ID: %s\n", + style_restart_global[i],id_restart_global[i]); + if (logfile) fprintf(logfile," fix style: %s, fix ID: %s\n", + style_restart_global[i],id_restart_global[i]); + } } } @@ -1445,24 +1489,24 @@ void Modify::restart_deallocate(int flag) if (flag && comm->me == 0) { int i; for (i = 0; i < nfix_restart_peratom; i++) - if (used_restart_peratom[i] == 0) break; + if (used_restart_peratom[i] == 0) break; if (i == nfix_restart_peratom) { - if (screen) + if (screen) fprintf(screen,"All restart file peratom fix info " "was re-assigned\n"); - if (logfile) + if (logfile) fprintf(logfile,"All restart file peratom fix info " "was re-assigned\n"); } else { - if (screen) fprintf(screen,"Unused restart file peratom fix info:\n"); - if (logfile) fprintf(logfile,"Unused restart file peratom fix info:\n"); - for (i = 0; i < nfix_restart_peratom; i++) { - if (used_restart_peratom[i]) continue; - if (screen) fprintf(screen," fix style: %s, fix ID: %s\n", - style_restart_peratom[i],id_restart_peratom[i]); - if (logfile) fprintf(logfile," fix style: %s, fix ID: %s\n", - 
style_restart_peratom[i],id_restart_peratom[i]); - } + if (screen) fprintf(screen,"Unused restart file peratom fix info:\n"); + if (logfile) fprintf(logfile,"Unused restart file peratom fix info:\n"); + for (i = 0; i < nfix_restart_peratom; i++) { + if (used_restart_peratom[i]) continue; + if (screen) fprintf(screen," fix style: %s, fix ID: %s\n", + style_restart_peratom[i],id_restart_peratom[i]); + if (logfile) fprintf(logfile," fix style: %s, fix ID: %s\n", + style_restart_peratom[i],id_restart_peratom[i]); + } } } diff --git a/src/modify.h b/src/modify.h index d825d5c4efbf543a709e766253a0a7e600cf8dcf..3e20df5aac152affa32c8b7ff642ed621ffc2ba8 100644 --- a/src/modify.h +++ b/src/modify.h @@ -29,12 +29,13 @@ class Modify : protected Pointers { public: int nfix,maxfix; - int n_initial_integrate,n_post_integrate,n_pre_exchange,n_pre_neighbor; + int n_initial_integrate,n_post_integrate,n_pre_exchange; + int n_pre_neighbor,n_post_neighbor; int n_pre_force,n_pre_reverse,n_post_force; int n_final_integrate,n_end_of_step,n_thermo_energy,n_thermo_energy_atom; int n_initial_integrate_respa,n_post_integrate_respa; int n_pre_force_respa,n_post_force_respa,n_final_integrate_respa; - int n_min_pre_exchange,n_min_pre_neighbor; + int n_min_pre_exchange,n_min_pre_neighbor,n_min_post_neighbor; int n_min_pre_force,n_min_pre_reverse,n_min_post_force,n_min_energy; int restart_pbc_any; // 1 if any fix sets restart_pbc @@ -53,12 +54,14 @@ class Modify : protected Pointers { virtual void setup(int); virtual void setup_pre_exchange(); virtual void setup_pre_neighbor(); + virtual void setup_post_neighbor(); virtual void setup_pre_force(int); virtual void setup_pre_reverse(int, int); virtual void initial_integrate(int); virtual void post_integrate(); virtual void pre_exchange(); virtual void pre_neighbor(); + virtual void post_neighbor(); virtual void pre_force(int); virtual void pre_reverse(int,int); virtual void post_force(int); @@ -78,6 +81,7 @@ class Modify : protected Pointers { 
virtual void min_pre_exchange(); virtual void min_pre_neighbor(); + virtual void min_post_neighbor(); virtual void min_pre_force(int); virtual void min_pre_reverse(int,int); virtual void min_post_force(int); @@ -95,6 +99,7 @@ class Modify : protected Pointers { void add_fix(int, char **, int trysuffix=1); void modify_fix(int, char **); void delete_fix(const char *); + void delete_fix(int); int find_fix(const char *); int find_fix_by_style(const char *); int check_package(const char *); @@ -122,14 +127,14 @@ class Modify : protected Pointers { // lists of fixes to apply at different stages of timestep int *list_initial_integrate,*list_post_integrate; - int *list_pre_exchange,*list_pre_neighbor; + int *list_pre_exchange,*list_pre_neighbor,*list_post_neighbor; int *list_pre_force,*list_pre_reverse,*list_post_force; int *list_final_integrate,*list_end_of_step,*list_thermo_energy; int *list_thermo_energy_atom; int *list_initial_integrate_respa,*list_post_integrate_respa; int *list_pre_force_respa,*list_post_force_respa; int *list_final_integrate_respa; - int *list_min_pre_exchange,*list_min_pre_neighbor; + int *list_min_pre_exchange,*list_min_pre_neighbor,*list_min_post_neighbor; int *list_min_pre_force,*list_min_pre_reverse,*list_min_post_force; int *list_min_energy; diff --git a/src/neigh_list.cpp b/src/neigh_list.cpp index dde544a69f181e669ca349e79b08e3ef101103d7..934b9f7d9b0409825efc3a51d269df88ba46a15b 100644 --- a/src/neigh_list.cpp +++ b/src/neigh_list.cpp @@ -40,16 +40,18 @@ NeighList::NeighList(LAMMPS *lmp) : Pointers(lmp) ilist = NULL; numneigh = NULL; firstneigh = NULL; - firstdouble = NULL; // defaults, but may be reset by post_constructor() occasional = 0; ghost = 0; ssa = 0; + history = 0; + respaouter = 0; + respamiddle = 0; + respainner = 0; copy = 0; copymode = 0; - dnum = 0; // ptrs @@ -60,17 +62,24 @@ NeighList::NeighList(LAMMPS *lmp) : Pointers(lmp) listskip = NULL; listfull = NULL; - listhistory = NULL; - fix_history = NULL; - - respamiddle = 0; - 
listinner = NULL; - listmiddle = NULL; - fix_bond = NULL; ipage = NULL; - dpage = NULL; + + // extra rRESPA lists + + inum_inner = gnum_inner = 0; + ilist_inner = NULL; + numneigh_inner = NULL; + firstneigh_inner = NULL; + + inum_middle = gnum_middle = 0; + ilist_middle = NULL; + numneigh_middle = NULL; + firstneigh_middle = NULL; + + ipage_inner = NULL; + ipage_middle = NULL; // Kokkos package @@ -92,10 +101,21 @@ NeighList::~NeighList() memory->destroy(ilist); memory->destroy(numneigh); memory->sfree(firstneigh); - memory->sfree(firstdouble); - delete [] ipage; - delete [] dpage; + } + + if (respainner) { + memory->destroy(ilist_inner); + memory->destroy(numneigh_inner); + memory->sfree(firstneigh_inner); + delete [] ipage_inner; + } + + if (respamiddle) { + memory->destroy(ilist_middle); + memory->destroy(numneigh_middle); + memory->sfree(firstneigh_middle); + delete [] ipage_middle; } delete [] iskip; @@ -108,8 +128,7 @@ NeighList::~NeighList() copy -> set listcopy for list to copy from skip -> set listskip for list to skip from, create copy of itype,ijtype halffull -> set listfull for full list to derive from - history -> set LH and FH ptrs in partner list that uses the history info - respaouter -> set listinner/listmiddle for other rRESPA lists + respaouter -> set all 3 outer/middle/inner flags bond -> set fix_bond to Fix that made the request ------------------------------------------------------------------------- */ @@ -120,8 +139,11 @@ void NeighList::post_constructor(NeighRequest *nq) occasional = nq->occasional; ghost = nq->ghost; ssa = nq->ssa; + history = nq->history; + respaouter = nq->respaouter; + respamiddle = nq->respamiddle; + respainner = nq->respainner; copy = nq->copy; - dnum = nq->dnum; if (nq->copy) listcopy = neighbor->lists[nq->copylist]; @@ -141,24 +163,6 @@ void NeighList::post_constructor(NeighRequest *nq) if (nq->halffull) listfull = neighbor->lists[nq->halffulllist]; - if (nq->history) { - 
neighbor->lists[nq->historylist]->listhistory = this; - int tmp; - neighbor->lists[nq->historylist]->fix_history = - (Fix *) ((Pair *) nq->requestor)->extract("history",tmp); - } - - if (nq->respaouter) { - if (nq->respamiddlelist < 0) { - respamiddle = 0; - listinner = neighbor->lists[nq->respainnerlist]; - } else { - respamiddle = 1; - listmiddle = neighbor->lists[nq->respamiddlelist]; - listinner = neighbor->lists[nq->respainnerlist]; - } - } - if (nq->bond) fix_bond = (Fix *) nq->requestor; } @@ -174,32 +178,29 @@ void NeighList::setup_pages(int pgsize_caller, int oneatom_caller) for (int i = 0; i < nmypage; i++) ipage[i].init(oneatom,pgsize,PGDELTA); - if (dnum) { - dpage = new MyPage<double>[nmypage]; + if (respainner) { + ipage_inner = new MyPage<int>[nmypage]; for (int i = 0; i < nmypage; i++) - dpage[i].init(dnum*oneatom,dnum*pgsize,PGDELTA); - } else dpage = NULL; + ipage_inner[i].init(oneatom,pgsize,PGDELTA); + } + + if (respamiddle) { + ipage_middle = new MyPage<int>[nmypage]; + for (int i = 0; i < nmypage; i++) + ipage_middle[i].init(oneatom,pgsize,PGDELTA); + } } /* ---------------------------------------------------------------------- grow per-atom data to allow for nlocal/nall atoms - for parent lists: - also trigger grow in child list(s) which are not built themselves - history calls grow() in listhistory - respaouter calls grow() in respainner, respamiddle triggered by neighbor list build not called if a copy list ------------------------------------------------------------------------- */ void NeighList::grow(int nlocal, int nall) { - // trigger grow() in children before possible return - - if (listhistory) listhistory->grow(nlocal,nall); - if (listinner) listinner->grow(nlocal,nall); - if (listmiddle) listmiddle->grow(nlocal,nall); - // skip if data structs are already big enough + if (ssa) { if ((nlocal * 3) + nall <= maxatom) return; } else if (ghost) { @@ -218,10 +219,25 @@ void NeighList::grow(int nlocal, int nall) 
memory->create(numneigh,maxatom,"neighlist:numneigh"); firstneigh = (int **) memory->smalloc(maxatom*sizeof(int *), "neighlist:firstneigh"); - if (dnum) { - memory->sfree(firstdouble); - firstdouble = (double **) memory->smalloc(maxatom*sizeof(double *), - "neighlist:firstdouble"); + + if (respainner) { + memory->destroy(ilist_inner); + memory->destroy(numneigh_inner); + memory->sfree(firstneigh_inner); + memory->create(ilist_inner,maxatom,"neighlist:ilist_inner"); + memory->create(numneigh_inner,maxatom,"neighlist:numneigh_inner"); + firstneigh_inner = (int **) memory->smalloc(maxatom*sizeof(int *), + "neighlist:firstneigh_inner"); + } + + if (respamiddle) { + memory->destroy(ilist_middle); + memory->destroy(numneigh_middle); + memory->sfree(firstneigh_middle); + memory->create(ilist_middle,maxatom,"neighlist:ilist_middle"); + memory->create(numneigh_middle,maxatom,"neighlist:numneigh_middle"); + firstneigh_middle = (int **) memory->smalloc(maxatom*sizeof(int *), + "neighlist:firstneigh_middle"); } } @@ -253,22 +269,20 @@ void NeighList::print_attributes() printf(" %d = size\n",rq->size); printf(" %d = history\n",rq->history); printf(" %d = granonesided\n",rq->granonesided); - printf(" %d = respainner\n",rq->respainner); - printf(" %d = respamiddle\n",rq->respamiddle); printf(" %d = respaouter\n",rq->respaouter); + printf(" %d = respamiddle\n",rq->respamiddle); + printf(" %d = respainner\n",rq->respainner); printf(" %d = bond\n",rq->bond); printf(" %d = omp\n",rq->omp); printf(" %d = intel\n",rq->intel); printf(" %d = kokkos host\n",rq->kokkos_host); printf(" %d = kokkos device\n",rq->kokkos_device); printf(" %d = ssa flag\n",ssa); - printf(" %d = dnum\n",dnum); printf("\n"); printf(" %d = skip flag\n",rq->skip); printf(" %d = off2on\n",rq->off2on); printf(" %d = copy flag\n",rq->copy); printf(" %d = half/full\n",rq->halffull); - printf(" %d = history/partner\n",rq->history_partner); printf("\n"); } @@ -292,10 +306,23 @@ bigint NeighList::memory_usage() bytes += 
ipage[i].size(); } - if (dnum && dpage) { - for (int i = 0; i < nmypage; i++) { - bytes += maxatom * sizeof(double *); - bytes += dpage[i].size(); + if (respainner) { + bytes += memory->usage(ilist_inner,maxatom); + bytes += memory->usage(numneigh_inner,maxatom); + bytes += maxatom * sizeof(int *); + if (ipage_inner) { + for (int i = 0; i < nmypage; i++) + bytes += ipage_inner[i].size(); + } + } + + if (respamiddle) { + bytes += memory->usage(ilist_middle,maxatom); + bytes += memory->usage(numneigh_middle,maxatom); + bytes += maxatom * sizeof(int *); + if (ipage_middle) { + for (int i = 0; i < nmypage; i++) + bytes += ipage_middle[i].size(); } } diff --git a/src/neigh_list.h b/src/neigh_list.h index 4010a68857f3e9557c16224dc6e444cc652b8d79..d633ba839e0fc286856f54148a57f64f6559fc0c 100644 --- a/src/neigh_list.h +++ b/src/neigh_list.h @@ -34,9 +34,12 @@ class NeighList : protected Pointers { int occasional; // 0 if build every reneighbor, 1 if not int ghost; // 1 if list stores neighbors of ghosts int ssa; // 1 if list stores Shardlow data - int copy; // 1 if this list is (host) copied from another list + int history; // 1 if there is neigh history (FixNeighHist) + int respaouter; // 1 if list is a rRespa outer list + int respamiddle; // 1 if there is also a rRespa middle list + int respainner; // 1 if there is also a rRespa inner list + int copy; // 1 if this list is copied from another list int copymode; // 1 if this is a Kokkos on-device copy - int dnum; // # of doubles per neighbor, 0 if none // data structs to store neighbor pairs I,J and associated values @@ -45,13 +48,28 @@ class NeighList : protected Pointers { int *ilist; // local indices of I atoms int *numneigh; // # of J neighbors for each I atom int **firstneigh; // ptr to 1st J int value of each I atom - double **firstdouble; // ptr to 1st J double value of each I atom int maxatom; // size of allocated per-atom arrays int pgsize; // size of each page int oneatom; // max size for one atom MyPage<int> 
*ipage; // pages of neighbor indices - MyPage<double> *dpage; // pages of neighbor doubles, if dnum > 0 + + // data structs to store rRESPA neighbor pairs I,J and associated values + + int inum_inner; // # of I atoms neighbors are stored for + int gnum_inner; // # of ghost atoms neighbors are stored for + int *ilist_inner; // local indices of I atoms + int *numneigh_inner; // # of J neighbors for each I atom + int **firstneigh_inner; // ptr to 1st J int value of each I atom + + int inum_middle; // # of I atoms neighbors are stored for + int gnum_middle; // # of ghost atoms neighbors are stored for + int *ilist_middle; // local indices of I atoms + int *numneigh_middle; // # of J neighbors for each I atom + int **firstneigh_middle; // ptr to 1st J int value of each I atom + + MyPage<int> *ipage_inner; // pages of neighbor indices for inner + MyPage<int> *ipage_middle; // pages of neighbor indices for middle // atom types to skip when building list // copied info from corresponding request into realloced vec/array @@ -65,13 +83,6 @@ class NeighList : protected Pointers { NeighList *listskip; // me = skip list, point to list I skip from NeighList *listfull; // me = half list, point to full I derive from - NeighList *listhistory; // list storing neigh history - class Fix *fix_history; // fix that stores history info - - int respamiddle; // 1 if this respaouter has middle list - NeighList *listinner; // me = respaouter, point to respainner - NeighList *listmiddle; // me = respaouter, point to respamiddle - class Fix *fix_bond; // fix that stores bond info // Kokkos package diff --git a/src/neigh_request.cpp b/src/neigh_request.cpp index 8d720e766cc6930c7d3f65ffa5e2582368d5a1a6..6325eec56683e892b5384f20ed2d9610dfdb19fa 100644 --- a/src/neigh_request.cpp +++ b/src/neigh_request.cpp @@ -42,7 +42,7 @@ NeighRequest::NeighRequest(LAMMPS *lmp) : Pointers(lmp) // default is use newton_pair setting in force // default is no neighbors of ghosts // default is use cutoffs, not size 
of particles - // default is no additional neighbor history info + // default is no associated neighbor history info in FixNeighHistory // default is no one-sided sphere/surface interactions (when size = 1) // default is neighbors of atoms, not bonds // default is no multilevel rRESPA neighbors @@ -68,8 +68,6 @@ NeighRequest::NeighRequest(LAMMPS *lmp) : Pointers(lmp) cut = 0; cutoff = 0.0; - dnum = 0; - // skip info, default is no skipping skip = 0; @@ -88,11 +86,6 @@ NeighRequest::NeighRequest(LAMMPS *lmp) : Pointers(lmp) copylist = -1; halffull = 0; halffulllist = -1; - history_partner = 0; - historylist = -1; - respaouterlist = -1; - respamiddlelist = -1; - respainnerlist = -1; unique = 0; // internal settings @@ -158,8 +151,6 @@ int NeighRequest::identical(NeighRequest *other) if (copy != other->copy) same = 0; if (cutoff != other->cutoff) same = 0; - if (dnum != other->dnum) same = 0; - if (skip != other->skip) same = 0; if (skip) same = same_skip(other); @@ -226,8 +217,6 @@ void NeighRequest::copy_request(NeighRequest *other, int skipflag) cut = other->cut; cutoff = other->cutoff; - dnum = other->dnum; - iskip = NULL; ijskip = NULL; diff --git a/src/neigh_request.h b/src/neigh_request.h index 70f7783a70c7af6de31346892f2356e55ce64f8c..16e6f1a8c020e822432269448052e43a8548dc9f 100644 --- a/src/neigh_request.h +++ b/src/neigh_request.h @@ -59,12 +59,12 @@ class NeighRequest : protected Pointers { int ghost; // 1 if includes ghost atom neighbors int size; // 1 if pair cutoff set by particle radius - int history; // 1 if stores neighbor history info + int history; // 1 if there is also neigh history info (FixNeighHist) int granonesided; // 1 if one-sided granular list for // sphere/surf interactions - int respainner; // 1 if a rRESPA inner list - int respamiddle; // 1 if a rRESPA middle list - int respaouter; // 1 if a rRESPA outer list + int respainner; // 1 if need a rRESPA inner list + int respamiddle; // 1 if need a rRESPA middle list + int respaouter; // 1 if 
need a rRESPA outer list int bond; // 1 if store bond neighbors instead of atom neighs int omp; // set by USER-OMP package int intel; // set by USER-INTEL package @@ -74,8 +74,6 @@ class NeighRequest : protected Pointers { int cut; // 1 if use a non-standard cutoff length double cutoff; // special cutoff distance for this list - int dnum; // # of extra floating point values stored in list - // flags set by pair hybrid int skip; // 1 if this list skips atom types from another list @@ -100,21 +98,9 @@ class NeighRequest : protected Pointers { int halffull; // 1 if half list computed from another full list int halffulllist; // index of full list to derive half from - int history_partner; // 1 if this list partners with a history list - int historylist; // index of the associated history list - // for history = 1, index of the non-history partner - - int respaouterlist; // index of respaouter/middle/inner lists - int respamiddlelist; // which this rREPSA list is associated with - int respainnerlist; // each rRESPA style list points at the others - int unique; // 1 if this list requires its own // NStencil, Nbin class - because of requestor cutoff - // pointer to FSH class, set by requestor class (not by Neighbor) - - class FixShearHistory *fix_history; // fix that stores per-atom history info - // ----------------------------- // internal settings made by Neighbor class // ----------------------------- diff --git a/src/neighbor.cpp b/src/neighbor.cpp index a460be0065de2b042280618032ec5b0cb776f8b6..cc2e5d6d11a4c6e6df8f0799a5d3365fee7b8330 100644 --- a/src/neighbor.cpp +++ b/src/neighbor.cpp @@ -133,8 +133,6 @@ pairclass(NULL), pairnames(NULL), pairmasks(NULL) old_pgsize = pgsize; old_oneatom = oneatom; - zeroes = NULL; - binclass = NULL; binnames = NULL; binmasks = NULL; @@ -208,8 +206,6 @@ Neighbor::~Neighbor() if (old_requests[i]) delete old_requests[i]; memory->sfree(old_requests); - delete [] zeroes; - delete [] binclass; delete [] binnames; delete [] binmasks; @@ 
-666,14 +662,12 @@ int Neighbor::init_pair() // purpose is to avoid duplicate or inefficient builds // may add new requests if a needed request to derive from does not exist // methods: - // (1) other = point history and rRESPA lists at their partner lists + // (1) unique = create unique lists if cutoff is explicitly set // (2) skip = create any new non-skip lists needed by pair hybrid skip lists // (3) granular = adjust parent and skip lists for granular onesided usage // (4) h/f = pair up any matching half/full lists // (5) copy = convert as many lists as possible to copy lists // order of morph methods matters: - // (1) before (2), b/c (2) needs to know history partner pairings - // (2) after (1), b/c (2) may also need to create new history lists // (3) after (2), b/c it adjusts lists created by (2) // (4) after (2) and (3), // b/c (2) may create new full lists, (3) may change them @@ -681,7 +675,7 @@ int Neighbor::init_pair() int nrequest_original = nrequest; - morph_other(); + morph_unique(); morph_skip(); morph_granular(); // this method can change flags set by requestor morph_halffull(); @@ -827,23 +821,13 @@ int Neighbor::init_pair() } // allocate initial pages for each list, except if copy flag set - // allocate dnum vector of zeroes if set - int dnummax = 0; for (i = 0; i < nlist; i++) { if (lists[i]->copy) continue; lists[i]->setup_pages(pgsize,oneatom); - dnummax = MAX(dnummax,lists[i]->dnum); - } - - if (dnummax) { - delete [] zeroes; - zeroes = new double[dnummax]; - for (i = 0; i < dnummax; i++) zeroes[i] = 0.0; } // first-time allocation of per-atom data for lists that are built and store - // lists that are not built: granhistory, respa inner/middle (no neigh_pair) // lists that do not store: copy // use atom->nmax for both grow() args // i.e. 
grow first time to expanded size to avoid future reallocs @@ -923,40 +907,16 @@ int Neighbor::init_pair() /* ---------------------------------------------------------------------- scan NeighRequests to set additional flags - only for history, respaouter, custom cutoff lists + only for custom cutoff lists ------------------------------------------------------------------------- */ -void Neighbor::morph_other() +void Neighbor::morph_unique() { NeighRequest *irq; for (int i = 0; i < nrequest; i++) { irq = requests[i]; - // if history, point this list and partner list at each other - - if (irq->history) { - irq->historylist = i-1; - requests[i-1]->history_partner = 1; - requests[i-1]->historylist = i; - } - - // if respaouter, point all associated rRESPA lists at each other - - if (irq->respaouter) { - if (requests[i-1]->respainner) { - irq->respainnerlist = i-1; - requests[i-1]->respaouterlist = i; - } else { - irq->respamiddlelist = i-1; - requests[i-1]->respaouterlist = i; - requests[i-1]->respainnerlist = i-1; - irq->respainnerlist = i-2; - requests[i-2]->respaouterlist = i; - requests[i-2]->respamiddlelist = i-1; - } - } - // if cut flag set by requestor, set unique flag // this forces Pair,Stencil,Bin styles to be instantiated separately @@ -987,8 +947,6 @@ void Neighbor::morph_skip() // halffull list and its full parent may both skip, // but are checked to insure matching skip info - if (irq->history) continue; - if (irq->respainner || irq->respamiddle) continue; if (irq->halffull) continue; if (irq->copy) continue; @@ -1021,12 +979,11 @@ void Neighbor::morph_skip() // else 2 lists do not store same pairs // or their data structures are different // this includes custom cutoff set by requestor - // no need to check respaouter b/c it stores same pairs - // no need to check dnum b/c only set for history // NOTE: need check for 2 Kokkos flags? 
if (irq->ghost != jrq->ghost) continue; if (irq->size != jrq->size) continue; + if (irq->history != jrq->history) continue; if (irq->bond != jrq->bond) continue; if (irq->omp != jrq->omp) continue; if (irq->intel != jrq->intel) continue; @@ -1045,8 +1002,8 @@ void Neighbor::morph_skip() // else create a new identical list except non-skip // for new list, set neigh = 1, skip = 0, no skip vec/array, // copy unique flag (since copy_request() will not do it) - // note: parents of skip lists do not have associated history list - // b/c child skip lists store their own history info + // note: parents of skip lists do not have associated history + // b/c child skip lists have the associated history if (j < nrequest) irq->skiplist = j; else { @@ -1107,7 +1064,6 @@ void Neighbor::morph_granular() if (onesided == 2) break; } - // if onesided = 2, parent has children with both granonesided = 0/1 // force parent newton off (newton = 2) to enable onesided skip by child // set parent granonesided = 0, so it stores all neighs in usual manner @@ -1159,8 +1115,6 @@ void Neighbor::morph_halffull() // these lists are created other ways, no need for halffull // do want to process skip lists - if (irq->history) continue; - if (irq->respainner || irq->respamiddle) continue; if (irq->copy) continue; // check all other lists @@ -1179,11 +1133,10 @@ void Neighbor::morph_halffull() // else 2 lists do not store same pairs // or their data structures are different // this includes custom cutoff set by requestor - // no need to check respaouter b/c it stores same pairs - // no need to check dnum b/c only set for history if (irq->ghost != jrq->ghost) continue; if (irq->size != jrq->size) continue; + if (irq->history != jrq->history) continue; if (irq->bond != jrq->bond) continue; if (irq->omp != jrq->omp) continue; if (irq->intel != jrq->intel) continue; @@ -1230,12 +1183,6 @@ void Neighbor::morph_copy() if (irq->copy) continue; - // these lists are created other ways, no need to copy - // skip 
lists are eligible to become a copy list - - if (irq->history) continue; - if (irq->respainner || irq->respamiddle) continue; - // check all other lists for (j = 0; j < nrequest; j++) { @@ -1272,9 +1219,9 @@ void Neighbor::morph_copy() if (irq->ghost && !jrq->ghost) continue; - // do not copy from a history list or a respa middle/inner list + // do not copy from a list with respa middle/inner + // b/c its outer list will not be complete - if (jrq->history) continue; if (jrq->respamiddle) continue; if (jrq->respainner) continue; @@ -1282,12 +1229,11 @@ void Neighbor::morph_copy() // else 2 lists do not store same pairs // or their data structures are different // this includes custom cutoff set by requestor - // no need to check respaouter b/c it stores same pairs // no need to check omp b/c it stores same pairs - // no need to check dnum b/c only set for history // NOTE: need check for 2 Kokkos flags? if (irq->size != jrq->size) continue; + if (irq->history != jrq->history) continue; if (irq->bond != jrq->bond) continue; if (irq->intel != jrq->intel) continue; if (irq->kokkos_host != jrq->kokkos_host) continue; @@ -1535,9 +1481,7 @@ void Neighbor::print_pairwise_info() // order these to get single output of most relevant - if (rq->history) - fprintf(out,", history for (%d)",rq->historylist+1); - else if (rq->copy) + if (rq->copy) fprintf(out,", copy from (%d)",rq->copylist+1); else if (rq->halffull) fprintf(out,", half/full from (%d)",rq->halffulllist+1); @@ -1562,9 +1506,8 @@ void Neighbor::print_pairwise_info() if (rq->size) fprintf(out,", size"); if (rq->history) fprintf(out,", history"); if (rq->granonesided) fprintf(out,", onesided"); - if (rq->respainner) fprintf(out,", respa outer"); - if (rq->respamiddle) fprintf(out,", respa middle"); - if (rq->respaouter) fprintf(out,", respa inner"); + if (rq->respamiddle) fprintf(out,", respa outer/middle/inner"); + else if (rq->respainner) fprintf(out,", respa outer/inner"); if (rq->bond) fprintf(out,", bond"); if 
(rq->omp) fprintf(out,", omp"); if (rq->intel) fprintf(out,", intel"); @@ -1659,8 +1602,6 @@ int Neighbor::choose_bin(NeighRequest *rq) if (style == NSQ) return 0; if (rq->skip || rq->copy || rq->halffull) return 0; - if (rq->history) return 0; - if (rq->respainner || rq->respamiddle) return 0; // use request settings to match exactly one NBin class mask // checks are bitwise using NeighConst bit masks @@ -1701,8 +1642,6 @@ int Neighbor::choose_stencil(NeighRequest *rq) if (style == NSQ) return 0; if (rq->skip || rq->copy || rq->halffull) return 0; - if (rq->history) return 0; - if (rq->respainner || rq->respamiddle) return 0; // convert newton request to newtflag = on or off @@ -1793,11 +1732,6 @@ int Neighbor::choose_stencil(NeighRequest *rq) int Neighbor::choose_pair(NeighRequest *rq) { - // no neighbor list build performed - - if (rq->history) return 0; - if (rq->respainner || rq->respamiddle) return 0; - // error check for includegroup with ghost neighbor request if (includegroup && rq->ghost) diff --git a/src/neighbor.h b/src/neighbor.h index 64bced2293daec3bf040092a5097701ed0de1f8a..9244bc575d9b355780996e98e6fb4b63f9aa0651 100644 --- a/src/neighbor.h +++ b/src/neighbor.h @@ -54,7 +54,6 @@ class Neighbor : protected Pointers { double *bboxlo,*bboxhi; // ptrs to full domain bounding box // different for orthog vs triclinic - double *zeroes; // vector of zeroes for shear history init // exclusion info, used by NeighPair @@ -205,7 +204,7 @@ class Neighbor : protected Pointers { int init_pair(); virtual void init_topology(); - void morph_other(); + void morph_unique(); void morph_skip(); void morph_granular(); void morph_halffull(); diff --git a/src/npair.cpp b/src/npair.cpp index 9fbb4d219db3b6f8e357b43f1e70cc3b7ab29cea..dd3a73926e0d2b363da42231ad245a13b1386280 100644 --- a/src/npair.cpp +++ b/src/npair.cpp @@ -66,7 +66,6 @@ void NPair::copy_neighbor_info() cut_inner_sq = neighbor->cut_inner_sq; cut_middle_sq = neighbor->cut_middle_sq; cut_middle_inside_sq = 
neighbor->cut_middle_inside_sq; - zeroes = neighbor->zeroes; bboxlo = neighbor->bboxlo; bboxhi = neighbor->bboxhi; diff --git a/src/npair.h b/src/npair.h index 6941b86164c674a81ecfd282af718fafe6aa646a..289a1348d92b7747f97196e7c7472fbace433c51 100644 --- a/src/npair.h +++ b/src/npair.h @@ -47,7 +47,6 @@ class NPair : protected Pointers { double cut_inner_sq; double cut_middle_sq; double cut_middle_inside_sq; - double *zeroes; double *bboxlo,*bboxhi; // exclusion data from Neighbor class diff --git a/src/npair_copy.cpp b/src/npair_copy.cpp index 1799d48fede85194fa9fc16989395b175d1958df..9426d22ed3595dd2157d20adbf56265d3f3bb42b 100644 --- a/src/npair_copy.cpp +++ b/src/npair_copy.cpp @@ -40,7 +40,5 @@ void NPairCopy::build(NeighList *list) list->ilist = listcopy->ilist; list->numneigh = listcopy->numneigh; list->firstneigh = listcopy->firstneigh; - list->firstdouble = listcopy->firstdouble; list->ipage = listcopy->ipage; - list->dpage = listcopy->dpage; } diff --git a/src/npair_half_respa_bin_newtoff.cpp b/src/npair_half_respa_bin_newtoff.cpp index 11246b4af8746f7b008e8872cebcd64cc71a66bd..0145771f4aafa11a3083c70466bb9476ad771b29 100644 --- a/src/npair_half_respa_bin_newtoff.cpp +++ b/src/npair_half_respa_bin_newtoff.cpp @@ -63,22 +63,19 @@ void NPairHalfRespaBinNewtoff::build(NeighList *list) int **firstneigh = list->firstneigh; MyPage<int> *ipage = list->ipage; - NeighList *listinner = list->listinner; - int *ilist_inner = listinner->ilist; - int *numneigh_inner = listinner->numneigh; - int **firstneigh_inner = listinner->firstneigh; - MyPage<int> *ipage_inner = listinner->ipage; + int *ilist_inner = list->ilist_inner; + int *numneigh_inner = list->numneigh_inner; + int **firstneigh_inner = list->firstneigh_inner; + MyPage<int> *ipage_inner = list->ipage_inner; - NeighList *listmiddle; int *ilist_middle,*numneigh_middle,**firstneigh_middle; MyPage<int> *ipage_middle; int respamiddle = list->respamiddle; if (respamiddle) { - listmiddle = list->listmiddle; - 
ilist_middle = listmiddle->ilist; - numneigh_middle = listmiddle->numneigh; - firstneigh_middle = listmiddle->firstneigh; - ipage_middle = listmiddle->ipage; + ilist_middle = list->ilist_middle; + numneigh_middle = list->numneigh_middle; + firstneigh_middle = list->firstneigh_middle; + ipage_middle = list->ipage_middle; } int inum = 0; @@ -185,6 +182,6 @@ void NPairHalfRespaBinNewtoff::build(NeighList *list) } list->inum = inum; - listinner->inum = inum; - if (respamiddle) listmiddle->inum = inum; + list->inum_inner = inum; + if (respamiddle) list->inum_middle = inum; } diff --git a/src/npair_half_respa_bin_newton.cpp b/src/npair_half_respa_bin_newton.cpp index db766780360b2fc382eec5a87e9a1c8d5722da9d..72a613204d889c8d7c5873d424aa8d9491b69f48 100644 --- a/src/npair_half_respa_bin_newton.cpp +++ b/src/npair_half_respa_bin_newton.cpp @@ -62,22 +62,19 @@ void NPairHalfRespaBinNewton::build(NeighList *list) int **firstneigh = list->firstneigh; MyPage<int> *ipage = list->ipage; - NeighList *listinner = list->listinner; - int *ilist_inner = listinner->ilist; - int *numneigh_inner = listinner->numneigh; - int **firstneigh_inner = listinner->firstneigh; - MyPage<int> *ipage_inner = listinner->ipage; + int *ilist_inner = list->ilist_inner; + int *numneigh_inner = list->numneigh_inner; + int **firstneigh_inner = list->firstneigh_inner; + MyPage<int> *ipage_inner = list->ipage_inner; - NeighList *listmiddle; int *ilist_middle,*numneigh_middle,**firstneigh_middle; MyPage<int> *ipage_middle; int respamiddle = list->respamiddle; if (respamiddle) { - listmiddle = list->listmiddle; - ilist_middle = listmiddle->ilist; - numneigh_middle = listmiddle->numneigh; - firstneigh_middle = listmiddle->firstneigh; - ipage_middle = listmiddle->ipage; + ilist_middle = list->ilist_middle; + numneigh_middle = list->numneigh_middle; + firstneigh_middle = list->firstneigh_middle; + ipage_middle = list->ipage_middle; } int inum = 0; @@ -231,6 +228,6 @@ void NPairHalfRespaBinNewton::build(NeighList 
*list) } list->inum = inum; - listinner->inum = inum; - if (respamiddle) listmiddle->inum = inum; + list->inum_inner = inum; + if (respamiddle) list->inum_middle = inum; } diff --git a/src/npair_half_respa_bin_newton_tri.cpp b/src/npair_half_respa_bin_newton_tri.cpp index 4ec6685e1d4580024458891ae3da7a436b79753b..add1cf6e5cb50b36fed9dc0d30b8fff7e554ecbe 100644 --- a/src/npair_half_respa_bin_newton_tri.cpp +++ b/src/npair_half_respa_bin_newton_tri.cpp @@ -63,22 +63,19 @@ void NPairHalfRespaBinNewtonTri::build(NeighList *list) int **firstneigh = list->firstneigh; MyPage<int> *ipage = list->ipage; - NeighList *listinner = list->listinner; - int *ilist_inner = listinner->ilist; - int *numneigh_inner = listinner->numneigh; - int **firstneigh_inner = listinner->firstneigh; - MyPage<int> *ipage_inner = listinner->ipage; + int *ilist_inner = list->ilist_inner; + int *numneigh_inner = list->numneigh_inner; + int **firstneigh_inner = list->firstneigh_inner; + MyPage<int> *ipage_inner = list->ipage_inner; - NeighList *listmiddle; int *ilist_middle,*numneigh_middle,**firstneigh_middle; MyPage<int> *ipage_middle; int respamiddle = list->respamiddle; if (respamiddle) { - listmiddle = list->listmiddle; - ilist_middle = listmiddle->ilist; - numneigh_middle = listmiddle->numneigh; - firstneigh_middle = listmiddle->firstneigh; - ipage_middle = listmiddle->ipage; + ilist_middle = list->ilist_middle; + numneigh_middle = list->numneigh_middle; + firstneigh_middle = list->firstneigh_middle; + ipage_middle = list->ipage_middle; } int inum = 0; @@ -193,6 +190,6 @@ void NPairHalfRespaBinNewtonTri::build(NeighList *list) } list->inum = inum; - listinner->inum = inum; - if (respamiddle) listmiddle->inum = inum; + list->inum_inner = inum; + if (respamiddle) list->inum_middle = inum; } diff --git a/src/npair_half_respa_nsq_newtoff.cpp b/src/npair_half_respa_nsq_newtoff.cpp index 1bb2034384f5bd72915f032f747748923feee0c2..c0e932f0aeededae43300ff794cd5508fc8eb2b5 100644 --- 
a/src/npair_half_respa_nsq_newtoff.cpp +++ b/src/npair_half_respa_nsq_newtoff.cpp @@ -67,22 +67,19 @@ void NPairHalfRespaNsqNewtoff::build(NeighList *list) int **firstneigh = list->firstneigh; MyPage<int> *ipage = list->ipage; - NeighList *listinner = list->listinner; - int *ilist_inner = listinner->ilist; - int *numneigh_inner = listinner->numneigh; - int **firstneigh_inner = listinner->firstneigh; - MyPage<int> *ipage_inner = listinner->ipage; + int *ilist_inner = list->ilist_inner; + int *numneigh_inner = list->numneigh_inner; + int **firstneigh_inner = list->firstneigh_inner; + MyPage<int> *ipage_inner = list->ipage_inner; - NeighList *listmiddle; int *ilist_middle,*numneigh_middle,**firstneigh_middle; MyPage<int> *ipage_middle; int respamiddle = list->respamiddle; if (respamiddle) { - listmiddle = list->listmiddle; - ilist_middle = listmiddle->ilist; - numneigh_middle = listmiddle->numneigh; - firstneigh_middle = listmiddle->firstneigh; - ipage_middle = listmiddle->ipage; + ilist_middle = list->ilist_middle; + numneigh_middle = list->numneigh_middle; + firstneigh_middle = list->firstneigh_middle; + ipage_middle = list->ipage_middle; } int inum = 0; @@ -180,6 +177,6 @@ void NPairHalfRespaNsqNewtoff::build(NeighList *list) } list->inum = inum; - listinner->inum = inum; - if (respamiddle) listmiddle->inum = inum; + list->inum_inner = inum; + if (respamiddle) list->inum_middle = inum; } diff --git a/src/npair_half_respa_nsq_newton.cpp b/src/npair_half_respa_nsq_newton.cpp index 9aacc702cc854caab4faded04a6090e7ae7e8c7a..f7d161896d47bc520278b68fd0674ef5fa2eae82 100644 --- a/src/npair_half_respa_nsq_newton.cpp +++ b/src/npair_half_respa_nsq_newton.cpp @@ -69,22 +69,19 @@ void NPairHalfRespaNsqNewton::build(NeighList *list) int **firstneigh = list->firstneigh; MyPage<int> *ipage = list->ipage; - NeighList *listinner = list->listinner; - int *ilist_inner = listinner->ilist; - int *numneigh_inner = listinner->numneigh; - int **firstneigh_inner = listinner->firstneigh; - 
MyPage<int> *ipage_inner = listinner->ipage; + int *ilist_inner = list->ilist_inner; + int *numneigh_inner = list->numneigh_inner; + int **firstneigh_inner = list->firstneigh_inner; + MyPage<int> *ipage_inner = list->ipage_inner; - NeighList *listmiddle; int *ilist_middle,*numneigh_middle,**firstneigh_middle; MyPage<int> *ipage_middle; int respamiddle = list->respamiddle; if (respamiddle) { - listmiddle = list->listmiddle; - ilist_middle = listmiddle->ilist; - numneigh_middle = listmiddle->numneigh; - firstneigh_middle = listmiddle->firstneigh; - ipage_middle = listmiddle->ipage; + ilist_middle = list->ilist_middle; + numneigh_middle = list->numneigh_middle; + firstneigh_middle = list->firstneigh_middle; + ipage_middle = list->ipage_middle; } int inum = 0; @@ -200,6 +197,6 @@ void NPairHalfRespaNsqNewton::build(NeighList *list) } list->inum = inum; - listinner->inum = inum; - if (respamiddle) listmiddle->inum = inum; + list->inum_inner = inum; + if (respamiddle) list->inum_middle = inum; } diff --git a/src/npair_half_size_bin_newtoff.cpp b/src/npair_half_size_bin_newtoff.cpp index 571b2484ea425aee53439974d08eb974d0cf2604..cf608b5d595ab5d27cbc287b64b78a77efdb317a 100644 --- a/src/npair_half_size_bin_newtoff.cpp +++ b/src/npair_half_size_bin_newtoff.cpp @@ -17,9 +17,6 @@ #include "neigh_list.h" #include "atom.h" #include "atom_vec.h" -#include "molecule.h" -#include "domain.h" -#include "fix_shear_history.h" #include "my_page.h" #include "error.h" @@ -32,7 +29,6 @@ NPairHalfSizeBinNewtoff::NPairHalfSizeBinNewtoff(LAMMPS *lmp) : NPair(lmp) {} /* ---------------------------------------------------------------------- size particles binned neighbor list construction with partial Newton's 3rd law - shear history must be accounted for when a neighbor pair is added each owned atom i checks own bin and surrounding bins in non-Newton stencil pair stored once if i,j are both owned and i < j pair stored by me if j is ghost (also stored by proc owning j) @@ -40,20 +36,10 @@ 
NPairHalfSizeBinNewtoff::NPairHalfSizeBinNewtoff(LAMMPS *lmp) : NPair(lmp) {} void NPairHalfSizeBinNewtoff::build(NeighList *list) { - int i,j,k,m,n,nn,ibin,dnum,dnumbytes; + int i,j,k,m,n,nn,ibin; double xtmp,ytmp,ztmp,delx,dely,delz,rsq; double radi,radsum,cutsq; - int *neighptr,*touchptr; - double *shearptr; - - int *npartner; - tagint **partner; - double **shearpartner; - int **firsttouch; - double **firstshear; - MyPage<int> *ipage_touch; - MyPage<double> *dpage_shear; - NeighList *listhistory; + int *neighptr; double **x = atom->x; double *radius = atom->radius; @@ -64,42 +50,20 @@ void NPairHalfSizeBinNewtoff::build(NeighList *list) int nlocal = atom->nlocal; if (includegroup) nlocal = atom->nfirst; + int history = list->history; int *ilist = list->ilist; int *numneigh = list->numneigh; int **firstneigh = list->firstneigh; MyPage<int> *ipage = list->ipage; - FixShearHistory *fix_history = (FixShearHistory *) list->fix_history; - if (fix_history) { - fix_history->nlocal_neigh = nlocal; - fix_history->nall_neigh = nlocal + atom->nghost; - npartner = fix_history->npartner; - partner = fix_history->partner; - shearpartner = fix_history->shearpartner; - listhistory = list->listhistory; - firsttouch = listhistory->firstneigh; - firstshear = listhistory->firstdouble; - ipage_touch = listhistory->ipage; - dpage_shear = listhistory->dpage; - dnum = listhistory->dnum; - dnumbytes = dnum * sizeof(double); - } + int mask_history = 3 << SBBITS; int inum = 0; ipage->reset(); - if (fix_history) { - ipage_touch->reset(); - dpage_shear->reset(); - } for (i = 0; i < nlocal; i++) { n = 0; neighptr = ipage->vget(); - if (fix_history) { - nn = 0; - touchptr = ipage_touch->vget(); - shearptr = dpage_shear->vget(); - } xtmp = x[i][0]; ytmp = x[i][1]; @@ -116,38 +80,19 @@ void NPairHalfSizeBinNewtoff::build(NeighList *list) for (j = binhead[ibin+stencil[k]]; j >= 0; j = bins[j]) { if (j <= i) continue; if (exclude && exclusion(i,j,type[i],type[j],mask,molecule)) continue; - + delx 
= xtmp - x[j][0]; dely = ytmp - x[j][1]; delz = ztmp - x[j][2]; rsq = delx*delx + dely*dely + delz*delz; radsum = radi + radius[j]; cutsq = (radsum+skin) * (radsum+skin); - + if (rsq <= cutsq) { - neighptr[n] = j; - - if (fix_history) { - if (rsq < radsum*radsum) { - for (m = 0; m < npartner[i]; m++) - if (partner[i][m] == tag[j]) break; - if (m < npartner[i]) { - touchptr[n] = 1; - memcpy(&shearptr[nn],&shearpartner[i][dnum*m],dnumbytes); - nn += dnum; - } else { - touchptr[n] = 0; - memcpy(&shearptr[nn],zeroes,dnumbytes); - nn += dnum; - } - } else { - touchptr[n] = 0; - memcpy(&shearptr[nn],zeroes,dnumbytes); - nn += dnum; - } - } - - n++; + if (history && rsq < radsum*radsum) + neighptr[n++] = j ^ mask_history; + else + neighptr[n++] = j; } } } @@ -158,13 +103,6 @@ void NPairHalfSizeBinNewtoff::build(NeighList *list) ipage->vgot(n); if (ipage->status()) error->one(FLERR,"Neighbor list overflow, boost neigh_modify one"); - - if (fix_history) { - firsttouch[i] = touchptr; - firstshear[i] = shearptr; - ipage_touch->vgot(n); - dpage_shear->vgot(nn); - } } list->inum = inum; diff --git a/src/npair_half_size_bin_newton.cpp b/src/npair_half_size_bin_newton.cpp index 4f4ecccb165d4289108abcbade6ff14d51ae9c8b..662bf91d6e9606d4c00a1a42ce130c34beea17a6 100644 --- a/src/npair_half_size_bin_newton.cpp +++ b/src/npair_half_size_bin_newton.cpp @@ -17,9 +17,6 @@ #include "neigh_list.h" #include "atom.h" #include "atom_vec.h" -#include "molecule.h" -#include "domain.h" -#include "fix_shear_history.h" #include "my_page.h" #include "error.h" @@ -32,27 +29,16 @@ NPairHalfSizeBinNewton::NPairHalfSizeBinNewton(LAMMPS *lmp) : NPair(lmp) {} /* ---------------------------------------------------------------------- size particles binned neighbor list construction with full Newton's 3rd law - shear history must be accounted for when a neighbor pair is added each owned atom i checks its own bin and other bins in Newton stencil every pair stored exactly once by some processor 
------------------------------------------------------------------------- */ void NPairHalfSizeBinNewton::build(NeighList *list) { - int i,j,k,m,n,nn,ibin,dnum,dnumbytes; + int i,j,k,m,n,nn,ibin; double xtmp,ytmp,ztmp,delx,dely,delz,rsq; double radi,radsum,cutsq; - int *neighptr,*touchptr; - double *shearptr; - - int *npartner; - tagint **partner; - double **shearpartner; - int **firsttouch; - double **firstshear; - MyPage<int> *ipage_touch; - MyPage<double> *dpage_shear; - NeighList *listhistory; + int *neighptr; double **x = atom->x; double *radius = atom->radius; @@ -63,42 +49,20 @@ void NPairHalfSizeBinNewton::build(NeighList *list) int nlocal = atom->nlocal; if (includegroup) nlocal = atom->nfirst; + int history = list->history; int *ilist = list->ilist; int *numneigh = list->numneigh; int **firstneigh = list->firstneigh; MyPage<int> *ipage = list->ipage; - FixShearHistory *fix_history = (FixShearHistory *) list->fix_history; - if (fix_history) { - fix_history->nlocal_neigh = nlocal; - fix_history->nall_neigh = nlocal + atom->nghost; - npartner = fix_history->npartner; - partner = fix_history->partner; - shearpartner = fix_history->shearpartner; - listhistory = list->listhistory; - firsttouch = listhistory->firstneigh; - firstshear = listhistory->firstdouble; - ipage_touch = listhistory->ipage; - dpage_shear = listhistory->dpage; - dnum = listhistory->dnum; - dnumbytes = dnum * sizeof(double); - } + int mask_history = 3 << SBBITS; int inum = 0; ipage->reset(); - if (fix_history) { - ipage_touch->reset(); - dpage_shear->reset(); - } for (i = 0; i < nlocal; i++) { n = 0; neighptr = ipage->vget(); - if (fix_history) { - nn = 0; - touchptr = ipage_touch->vget(); - shearptr = dpage_shear->vget(); - } xtmp = x[i][0]; ytmp = x[i][1]; @@ -128,29 +92,10 @@ void NPairHalfSizeBinNewton::build(NeighList *list) cutsq = (radsum+skin) * (radsum+skin); if (rsq <= cutsq) { - neighptr[n] = j; - - if (fix_history) { - if (rsq < radsum*radsum) { - for (m = 0; m < npartner[i]; 
m++) - if (partner[i][m] == tag[j]) break; - if (m < npartner[i]) { - touchptr[n] = 1; - memcpy(&shearptr[nn],&shearpartner[i][dnum*m],dnumbytes); - nn += dnum; - } else { - touchptr[n] = 0; - memcpy(&shearptr[nn],zeroes,dnumbytes); - nn += dnum; - } - } else { - touchptr[n] = 0; - memcpy(&shearptr[nn],zeroes,dnumbytes); - nn += dnum; - } - } - - n++; + if (history && rsq < radsum*radsum) + neighptr[n++] = j ^ mask_history; + else + neighptr[n++] = j; } } @@ -169,29 +114,10 @@ void NPairHalfSizeBinNewton::build(NeighList *list) cutsq = (radsum+skin) * (radsum+skin); if (rsq <= cutsq) { - neighptr[n] = j; - - if (fix_history) { - if (rsq < radsum*radsum) { - for (m = 0; m < npartner[i]; m++) - if (partner[i][m] == tag[j]) break; - if (m < npartner[i]) { - touchptr[n] = 1; - memcpy(&shearptr[nn],&shearpartner[i][dnum*m],dnumbytes); - nn += dnum; - } else { - touchptr[n] = 0; - memcpy(&shearptr[nn],zeroes,dnumbytes); - nn += dnum; - } - } else { - touchptr[n] = 0; - memcpy(&shearptr[nn],zeroes,dnumbytes); - nn += dnum; - } - } - - n++; + if (history && rsq < radsum*radsum) + neighptr[n++] = j ^ mask_history; + else + neighptr[n++] = j; } } } @@ -202,13 +128,6 @@ void NPairHalfSizeBinNewton::build(NeighList *list) ipage->vgot(n); if (ipage->status()) error->one(FLERR,"Neighbor list overflow, boost neigh_modify one"); - - if (fix_history) { - firsttouch[i] = touchptr; - firstshear[i] = shearptr; - ipage_touch->vgot(n); - dpage_shear->vgot(nn); - } } list->inum = inum; diff --git a/src/npair_half_size_bin_newton_tri.cpp b/src/npair_half_size_bin_newton_tri.cpp index 559eb09a7a5f731e4a1398f36644bb0d8a03f7f5..e70c0722804dad9ef06858a7d2f37d77aa33dd62 100644 --- a/src/npair_half_size_bin_newton_tri.cpp +++ b/src/npair_half_size_bin_newton_tri.cpp @@ -17,9 +17,6 @@ #include "neigh_list.h" #include "atom.h" #include "atom_vec.h" -#include "molecule.h" -#include "domain.h" -#include "fix_shear_history.h" #include "my_page.h" #include "error.h" @@ -33,27 +30,16 @@ 
NPairHalfSizeBinNewtonTri::NPairHalfSizeBinNewtonTri(LAMMPS *lmp) : /* ---------------------------------------------------------------------- size particles binned neighbor list construction with Newton's 3rd law for triclinic - shear history must be accounted for when a neighbor pair is added each owned atom i checks its own bin and other bins in triclinic stencil every pair stored exactly once by some processor ------------------------------------------------------------------------- */ void NPairHalfSizeBinNewtonTri::build(NeighList *list) { - int i,j,k,m,n,nn,ibin,dnum,dnumbytes; + int i,j,k,m,n,nn,ibin; double xtmp,ytmp,ztmp,delx,dely,delz,rsq; double radi,radsum,cutsq; - int *neighptr,*touchptr; - double *shearptr; - - int *npartner; - tagint **partner; - double **shearpartner; - int **firsttouch; - double **firstshear; - MyPage<int> *ipage_touch; - MyPage<double> *dpage_shear; - NeighList *listhistory; + int *neighptr; double **x = atom->x; double *radius = atom->radius; @@ -64,42 +50,20 @@ void NPairHalfSizeBinNewtonTri::build(NeighList *list) int nlocal = atom->nlocal; if (includegroup) nlocal = atom->nfirst; + int history = list->history; int *ilist = list->ilist; int *numneigh = list->numneigh; int **firstneigh = list->firstneigh; MyPage<int> *ipage = list->ipage; - FixShearHistory *fix_history = (FixShearHistory *) list->fix_history; - if (fix_history) { - fix_history->nlocal_neigh = nlocal; - fix_history->nall_neigh = nlocal + atom->nghost; - npartner = fix_history->npartner; - partner = fix_history->partner; - shearpartner = fix_history->shearpartner; - listhistory = list->listhistory; - firsttouch = listhistory->firstneigh; - firstshear = listhistory->firstdouble; - ipage_touch = listhistory->ipage; - dpage_shear = listhistory->dpage; - dnum = listhistory->dnum; - dnumbytes = dnum * sizeof(double); - } + int mask_history = 3 << SBBITS; int inum = 0; ipage->reset(); - if (fix_history) { - ipage_touch->reset(); - dpage_shear->reset(); - } for (i = 0; i 
< nlocal; i++) { n = 0; neighptr = ipage->vget(); - if (fix_history) { - nn = 0; - touchptr = ipage_touch->vget(); - shearptr = dpage_shear->vget(); - } xtmp = x[i][0]; ytmp = x[i][1]; @@ -134,29 +98,10 @@ void NPairHalfSizeBinNewtonTri::build(NeighList *list) cutsq = (radsum+skin) * (radsum+skin); if (rsq <= cutsq) { - neighptr[n++] = j; - - if (fix_history) { - if (rsq < radsum*radsum) { - for (m = 0; m < npartner[i]; m++) - if (partner[i][m] == tag[j]) break; - if (m < npartner[i]) { - touchptr[n] = 1; - memcpy(&shearptr[nn],&shearpartner[i][dnum*m],dnumbytes); - nn += dnum; - } else { - touchptr[n] = 0; - memcpy(&shearptr[nn],zeroes,dnumbytes); - nn += dnum; - } - } else { - touchptr[n] = 0; - memcpy(&shearptr[nn],zeroes,dnumbytes); - nn += dnum; - } - } - - n++; + if (history && rsq < radsum*radsum) + neighptr[n++] = j ^ mask_history; + else + neighptr[n++] = j; } } } @@ -167,13 +112,6 @@ void NPairHalfSizeBinNewtonTri::build(NeighList *list) ipage->vgot(n); if (ipage->status()) error->one(FLERR,"Neighbor list overflow, boost neigh_modify one"); - - if (fix_history) { - firsttouch[i] = touchptr; - firstshear[i] = shearptr; - ipage_touch->vgot(n); - dpage_shear->vgot(nn); - } } list->inum = inum; diff --git a/src/npair_half_size_nsq_newtoff.cpp b/src/npair_half_size_nsq_newtoff.cpp index 56630a9dc803a4cd5053eea8676b5044eae5cdca..e6f5cba657f79ec1318e4a15b2050e34d0c480e5 100644 --- a/src/npair_half_size_nsq_newtoff.cpp +++ b/src/npair_half_size_nsq_newtoff.cpp @@ -18,9 +18,6 @@ #include "atom.h" #include "atom_vec.h" #include "group.h" -#include "molecule.h" -#include "domain.h" -#include "fix_shear_history.h" #include "my_page.h" #include "error.h" @@ -33,27 +30,16 @@ NPairHalfSizeNsqNewtoff::NPairHalfSizeNsqNewtoff(LAMMPS *lmp) : NPair(lmp) {} /* ---------------------------------------------------------------------- size particles N^2 / 2 search for neighbor pairs with partial Newton's 3rd law - shear history must be accounted for when a neighbor pair is added 
pair added to list if atoms i and j are both owned and i < j pair added if j is ghost (also stored by proc owning j) ------------------------------------------------------------------------- */ void NPairHalfSizeNsqNewtoff::build(NeighList *list) { - int i,j,m,n,nn,bitmask,dnum,dnumbytes; + int i,j,m,n,nn,bitmask; double xtmp,ytmp,ztmp,delx,dely,delz,rsq; double radi,radsum,cutsq; - int *neighptr,*touchptr; - double *shearptr; - - int *npartner; - tagint **partner; - double **shearpartner; - int **firsttouch; - double **firstshear; - MyPage<int> *ipage_touch; - MyPage<double> *dpage_shear; - NeighList *listhistory; + int *neighptr; double **x = atom->x; double *radius = atom->radius; @@ -68,42 +54,20 @@ void NPairHalfSizeNsqNewtoff::build(NeighList *list) bitmask = group->bitmask[includegroup]; } + int history = list->history; int *ilist = list->ilist; int *numneigh = list->numneigh; int **firstneigh = list->firstneigh; MyPage<int> *ipage = list->ipage; - FixShearHistory *fix_history = (FixShearHistory *) list->fix_history; - if (fix_history) { - fix_history->nlocal_neigh = nlocal; - fix_history->nall_neigh = nall; - npartner = fix_history->npartner; - partner = fix_history->partner; - shearpartner = fix_history->shearpartner; - listhistory = list->listhistory; - firsttouch = listhistory->firstneigh; - firstshear = listhistory->firstdouble; - ipage_touch = listhistory->ipage; - dpage_shear = listhistory->dpage; - dnum = listhistory->dnum; - dnumbytes = dnum * sizeof(double); - } + int mask_history = 3 << SBBITS; int inum = 0; ipage->reset(); - if (fix_history) { - ipage_touch->reset(); - dpage_shear->reset(); - } for (i = 0; i < nlocal; i++) { n = 0; neighptr = ipage->vget(); - if (fix_history) { - nn = 0; - touchptr = ipage_touch->vget(); - shearptr = dpage_shear->vget(); - } xtmp = x[i][0]; ytmp = x[i][1]; @@ -124,29 +88,10 @@ void NPairHalfSizeNsqNewtoff::build(NeighList *list) cutsq = (radsum+skin) * (radsum+skin); if (rsq <= cutsq) { - neighptr[n] = j; - - if 
(fix_history) { - if (rsq < radsum*radsum) { - for (m = 0; m < npartner[i]; m++) - if (partner[i][m] == tag[j]) break; - if (m < npartner[i]) { - touchptr[n] = 1; - memcpy(&shearptr[nn],&shearpartner[i][dnum*m],dnumbytes); - nn += dnum; - } else { - touchptr[n] = 0; - memcpy(&shearptr[nn],zeroes,dnumbytes); - nn += dnum; - } - } else { - touchptr[n] = 0; - memcpy(&shearptr[nn],zeroes,dnumbytes); - nn += dnum; - } - } - - n++; + if (history && rsq < radsum*radsum) + neighptr[n++] = j ^ mask_history; + else + neighptr[n++] = j; } } @@ -156,13 +101,6 @@ void NPairHalfSizeNsqNewtoff::build(NeighList *list) ipage->vgot(n); if (ipage->status()) error->one(FLERR,"Neighbor list overflow, boost neigh_modify one"); - - if (fix_history) { - firsttouch[i] = touchptr; - firstshear[i] = shearptr; - ipage_touch->vgot(n); - dpage_shear->vgot(nn); - } } list->inum = inum; diff --git a/src/npair_half_size_nsq_newton.cpp b/src/npair_half_size_nsq_newton.cpp index 177685b9fc21a5107ee44d2449b083b9ac2558a1..78811170cbd50450234a70b1f580ab2a91df43fc 100644 --- a/src/npair_half_size_nsq_newton.cpp +++ b/src/npair_half_size_nsq_newton.cpp @@ -18,9 +18,6 @@ #include "atom.h" #include "atom_vec.h" #include "group.h" -#include "molecule.h" -#include "domain.h" -#include "fix_shear_history.h" #include "my_page.h" #include "error.h" @@ -33,7 +30,6 @@ NPairHalfSizeNsqNewton::NPairHalfSizeNsqNewton(LAMMPS *lmp) : NPair(lmp) {} /* ---------------------------------------------------------------------- size particles N^2 / 2 search for neighbor pairs with full Newton's 3rd law - shear history must be accounted for when a neighbor pair is added pair added to list if atoms i and j are both owned and i < j if j is ghost only me or other proc adds pair decision based on itag,jtag tests @@ -41,20 +37,10 @@ NPairHalfSizeNsqNewton::NPairHalfSizeNsqNewton(LAMMPS *lmp) : NPair(lmp) {} void NPairHalfSizeNsqNewton::build(NeighList *list) { - int i,j,m,n,nn,itag,jtag,bitmask,dnum,dnumbytes; + int 
i,j,m,n,nn,itag,jtag,bitmask; double xtmp,ytmp,ztmp,delx,dely,delz,rsq; double radi,radsum,cutsq; - int *neighptr,*touchptr; - double *shearptr; - - int *npartner; - tagint **partner; - double **shearpartner; - int **firsttouch; - double **firstshear; - MyPage<int> *ipage_touch; - MyPage<double> *dpage_shear; - NeighList *listhistory; + int *neighptr; double **x = atom->x; double *radius = atom->radius; @@ -69,42 +55,20 @@ void NPairHalfSizeNsqNewton::build(NeighList *list) bitmask = group->bitmask[includegroup]; } + int history = list->history; int *ilist = list->ilist; int *numneigh = list->numneigh; int **firstneigh = list->firstneigh; MyPage<int> *ipage = list->ipage; - FixShearHistory *fix_history = (FixShearHistory *) list->fix_history; - if (fix_history) { - fix_history->nlocal_neigh = nlocal; - fix_history->nall_neigh = nall; - npartner = fix_history->npartner; - partner = fix_history->partner; - shearpartner = fix_history->shearpartner; - listhistory = list->listhistory; - firsttouch = listhistory->firstneigh; - firstshear = listhistory->firstdouble; - ipage_touch = listhistory->ipage; - dpage_shear = listhistory->dpage; - dnum = listhistory->dnum; - dnumbytes = dnum * sizeof(double); - } + int mask_history = 3 << SBBITS; int inum = 0; ipage->reset(); - if (fix_history) { - ipage_touch->reset(); - dpage_shear->reset(); - } for (i = 0; i < nlocal; i++) { n = 0; neighptr = ipage->vget(); - if (fix_history) { - nn = 0; - touchptr = ipage_touch->vget(); - shearptr = dpage_shear->vget(); - } itag = tag[i]; xtmp = x[i][0]; @@ -142,29 +106,10 @@ void NPairHalfSizeNsqNewton::build(NeighList *list) cutsq = (radsum+skin) * (radsum+skin); if (rsq <= cutsq) { - neighptr[n] = j; - - if (fix_history) { - if (rsq < radsum*radsum) { - for (m = 0; m < npartner[i]; m++) - if (partner[i][m] == tag[j]) break; - if (m < npartner[i]) { - touchptr[n] = 1; - memcpy(&shearptr[nn],&shearpartner[i][dnum*m],dnumbytes); - nn += dnum; - } else { - touchptr[n] = 0; - 
memcpy(&shearptr[nn],zeroes,dnumbytes); - nn += dnum; - } - } else { - touchptr[n] = 0; - memcpy(&shearptr[nn],zeroes,dnumbytes); - nn += dnum; - } - } - - n++; + if (history && rsq < radsum*radsum) + neighptr[n++] = j ^ mask_history; + else + neighptr[n++] = j; } } @@ -174,13 +119,6 @@ void NPairHalfSizeNsqNewton::build(NeighList *list) ipage->vgot(n); if (ipage->status()) error->one(FLERR,"Neighbor list overflow, boost neigh_modify one"); - - if (fix_history) { - firsttouch[i] = touchptr; - firstshear[i] = shearptr; - ipage_touch->vgot(n); - dpage_shear->vgot(nn); - } } list->inum = inum; diff --git a/src/npair_skip_respa.cpp b/src/npair_skip_respa.cpp index 31420b32d1e69ad80025e231d0a76064f0b5bc53..1d4eda53549f9ec180dd7e9d2c66c0999a2fd88f 100644 --- a/src/npair_skip_respa.cpp +++ b/src/npair_skip_respa.cpp @@ -53,28 +53,24 @@ void NPairSkipRespa::build(NeighList *list) int *iskip = list->iskip; int **ijskip = list->ijskip; - NeighList *listinner = list->listinner; - int *ilist_inner = listinner->ilist; - int *numneigh_inner = listinner->numneigh; - int **firstneigh_inner = listinner->firstneigh; - MyPage<int> *ipage_inner = listinner->ipage; + int *ilist_inner = list->ilist_inner; + int *numneigh_inner = list->numneigh_inner; + int **firstneigh_inner = list->firstneigh_inner; + MyPage<int> *ipage_inner = list->ipage_inner; + int *numneigh_inner_skip = list->listskip->numneigh_inner; + int **firstneigh_inner_skip = list->listskip->firstneigh_inner; - int *numneigh_inner_skip = list->listskip->listinner->numneigh; - int **firstneigh_inner_skip = list->listskip->listinner->firstneigh; - - NeighList *listmiddle; int *ilist_middle,*numneigh_middle,**firstneigh_middle; MyPage<int> *ipage_middle; int *numneigh_middle_skip,**firstneigh_middle_skip; int respamiddle = list->respamiddle; if (respamiddle) { - listmiddle = list->listmiddle; - ilist_middle = listmiddle->ilist; - numneigh_middle = listmiddle->numneigh; - firstneigh_middle = listmiddle->firstneigh; - 
ipage_middle = listmiddle->ipage; - numneigh_middle_skip = list->listskip->listmiddle->numneigh; - firstneigh_middle_skip = list->listskip->listmiddle->firstneigh; + ilist_middle = list->ilist_middle; + numneigh_middle = list->numneigh_middle; + firstneigh_middle = list->firstneigh_middle; + ipage_middle = list->ipage_middle; + numneigh_middle_skip = list->listskip->numneigh_middle; + firstneigh_middle_skip = list->listskip->firstneigh_middle; } int inum = 0; @@ -164,6 +160,6 @@ void NPairSkipRespa::build(NeighList *list) } list->inum = inum; - listinner->inum = inum; - if (respamiddle) listmiddle->inum = inum; + list->inum_inner = inum; + if (respamiddle) list->inum_middle = inum; } diff --git a/src/npair_skip_size.cpp b/src/npair_skip_size.cpp index e8d19dedcaf46b6461e9fd989d0596af89c6624c..075387f5b043cc857ddf2a234ebfc6a4f6c5ec15 100644 --- a/src/npair_skip_size.cpp +++ b/src/npair_skip_size.cpp @@ -17,9 +17,6 @@ #include "neigh_list.h" #include "atom.h" #include "atom_vec.h" -#include "molecule.h" -#include "domain.h" -#include "fix_shear_history.h" #include "my_page.h" #include "error.h" @@ -32,24 +29,13 @@ NPairSkipSize::NPairSkipSize(LAMMPS *lmp) : NPair(lmp) {} /* ---------------------------------------------------------------------- build skip list for subset of types from parent list iskip and ijskip flag which atom types and type pairs to skip - if list requests it, preserve shear history via fix shear/history ------------------------------------------------------------------------- */ void NPairSkipSize::build(NeighList *list) { int i,j,ii,jj,m,n,nn,itype,jnum,joriginal,dnum,dnumbytes; tagint jtag; - int *neighptr,*jlist,*touchptr; - double *shearptr; - - int *npartner; - tagint **partner; - double **shearpartner; - int **firsttouch; - double **firstshear; - MyPage<int> *ipage_touch; - MyPage<double> *dpage_shear; - NeighList *listhistory; + int *neighptr,*jlist; tagint *tag = atom->tag; int *type = atom->type; @@ -68,28 +54,8 @@ void 
NPairSkipSize::build(NeighList *list) int *iskip = list->iskip; int **ijskip = list->ijskip; - FixShearHistory *fix_history = (FixShearHistory *) list->fix_history; - if (fix_history) { - fix_history->nlocal_neigh = nlocal; - fix_history->nall_neigh = nlocal + atom->nghost; - npartner = fix_history->npartner; - partner = fix_history->partner; - shearpartner = fix_history->shearpartner; - listhistory = list->listhistory; - firsttouch = listhistory->firstneigh; - firstshear = listhistory->firstdouble; - ipage_touch = listhistory->ipage; - dpage_shear = listhistory->dpage; - dnum = listhistory->dnum; - dnumbytes = dnum * sizeof(double); - } - int inum = 0; ipage->reset(); - if (fix_history) { - ipage_touch->reset(); - dpage_shear->reset(); - } // loop over atoms in other list // skip I atom entirely if iskip is set for type[I] @@ -102,13 +68,8 @@ void NPairSkipSize::build(NeighList *list) n = 0; neighptr = ipage->vget(); - if (fix_history) { - nn = 0; - touchptr = ipage_touch->vget(); - shearptr = dpage_shear->vget(); - } - // loop over parent non-skip size list and optionally its history info + // loop over parent non-skip size list jlist = firstneigh_skip[i]; jnum = numneigh_skip[i]; @@ -117,29 +78,7 @@ void NPairSkipSize::build(NeighList *list) joriginal = jlist[jj]; j = joriginal & NEIGHMASK; if (ijskip[itype][type[j]]) continue; - neighptr[n] = joriginal; - - // no numeric test for current touch - // just use FSH partner list to infer it - // would require distance calculation for spheres - // more complex calculation for surfs - - if (fix_history) { - jtag = tag[j]; - for (m = 0; m < npartner[i]; m++) - if (partner[i][m] == jtag) break; - if (m < npartner[i]) { - touchptr[n] = 1; - memcpy(&shearptr[nn],&shearpartner[i][dnum*m],dnumbytes); - nn += dnum; - } else { - touchptr[n] = 0; - memcpy(&shearptr[nn],zeroes,dnumbytes); - nn += dnum; - } - } - - n++; + neighptr[n++] = joriginal; } ilist[inum++] = i; @@ -148,13 +87,6 @@ void NPairSkipSize::build(NeighList 
*list) ipage->vgot(n); if (ipage->status()) error->one(FLERR,"Neighbor list overflow, boost neigh_modify one"); - - if (fix_history) { - firsttouch[i] = touchptr; - firstshear[i] = shearptr; - ipage_touch->vgot(n); - dpage_shear->vgot(nn); - } } list->inum = inum; diff --git a/src/npair_skip_size_off2on.cpp b/src/npair_skip_size_off2on.cpp index da9dd57047444fd09e85ecfcb70335438b07aa60..92eae285d0a37d03b55b8bb09ef95d2e2f2e53d0 100644 --- a/src/npair_skip_size_off2on.cpp +++ b/src/npair_skip_size_off2on.cpp @@ -17,9 +17,6 @@ #include "neigh_list.h" #include "atom.h" #include "atom_vec.h" -#include "molecule.h" -#include "domain.h" -#include "fix_shear_history.h" #include "my_page.h" #include "error.h" @@ -33,24 +30,13 @@ NPairSkipSizeOff2on::NPairSkipSizeOff2on(LAMMPS *lmp) : NPair(lmp) {} build skip list for subset of types from parent list iskip and ijskip flag which atom types and type pairs to skip parent non-skip list used newton off, this skip list is newton on - if list requests it, preserve shear history via fix shear/history ------------------------------------------------------------------------- */ void NPairSkipSizeOff2on::build(NeighList *list) { int i,j,ii,jj,m,n,nn,itype,jnum,joriginal,dnum,dnumbytes; tagint itag,jtag; - int *neighptr,*jlist,*touchptr; - double *shearptr; - - int *npartner; - tagint **partner; - double **shearpartner; - int **firsttouch; - double **firstshear; - MyPage<int> *ipage_touch; - MyPage<double> *dpage_shear; - NeighList *listhistory; + int *neighptr,*jlist; tagint *tag = atom->tag; int *type = atom->type; @@ -69,28 +55,8 @@ void NPairSkipSizeOff2on::build(NeighList *list) int *iskip = list->iskip; int **ijskip = list->ijskip; - FixShearHistory *fix_history = (FixShearHistory *) list->fix_history; - if (fix_history) { - fix_history->nlocal_neigh = nlocal; - fix_history->nall_neigh = nlocal + atom->nghost; - npartner = fix_history->npartner; - partner = fix_history->partner; - shearpartner = fix_history->shearpartner; - 
listhistory = list->listhistory; - firsttouch = listhistory->firstneigh; - firstshear = listhistory->firstdouble; - ipage_touch = listhistory->ipage; - dpage_shear = listhistory->dpage; - dnum = listhistory->dnum; - dnumbytes = dnum * sizeof(double); - } - int inum = 0; ipage->reset(); - if (fix_history) { - ipage_touch->reset(); - dpage_shear->reset(); - } // loop over atoms in other list // skip I atom entirely if iskip is set for type[I] @@ -104,11 +70,6 @@ void NPairSkipSizeOff2on::build(NeighList *list) n = 0; neighptr = ipage->vget(); - if (fix_history) { - nn = 0; - touchptr = ipage_touch->vget(); - shearptr = dpage_shear->vget(); - } // loop over parent non-skip size list and optionally its history info @@ -125,28 +86,7 @@ void NPairSkipSizeOff2on::build(NeighList *list) jtag = tag[j]; if (j >= nlocal && jtag < itag) continue; - neighptr[n] = joriginal; - - // no numeric test for current touch - // just use FSH partner list to infer it - // would require distance calculation for spheres - // more complex calculation for surfs - - if (fix_history) { - for (m = 0; m < npartner[i]; m++) - if (partner[i][m] == jtag) break; - if (m < npartner[i]) { - touchptr[n] = 1; - memcpy(&shearptr[nn],&shearpartner[i][dnum*m],dnumbytes); - nn += dnum; - } else { - touchptr[n] = 0; - memcpy(&shearptr[nn],zeroes,dnumbytes); - nn += dnum; - } - } - - n++; + neighptr[n++] = joriginal; } ilist[inum++] = i; @@ -155,13 +95,6 @@ void NPairSkipSizeOff2on::build(NeighList *list) ipage->vgot(n); if (ipage->status()) error->one(FLERR,"Neighbor list overflow, boost neigh_modify one"); - - if (fix_history) { - firsttouch[i] = touchptr; - firstshear[i] = shearptr; - ipage_touch->vgot(n); - dpage_shear->vgot(nn); - } } list->inum = inum; diff --git a/src/npair_skip_size_off2on_oneside.cpp b/src/npair_skip_size_off2on_oneside.cpp index 7377feec5bbefe495242deadff125c48b3fa404e..f2fca7b1284389c925650d6678cc0d7d14d87fc4 100644 --- a/src/npair_skip_size_off2on_oneside.cpp +++ 
b/src/npair_skip_size_off2on_oneside.cpp @@ -17,9 +17,7 @@ #include "neigh_list.h" #include "atom.h" #include "atom_vec.h" -#include "molecule.h" #include "domain.h" -#include "fix_shear_history.h" #include "my_page.h" #include "error.h" @@ -35,7 +33,6 @@ NPairSkipSizeOff2onOneside::NPairSkipSizeOff2onOneside(LAMMPS *lmp) : iskip and ijskip flag which atom types and type pairs to skip parent non-skip list used newton off and was not onesided, this skip list is newton on and onesided - if list requests it, preserve shear history via fix shear/history ------------------------------------------------------------------------- */ void NPairSkipSizeOff2onOneside::build(NeighList *list) @@ -44,15 +41,6 @@ void NPairSkipSizeOff2onOneside::build(NeighList *list) tagint jtag; int *surf,*jlist; - int *npartner; - tagint **partner; - double **shearpartner; - int **firsttouch; - double **firstshear; - MyPage<int> *ipage_touch; - MyPage<double> *dpage_shear; - NeighList *listhistory; - tagint *tag = atom->tag; int *type = atom->type; int nlocal = atom->nlocal; @@ -73,28 +61,8 @@ void NPairSkipSizeOff2onOneside::build(NeighList *list) if (domain->dimension == 2) surf = atom->line; else surf = atom->tri; - FixShearHistory *fix_history = (FixShearHistory *) list->fix_history; - if (fix_history) { - fix_history->nlocal_neigh = nlocal; - fix_history->nall_neigh = nlocal + atom->nghost; - npartner = fix_history->npartner; - partner = fix_history->partner; - shearpartner = fix_history->shearpartner; - listhistory = list->listhistory; - firsttouch = listhistory->firstneigh; - firstshear = listhistory->firstdouble; - ipage_touch = listhistory->ipage; - dpage_shear = listhistory->dpage; - dnum = listhistory->dnum; - dnumbytes = dnum * sizeof(double); - } - int inum = 0; ipage->reset(); - if (fix_history) { - ipage_touch->reset(); - dpage_shear->reset(); - } // two loops over parent list required, one to count, one to store // because onesided constraint means pair I,J may be stored with I 
or J @@ -139,7 +107,7 @@ void NPairSkipSizeOff2onOneside::build(NeighList *list) } } - // allocate all per-atom neigh list chunks, including history + // allocate all per-atom neigh list chunks for (i = 0; i < nlocal; i++) { if (numneigh[i] == 0) continue; @@ -147,10 +115,6 @@ void NPairSkipSizeOff2onOneside::build(NeighList *list) firstneigh[i] = ipage->get(n); if (ipage->status()) error->one(FLERR,"Neighbor list overflow, boost neigh_modify one"); - if (fix_history) { - firsttouch[i] = ipage_touch->get(n); - firstshear[i] = dpage_shear->get(dnum*n); - } } // second loop over atoms in other list to store neighbors @@ -189,32 +153,11 @@ void NPairSkipSizeOff2onOneside::build(NeighList *list) // OK, b/c there is no special list flagging for surfs firstneigh[i][numneigh[i]] = j; - - // no numeric test for current touch - // just use FSH partner list to infer it - // would require complex calculation for surfs - - if (fix_history) { - jtag = tag[j]; - n = numneigh[i]; - nn = dnum*n; - for (m = 0; m < npartner[i]; m++) - if (partner[i][m] == jtag) break; - if (m < npartner[i]) { - firsttouch[i][n] = 1; - memcpy(&firstshear[i][nn],&shearpartner[i][dnum*m],dnumbytes); - } else { - firsttouch[i][n] = 0; - memcpy(&firstshear[i][nn],zeroes,dnumbytes); - } - } - numneigh[i]++; if (flip) i = j; } // only add atom I to ilist if it has neighbors - // fix shear/history allows for this in pre_exchange_onesided() if (numneigh[i]) ilist[inum++] = i; } diff --git a/src/output.cpp b/src/output.cpp index ce7fcb7cca3a353bb115c2f69d036014c9c70003..11c6fa073e8c05c38056efbdc1765e5f6f8da999 100644 --- a/src/output.cpp +++ b/src/output.cpp @@ -827,9 +827,9 @@ void Output::create_restart(int narg, char **arg) sum and print memory usage result is only memory on proc 0, not averaged across procs ------------------------------------------------------------------------- */ + void Output::memory_usage() { - bigint bytes = 0; bytes += atom->memory_usage(); bytes += neighbor->memory_usage(); @@ 
-844,9 +844,9 @@ void Output::memory_usage() MPI_Reduce(&mbytes,&mbavg,1,MPI_DOUBLE,MPI_SUM,0,world); MPI_Reduce(&mbytes,&mbmin,1,MPI_DOUBLE,MPI_MIN,0,world); MPI_Reduce(&mbytes,&mbmax,1,MPI_DOUBLE,MPI_MAX,0,world); - mbavg /= comm->nprocs; if (comm->me == 0) { + mbavg /= comm->nprocs; if (screen) fprintf(screen,"Per MPI rank memory allocation (min/avg/max) = " "%.4g | %.4g | %.4g Mbytes\n",mbmin,mbavg,mbmax); diff --git a/src/pair.h b/src/pair.h index 0f7b0f85b62dbf6684304c78fe2e257c2bf659f4..eb71e8822474aed87e024988e27b669196385cd9 100644 --- a/src/pair.h +++ b/src/pair.h @@ -92,10 +92,6 @@ class Pair : protected Pointers { class NeighList *list; // standard neighbor list used by most pairs class NeighList *listhalf; // half list used by some pairs class NeighList *listfull; // full list used by some pairs - class NeighList *listhistory; // neighbor history list used by some pairs - class NeighList *listinner; // rRESPA lists used by some pairs - class NeighList *listmiddle; - class NeighList *listouter; int allocated; // 0/1 = whether arrays are allocated // public so external driver can check diff --git a/src/pair_lj96_cut.cpp b/src/pair_lj96_cut.cpp index 83fc5bcdda4218cf68b8ef3ded7039bd8d9e2bec..842b918fe154ad8db4865e2b14cd44d8141c7ff3 100644 --- a/src/pair_lj96_cut.cpp +++ b/src/pair_lj96_cut.cpp @@ -157,10 +157,10 @@ void PairLJ96Cut::compute_inner() double *special_lj = force->special_lj; int newton_pair = force->newton_pair; - inum = listinner->inum; - ilist = listinner->ilist; - numneigh = listinner->numneigh; - firstneigh = listinner->firstneigh; + inum = list->inum_inner; + ilist = list->ilist_inner; + numneigh = list->numneigh_inner; + firstneigh = list->firstneigh_inner; double cut_out_on = cut_respa[0]; double cut_out_off = cut_respa[1]; @@ -231,10 +231,10 @@ void PairLJ96Cut::compute_middle() double *special_lj = force->special_lj; int newton_pair = force->newton_pair; - inum = listmiddle->inum; - ilist = listmiddle->ilist; - numneigh = 
listmiddle->numneigh; - firstneigh = listmiddle->firstneigh; + inum = list->inum_middle; + ilist = list->ilist_middle; + numneigh = list->numneigh_middle; + firstneigh = list->firstneigh_middle; double cut_in_off = cut_respa[0]; double cut_in_on = cut_respa[1]; @@ -318,10 +318,10 @@ void PairLJ96Cut::compute_outer(int eflag, int vflag) double *special_lj = force->special_lj; int newton_pair = force->newton_pair; - inum = listouter->inum; - ilist = listouter->ilist; - numneigh = listouter->numneigh; - firstneigh = listouter->firstneigh; + inum = list->inum; + ilist = list->ilist; + numneigh = list->numneigh; + firstneigh = list->firstneigh; double cut_in_off = cut_respa[2]; double cut_in_on = cut_respa[3]; @@ -487,36 +487,23 @@ void PairLJ96Cut::coeff(int narg, char **arg) void PairLJ96Cut::init_style() { - // request regular or rRESPA neighbor lists + // request regular or rRESPA neighbor list int irequest; + int respa = 0; if (update->whichflag == 1 && strstr(update->integrate_style,"respa")) { - int respa = 0; if (((Respa *) update->integrate)->level_inner >= 0) respa = 1; if (((Respa *) update->integrate)->level_middle >= 0) respa = 2; + } - if (respa == 0) irequest = neighbor->request(this,instance_me); - else if (respa == 1) { - irequest = neighbor->request(this,instance_me); - neighbor->requests[irequest]->id = 1; - neighbor->requests[irequest]->respainner = 1; - irequest = neighbor->request(this,instance_me); - neighbor->requests[irequest]->id = 3; - neighbor->requests[irequest]->respaouter = 1; - } else { - irequest = neighbor->request(this,instance_me); - neighbor->requests[irequest]->id = 1; - neighbor->requests[irequest]->respainner = 1; - irequest = neighbor->request(this,instance_me); - neighbor->requests[irequest]->id = 2; - neighbor->requests[irequest]->respamiddle = 1; - irequest = neighbor->request(this,instance_me); - neighbor->requests[irequest]->id = 3; - neighbor->requests[irequest]->respaouter = 1; - } + irequest = 
neighbor->request(this,instance_me); - } else irequest = neighbor->request(this,instance_me); + if (respa >= 1) { + neighbor->requests[irequest]->respaouter = 1; + neighbor->requests[irequest]->respainner = 1; + } + if (respa == 2) neighbor->requests[irequest]->respamiddle = 1; // set rRESPA cutoffs @@ -526,19 +513,6 @@ void PairLJ96Cut::init_style() else cut_respa = NULL; } -/* ---------------------------------------------------------------------- - neighbor callback to inform pair style of neighbor list to use - regular or rRESPA -------------------------------------------------------------------------- */ - -void PairLJ96Cut::init_list(int id, NeighList *ptr) -{ - if (id == 0) list = ptr; - else if (id == 1) listinner = ptr; - else if (id == 2) listmiddle = ptr; - else if (id == 3) listouter = ptr; -} - /* ---------------------------------------------------------------------- init for one type pair i,j and corresponding j,i ------------------------------------------------------------------------- */ diff --git a/src/pair_lj96_cut.h b/src/pair_lj96_cut.h index 6b677c6429cddef5edc69e4f79507d3c7e8009c0..4d6df02127eddfb2611f39adbc8c5458094bc6f7 100644 --- a/src/pair_lj96_cut.h +++ b/src/pair_lj96_cut.h @@ -33,7 +33,6 @@ class PairLJ96Cut : public Pair { void settings(int, char **); void coeff(int, char **); void init_style(); - void init_list(int, class NeighList *); double init_one(int, int); void write_restart(FILE *); void read_restart(FILE *); diff --git a/src/pair_lj_cut.cpp b/src/pair_lj_cut.cpp index 7f838061f124f8d8cb5427fc75a62a2b3193241a..215fabecbbca976d2a01c46c163bf2f0aadff734 100644 --- a/src/pair_lj_cut.cpp +++ b/src/pair_lj_cut.cpp @@ -156,10 +156,10 @@ void PairLJCut::compute_inner() double *special_lj = force->special_lj; int newton_pair = force->newton_pair; - inum = listinner->inum; - ilist = listinner->ilist; - numneigh = listinner->numneigh; - firstneigh = listinner->firstneigh; + inum = list->inum_inner; + ilist = list->ilist_inner; + numneigh 
= list->numneigh_inner; + firstneigh = list->firstneigh_inner; double cut_out_on = cut_respa[0]; double cut_out_off = cut_respa[1]; @@ -229,10 +229,10 @@ void PairLJCut::compute_middle() double *special_lj = force->special_lj; int newton_pair = force->newton_pair; - inum = listmiddle->inum; - ilist = listmiddle->ilist; - numneigh = listmiddle->numneigh; - firstneigh = listmiddle->firstneigh; + inum = list->inum_middle; + ilist = list->ilist_middle; + numneigh = list->numneigh_middle; + firstneigh = list->firstneigh_middle; double cut_in_off = cut_respa[0]; double cut_in_on = cut_respa[1]; @@ -315,10 +315,10 @@ void PairLJCut::compute_outer(int eflag, int vflag) double *special_lj = force->special_lj; int newton_pair = force->newton_pair; - inum = listouter->inum; - ilist = listouter->ilist; - numneigh = listouter->numneigh; - firstneigh = listouter->firstneigh; + inum = list->inum; + ilist = list->ilist; + numneigh = list->numneigh; + firstneigh = list->firstneigh; double cut_in_off = cut_respa[2]; double cut_in_on = cut_respa[3]; @@ -481,36 +481,23 @@ void PairLJCut::coeff(int narg, char **arg) void PairLJCut::init_style() { - // request regular or rRESPA neighbor lists + // request regular or rRESPA neighbor list int irequest; + int respa = 0; if (update->whichflag == 1 && strstr(update->integrate_style,"respa")) { - int respa = 0; if (((Respa *) update->integrate)->level_inner >= 0) respa = 1; if (((Respa *) update->integrate)->level_middle >= 0) respa = 2; + } - if (respa == 0) irequest = neighbor->request(this,instance_me); - else if (respa == 1) { - irequest = neighbor->request(this,instance_me); - neighbor->requests[irequest]->id = 1; - neighbor->requests[irequest]->respainner = 1; - irequest = neighbor->request(this,instance_me); - neighbor->requests[irequest]->id = 3; - neighbor->requests[irequest]->respaouter = 1; - } else { - irequest = neighbor->request(this,instance_me); - neighbor->requests[irequest]->id = 1; - neighbor->requests[irequest]->respainner 
= 1; - irequest = neighbor->request(this,instance_me); - neighbor->requests[irequest]->id = 2; - neighbor->requests[irequest]->respamiddle = 1; - irequest = neighbor->request(this,instance_me); - neighbor->requests[irequest]->id = 3; - neighbor->requests[irequest]->respaouter = 1; - } + irequest = neighbor->request(this,instance_me); - } else irequest = neighbor->request(this,instance_me); + if (respa >= 1) { + neighbor->requests[irequest]->respaouter = 1; + neighbor->requests[irequest]->respainner = 1; + } + if (respa == 2) neighbor->requests[irequest]->respamiddle = 1; // set rRESPA cutoffs @@ -520,19 +507,6 @@ void PairLJCut::init_style() else cut_respa = NULL; } -/* ---------------------------------------------------------------------- - neighbor callback to inform pair style of neighbor list to use - regular or rRESPA -------------------------------------------------------------------------- */ - -void PairLJCut::init_list(int id, NeighList *ptr) -{ - if (id == 0) list = ptr; - else if (id == 1) listinner = ptr; - else if (id == 2) listmiddle = ptr; - else if (id == 3) listouter = ptr; -} - /* ---------------------------------------------------------------------- init for one type pair i,j and corresponding j,i ------------------------------------------------------------------------- */ diff --git a/src/pair_lj_cut.h b/src/pair_lj_cut.h index 43eeda09cabee69f07c1a3f976c4b30f9aaeeb8f..3724685db6175366ef8de2bb13319efb91f8823a 100644 --- a/src/pair_lj_cut.h +++ b/src/pair_lj_cut.h @@ -32,7 +32,6 @@ class PairLJCut : public Pair { void settings(int, char **); void coeff(int, char **); void init_style(); - void init_list(int, class NeighList *); double init_one(int, int); void write_restart(FILE *); void read_restart(FILE *); diff --git a/src/pair_mie_cut.cpp b/src/pair_mie_cut.cpp index 320f21248dbc6dbc1d30ea8294fc61fb7b2c791f..04f8de8d7d335b32aa4c00fe66cc764ea1e3abde 100644 --- a/src/pair_mie_cut.cpp +++ b/src/pair_mie_cut.cpp @@ -159,10 +159,10 @@ void 
PairMIECut::compute_inner() double *special_mie = force->special_lj; int newton_pair = force->newton_pair; - inum = listinner->inum; - ilist = listinner->ilist; - numneigh = listinner->numneigh; - firstneigh = listinner->firstneigh; + inum = list->inum_inner; + ilist = list->ilist_inner; + numneigh = list->numneigh_inner; + firstneigh = list->firstneigh_inner; double cut_out_on = cut_respa[0]; double cut_out_off = cut_respa[1]; @@ -233,10 +233,10 @@ void PairMIECut::compute_middle() double *special_mie = force->special_lj; int newton_pair = force->newton_pair; - inum = listmiddle->inum; - ilist = listmiddle->ilist; - numneigh = listmiddle->numneigh; - firstneigh = listmiddle->firstneigh; + inum = list->inum_middle; + ilist = list->ilist_middle; + numneigh = list->numneigh_middle; + firstneigh = list->firstneigh_middle; double cut_in_off = cut_respa[0]; double cut_in_on = cut_respa[1]; @@ -320,10 +320,10 @@ void PairMIECut::compute_outer(int eflag, int vflag) double *special_mie = force->special_lj; int newton_pair = force->newton_pair; - inum = listouter->inum; - ilist = listouter->ilist; - numneigh = listouter->numneigh; - firstneigh = listouter->firstneigh; + inum = list->inum; + ilist = list->ilist; + numneigh = list->numneigh; + firstneigh = list->firstneigh; double cut_in_off = cut_respa[2]; double cut_in_on = cut_respa[3]; @@ -496,36 +496,23 @@ void PairMIECut::coeff(int narg, char **arg) void PairMIECut::init_style() { - // request regular or rRESPA neighbor lists + // request regular or rRESPA neighbor list int irequest; + int respa = 0; if (update->whichflag == 1 && strstr(update->integrate_style,"respa")) { - int respa = 0; if (((Respa *) update->integrate)->level_inner >= 0) respa = 1; if (((Respa *) update->integrate)->level_middle >= 0) respa = 2; + } - if (respa == 0) irequest = neighbor->request(this,instance_me); - else if (respa == 1) { - irequest = neighbor->request(this,instance_me); - neighbor->requests[irequest]->id = 1; - 
neighbor->requests[irequest]->respainner = 1; - irequest = neighbor->request(this,instance_me); - neighbor->requests[irequest]->id = 3; - neighbor->requests[irequest]->respaouter = 1; - } else { - irequest = neighbor->request(this,instance_me); - neighbor->requests[irequest]->id = 1; - neighbor->requests[irequest]->respainner = 1; - irequest = neighbor->request(this,instance_me); - neighbor->requests[irequest]->id = 2; - neighbor->requests[irequest]->respamiddle = 1; - irequest = neighbor->request(this,instance_me); - neighbor->requests[irequest]->id = 3; - neighbor->requests[irequest]->respaouter = 1; - } + irequest = neighbor->request(this,instance_me); - } else irequest = neighbor->request(this,instance_me); + if (respa >= 1) { + neighbor->requests[irequest]->respaouter = 1; + neighbor->requests[irequest]->respainner = 1; + } + if (respa == 2) neighbor->requests[irequest]->respamiddle = 1; // set rRESPA cutoffs @@ -535,19 +522,6 @@ void PairMIECut::init_style() else cut_respa = NULL; } -/* ---------------------------------------------------------------------- - neighbor callback to inform pair style of neighbor list to use - regular or rRESPA -------------------------------------------------------------------------- */ - -void PairMIECut::init_list(int id, NeighList *ptr) -{ - if (id == 0) list = ptr; - else if (id == 1) listinner = ptr; - else if (id == 2) listmiddle = ptr; - else if (id == 3) listouter = ptr; -} - /* ---------------------------------------------------------------------- init for one type pair i,j and corresponding j,i ------------------------------------------------------------------------- */ diff --git a/src/pair_mie_cut.h b/src/pair_mie_cut.h index 2a0a29843e9fc2a201068a4d5c521be7e548cc1e..9e12438d1455c12ba0393ee13379e90c71a56415 100644 --- a/src/pair_mie_cut.h +++ b/src/pair_mie_cut.h @@ -32,7 +32,6 @@ class PairMIECut : public Pair { void settings(int, char **); void coeff(int, char **); void init_style(); - void init_list(int, class 
NeighList *); double init_one(int, int); void write_restart(FILE *); void read_restart(FILE *); diff --git a/src/replicate.cpp b/src/replicate.cpp index e2ed718f652bfbb894ddc484b12c52ad295fef87..f3d196416917cab8be9080243ea9d919abdebe74 100644 --- a/src/replicate.cpp +++ b/src/replicate.cpp @@ -74,6 +74,11 @@ void Replicate::command(int narg, char **arg) if (atom->nextra_grow || atom->nextra_restart || atom->nextra_store) error->all(FLERR,"Cannot replicate with fixes that store atom quantities"); + // Record wall time for atom replication + + MPI_Barrier(world); + double time1 = MPI_Wtime(); + // maxtag = largest atom tag across all existing atoms tagint maxtag = 0; @@ -424,4 +429,16 @@ void Replicate::command(int narg, char **arg) Special special(lmp); special.build(); } + + // Wall time + + MPI_Barrier(world); + double time2 = MPI_Wtime(); + + if (me == 0) { + if (screen) + fprintf(screen," Time spent = %g secs\n",time2-time1); + if (logfile) + fprintf(logfile," Time spent = %g secs\n",time2-time1); + } } diff --git a/src/respa.cpp b/src/respa.cpp index 5d51ff64eef696215eea0498e94610b6eebf7eb5..23cd941834482d3f5047602c8fa98e426f901ba4 100644 --- a/src/respa.cpp +++ b/src/respa.cpp @@ -442,6 +442,7 @@ void Respa::setup(int flag) domain->box_too_small_check(); modify->setup_pre_neighbor(); neighbor->build(); + modify->setup_post_neighbor(); neighbor->ncalls = 0; // compute all forces @@ -517,6 +518,7 @@ void Respa::setup_minimal(int flag) domain->box_too_small_check(); modify->setup_pre_neighbor(); neighbor->build(); + modify->setup_post_neighbor(); neighbor->ncalls = 0; } @@ -668,6 +670,11 @@ void Respa::recurse(int ilevel) } neighbor->build(); timer->stamp(Timer::NEIGH); + if (modify->n_post_neighbor) { + modify->post_neighbor(); + timer->stamp(Timer::MODIFY); + } + } else if (ilevel == 0) { timer->stamp(); comm->forward_comm(); diff --git a/src/verlet.cpp b/src/verlet.cpp index b242b00722477ff4e9a7a15c921ec50e6793c49b..d74906556b35c32bde5da26114a31cf7c9441b13 
100644 --- a/src/verlet.cpp +++ b/src/verlet.cpp @@ -90,10 +90,9 @@ void Verlet::setup(int flag) if (comm->me == 0 && screen) { fprintf(screen,"Setting up Verlet run ...\n"); if (flag) { - fprintf(screen," Unit style : %s\n", update->unit_style); - fprintf(screen," Current step : " BIGINT_FORMAT "\n", - update->ntimestep); - fprintf(screen," Time step : %g\n", update->dt); + fprintf(screen," Unit style : %s\n",update->unit_style); + fprintf(screen," Current step : " BIGINT_FORMAT "\n",update->ntimestep); + fprintf(screen," Time step : %g\n",update->dt); timer->print_timeout(screen); } } @@ -122,6 +121,7 @@ void Verlet::setup(int flag) domain->box_too_small_check(); modify->setup_pre_neighbor(); neighbor->build(); + modify->setup_post_neighbor(); neighbor->ncalls = 0; // compute all forces @@ -183,6 +183,7 @@ void Verlet::setup_minimal(int flag) domain->box_too_small_check(); modify->setup_pre_neighbor(); neighbor->build(); + modify->setup_post_neighbor(); neighbor->ncalls = 0; } @@ -227,6 +228,7 @@ void Verlet::run(int n) int n_post_integrate = modify->n_post_integrate; int n_pre_exchange = modify->n_pre_exchange; int n_pre_neighbor = modify->n_pre_neighbor; + int n_post_neighbor = modify->n_post_neighbor; int n_pre_force = modify->n_pre_force; int n_pre_reverse = modify->n_pre_reverse; int n_post_force = modify->n_post_force; @@ -284,6 +286,10 @@ void Verlet::run(int n) } neighbor->build(); timer->stamp(Timer::NEIGH); + if (n_post_neighbor) { + modify->post_neighbor(); + timer->stamp(Timer::MODIFY); + } } // force computations diff --git a/src/version.h b/src/version.h index 0c4c4fda625cf8737092e1d15ed289d6e5052016..f9abe92ee4508bbce82c2a97e54bb566d673dad5 100644 --- a/src/version.h +++ b/src/version.h @@ -1 +1 @@ -#define LAMMPS_VERSION "22 Sep 2017" +#define LAMMPS_VERSION "23 Oct 2017" diff --git a/tools/phonon/Makefile b/tools/phonon/Makefile index 0aacb1e0864e674e98b575599a8aebac9926dd87..67f9b91fdf78298fe1f65ddef7767505ee161993 100644 --- 
a/tools/phonon/Makefile +++ b/tools/phonon/Makefile @@ -1,7 +1,7 @@ .SUFFIXES : .o .cpp # compiler and flags -CC = g++ -Wno-unused-result -LINK = $(CC) -static +CC = g++ -Wall +LINK = $(CC) CFLAGS = -O3 $(DEBUG) $(UFLAG) # OFLAGS = -O3 $(DEBUG) @@ -9,18 +9,17 @@ INC = $(LPKINC) $(TCINC) $(SPGINC) LIB = $(LPKLIB) $(TCLIB) $(SPGLIB) # # cLapack library needed -LPKINC = -I/opt/libs/clapack/3.2.1/include -LPKLIB = -L/opt/libs/clapack/3.2.1/lib -lclapack -lblas -lf2c #-lm +LPKINC = +LPKLIB =-llapack # -# Tricubic library needed -TCINC = -I/opt/libs/tricubic/1.0/include -TCLIB = -L/opt/libs/tricubic/1.0/lib -ltricubic # # spglib 1.8.2, used to get the irreducible q-points # if UFLAG is not set, spglib won't be used. -UFLAG = -DUseSPG -SPGINC = -I/opt/libs/spglib/1.8.2/include -SPGLIB = -L/opt/libs/spglib/1.8.2/lib -lsymspg + +# UFLAG = -DUseSPG +# SPGINC = -I/opt/libs/spglib/1.8.2/include +# SPGLIB = -L/opt/libs/spglib/1.8.2/lib -lsymspg + # if spglib other than version 1.8.2 is used, please # modify file phonon.cpp, instruction can be found by searching 1.8.2 @@ -36,7 +35,7 @@ SRC = $(wildcard *.cpp) OBJ = $(SRC:.cpp=.o) #==================================================================== -all: ver ${EXE} +all: ${EXE} ${EXE}: $(OBJ) $(LINK) $(OFLAGS) $(OBJ) $(LIB) -o $@ @@ -59,3 +58,16 @@ ver: $(CC) $(CFLAGS) -c $< .cpp.o: $(CC) $(CFLAGS) $(INC) -c $< + +#==================================================================== +# dependencies +disp.o: disp.cpp phonon.h dynmat.h memory.h interpolate.h green.h timer.h \ + global.h +dynmat.o: dynmat.cpp dynmat.h memory.h interpolate.h version.h global.h +green.o: green.cpp green.h memory.h global.h +interpolate.o: interpolate.cpp interpolate.h memory.h global.h +main.o: main.cpp dynmat.h memory.h interpolate.h phonon.h +memory.o: memory.cpp memory.h +phonon.o: phonon.cpp phonon.h dynmat.h memory.h interpolate.h green.h \ + timer.h global.h +timer.o: timer.cpp timer.h diff --git a/tools/phonon/README b/tools/phonon/README 
index ae6383b6bd2178181a1761726c0200cbd77a6d57..b54d96d8a33300b185410705142ed27ad9db824c 100644 --- a/tools/phonon/README +++ b/tools/phonon/README @@ -5,15 +5,9 @@ analyse the phonon related information. #------------------------------------------------------------------------------- 1. Dependencies - The clapack library is needed to solve the eigen problems, - which could be downloaded from: - http://www.netlib.org/clapack/ - - The tricubic library is also needed to do tricubic interpolations, - which could be obtained from: - http://orca.princeton.edu/francois/software/tricubic/ - or - http://1drv.ms/1J2WFYk + The LAPACK library is needed to solve the eigen problems. + http://www.netlib.org/lapack/ + Intel MKL can be used as well. The spglib is optionally needed, enabling one to evaluate the phonon density of states or vibrational thermal properties diff --git a/tools/phonon/disp.cpp b/tools/phonon/disp.cpp index 2fa603916cf8e662b7764f66e9eac17edb44ec46..218e01e7fc07797ba3ac6e1843597e49e52948ca 100644 --- a/tools/phonon/disp.cpp +++ b/tools/phonon/disp.cpp @@ -18,7 +18,8 @@ void Phonon::pdisp() { // ask the output file name and write the header. 
char str[MAXLINE]; - for (int ii = 0; ii < 80; ++ii) printf("="); printf("\n"); + for (int ii = 0; ii < 80; ++ii) printf("="); + printf("\n"); #ifdef UseSPG // ask method to generate q-lines int method = 2; @@ -53,7 +54,6 @@ void Phonon::pdisp() while (1){ for (int i = 0; i < 3; ++i) qstr[i] = qend[i]; - int quit = 0; printf("\nPlease input the start q-point in unit of B1->B3, q to exit [%g %g %g]: ", qstr[0], qstr[1], qstr[2]); int n = count_words(fgets(str, MAXLINE, stdin)); ptr = strtok(str, " \t\n\r\f"); @@ -2844,7 +2844,8 @@ void Phonon::pdisp() printf("\nPhonon dispersion data are written to: %s, you can visualize the results\n", fname); printf("by invoking: `gnuplot pdisp.gnuplot; gv pdisp.eps`\n"); } - for (int ii = 0; ii < 80; ++ii) printf("="); printf("\n"); + for (int ii = 0; ii < 80; ++ii) printf("="); + printf("\n"); delete []fname; nodes.clear(); diff --git a/tools/phonon/dynmat.cpp b/tools/phonon/dynmat.cpp index e82f473130eb3a63822436c2ca3ae2527e66146e..3b7bfe826853f86c61a65f7becc42787130e2373 100644 --- a/tools/phonon/dynmat.cpp +++ b/tools/phonon/dynmat.cpp @@ -3,6 +3,11 @@ #include "version.h" #include "global.h" +extern "C" void zheevd_(char *, char *, long int *, doublecomplex *, + long int *, double *, doublecomplex *, + long int *, double *, long int *, long int *, + long int *, long int *); + // to initialize the class DynMat::DynMat(int narg, char **arg) { @@ -81,7 +86,8 @@ DynMat::DynMat(int narg, char **arg) printf("Number of atoms per unit cell : %d\n", nucell); printf("System dimension : %d\n", sysdim); printf("Boltzmann constant in used units : %g\n", boltz); - for (int i = 0; i < 80; ++i) printf("="); printf("\n"); + for (int i = 0; i < 80; ++i) printf("="); + printf("\n"); if (sysdim < 1||sysdim > 3||nx < 1||ny < 1||nz < 1||nucell < 1){ printf("Wrong values read from header of file: %s, please check the binary file!\n", binfile); fclose(fp); exit(3); @@ -117,11 +123,11 @@ DynMat::DynMat(int narg, char **arg) memory->create(attyp, 
nucell, "DynMat:attyp"); memory->create(M_inv_sqrt, nucell, "DynMat:M_inv_sqrt"); - if ( fread(&Tmeasure, sizeof(double), 1, fp) != 1 ){printf("\nError while reading temperature from file: %s\n", binfile); fclose(fp); exit(3);} - if ( fread(&basevec[0], sizeof(double), 9, fp) != 9 ){printf("\nError while reading lattice info from file: %s\n", binfile); fclose(fp); exit(3);} - if ( fread(basis[0], sizeof(double), fftdim, fp) != fftdim){printf("\nError while reading basis info from file: %s\n", binfile); fclose(fp); exit(3);} - if ( fread(&attyp[0], sizeof(int), nucell, fp) != nucell){printf("\nError while reading atom types from file: %s\n", binfile); fclose(fp); exit(3);} - if ( fread(&M_inv_sqrt[0], sizeof(double), nucell, fp) != nucell){printf("\nError while reading atomic masses from file: %s\n", binfile); fclose(fp); exit(3);} + if ( (int) fread(&Tmeasure, sizeof(double), 1, fp) != 1 ){printf("\nError while reading temperature from file: %s\n", binfile); fclose(fp); exit(3);} + if ( (int) fread(&basevec[0], sizeof(double), 9, fp) != 9 ){printf("\nError while reading lattice info from file: %s\n", binfile); fclose(fp); exit(3);} + if ( (int) fread(basis[0], sizeof(double), fftdim, fp) != fftdim){printf("\nError while reading basis info from file: %s\n", binfile); fclose(fp); exit(3);} + if ( (int) fread(&attyp[0], sizeof(int), nucell, fp) != nucell){printf("\nError while reading atom types from file: %s\n", binfile); fclose(fp); exit(3);} + if ( (int) fread(&M_inv_sqrt[0], sizeof(double), nucell, fp) != nucell){printf("\nError while reading atomic masses from file: %s\n", binfile); fclose(fp); exit(3);} fclose(fp); car2dir(); @@ -229,9 +235,9 @@ return; int DynMat::geteigen(double *egv, int flag) { char jobz, uplo; - integer n, lda, lwork, lrwork, *iwork, liwork, info; + long int n, lda, lwork, lrwork, *iwork, liwork, info; doublecomplex *work; - doublereal *w = &egv[0], *rwork; + double *w = &egv[0], *rwork; n = fftdim; if (flag) jobz = 'V'; @@ -338,7 +344,8 @@ 
void DynMat::EnforceASR() char *ptr = strtok(str," \t\n\r\f"); if (ptr) nasr = atoi(ptr); if (nasr < 1){ - for (int i=0; i<80; i++) printf("="); printf("\n"); + for (int i=0; i<80; i++) printf("="); + printf("\n"); return; } @@ -404,7 +411,8 @@ void DynMat::EnforceASR() if (i == 99){ printf("...... (%d more skiped)", fftdim-100); break;} } printf("\n"); - for (int i = 0; i < 80; ++i) printf("="); printf("\n\n"); + for (int i = 0; i < 80; ++i) printf("="); + printf("\n\n"); return; } @@ -456,7 +464,7 @@ return; * --------------------------------------------------------------------*/ void DynMat::GaussJordan(int n, double *Mat) { - int i,icol,irow,j,k,l,ll,idr,idc; + int i,icol=0,irow=0,j,k,l,ll,idr,idc; int *indxc,*indxr,*ipiv; double big, nmjk; double dum, pivinv; diff --git a/tools/phonon/dynmat.h b/tools/phonon/dynmat.h index 1d6e716584e86c8a8fc34407254f8daeec6c1e49..f5bd4010b8739299b219121d4c66f685c883d4ef 100644 --- a/tools/phonon/dynmat.h +++ b/tools/phonon/dynmat.h @@ -7,11 +7,6 @@ #include "memory.h" #include "interpolate.h" -extern "C"{ -#include "f2c.h" -#include "clapack.h" -} - using namespace std; class DynMat { diff --git a/tools/phonon/green.cpp b/tools/phonon/green.cpp index 8f8946dc4faaaa5ec16d0abda28e7c7abc5f2bf2..35514c03fbb640528c2939cd667c167e9930aaf0 100644 --- a/tools/phonon/green.cpp +++ b/tools/phonon/green.cpp @@ -224,7 +224,6 @@ void Green::recursion() { // local variables std::complex<double> Z, rec_x, rec_x_inv; - std::complex<double> cunit = std::complex<double>(0.,1.); double w = wmin; diff --git a/tools/phonon/interpolate.cpp b/tools/phonon/interpolate.cpp index 8c0cbde1cec52e605f17e0c6b1359334d25d4c91..954062d4159f4dae018a4330b9c5522cb5c674f1 100644 --- a/tools/phonon/interpolate.cpp +++ b/tools/phonon/interpolate.cpp @@ -1,7 +1,125 @@ #include "interpolate.h" -#include "math.h" +#include <math.h> #include "global.h" +/////////////////////// +// tricubic library code +static int A[64][64] = { +{ 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, +{ 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, +{-3, 3, 0, 0, 0, 0, 0, 0,-2,-1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, +{ 2,-2, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, +{ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, +{ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, +{ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,-3, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,-2,-1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, +{ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2,-2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, +{-3, 0, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,-2, 0,-1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, +{ 0, 0, 0, 0, 0, 0, 0, 0,-3, 0, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,-2, 0,-1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, +{ 9,-9,-9, 9, 0, 0, 0, 0, 6, 3,-6,-3, 0, 0, 0, 0, 6,-6, 3,-3, 0, 0, 0, 0, 0, 0, 
0, 0, 0, 0, 0, 0, 4, 2, 2, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, +{-6, 6, 6,-6, 0, 0, 0, 0,-3,-3, 3, 3, 0, 0, 0, 0,-4, 4,-2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,-2,-2,-1,-1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, +{ 2, 0,-2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, +{ 0, 0, 0, 0, 0, 0, 0, 0, 2, 0,-2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, +{-6, 6, 6,-6, 0, 0, 0, 0,-4,-2, 4, 2, 0, 0, 0, 0,-3, 3,-3, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,-2,-1,-2,-1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, +{ 4,-4,-4, 4, 0, 0, 0, 0, 2, 2,-2,-2, 0, 0, 0, 0, 2,-2, 2,-2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, +{ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, +{ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, +{ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,-3, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,-2,-1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, +{ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2,-2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, +{ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, +{ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0}, +{ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,-3, 3, 0, 0, 0, 0, 0, 0,-2,-1, 0, 0, 0, 0, 0, 0}, +{ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2,-2, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0}, +{ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,-3, 0, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,-2, 0,-1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, +{ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,-3, 0, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,-2, 0,-1, 0, 0, 0, 0, 0}, +{ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 9,-9,-9, 9, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 6, 3,-6,-3, 0, 0, 0, 0, 6,-6, 3,-3, 0, 0, 0, 0, 4, 2, 2, 1, 0, 0, 0, 0}, +{ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,-6, 6, 6,-6, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,-3,-3, 3, 3, 0, 0, 0, 0,-4, 4,-2, 2, 0, 0, 0, 0,-2,-2,-1,-1, 0, 0, 0, 0}, +{ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0,-2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, +{ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0,-2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0}, +{ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,-6, 6, 6,-6, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,-4,-2, 4, 2, 0, 0, 0, 0,-3, 3,-3, 3, 
0, 0, 0, 0,-2,-1,-2,-1, 0, 0, 0, 0}, +{ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 4,-4,-4, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 2,-2,-2, 0, 0, 0, 0, 2,-2, 2,-2, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0}, +{-3, 0, 0, 0, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,-2, 0, 0, 0,-1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, +{ 0, 0, 0, 0, 0, 0, 0, 0,-3, 0, 0, 0, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,-2, 0, 0, 0,-1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, +{ 9,-9, 0, 0,-9, 9, 0, 0, 6, 3, 0, 0,-6,-3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 6,-6, 0, 0, 3,-3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 4, 2, 0, 0, 2, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, +{-6, 6, 0, 0, 6,-6, 0, 0,-3,-3, 0, 0, 3, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,-4, 4, 0, 0,-2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,-2,-2, 0, 0,-1,-1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, +{ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,-3, 0, 0, 0, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,-2, 0, 0, 0,-1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, +{ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,-3, 0, 0, 0, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,-2, 0, 0, 0,-1, 0, 0, 0}, +{ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 9,-9, 0, 0,-9, 9, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 6, 3, 0, 0,-6,-3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 6,-6, 0, 0, 3,-3, 0, 0, 4, 2, 0, 0, 2, 1, 0, 0}, +{ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,-6, 6, 0, 0, 6,-6, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,-3,-3, 0, 0, 3, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,-4, 4, 0, 0,-2, 2, 0, 0,-2,-2, 0, 0,-1,-1, 0, 0}, +{ 9, 0,-9, 0,-9, 0, 9, 0, 0, 0, 0, 0, 0, 0, 0, 0, 6, 0, 3, 0,-6, 0,-3, 0, 6, 0,-6, 0, 3, 0,-3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 4, 0, 2, 0, 2, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0}, +{ 
0, 0, 0, 0, 0, 0, 0, 0, 9, 0,-9, 0,-9, 0, 9, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 6, 0, 3, 0,-6, 0,-3, 0, 6, 0,-6, 0, 3, 0,-3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 4, 0, 2, 0, 2, 0, 1, 0}, +{-27,27,27,-27,27,-27,-27,27,-18,-9,18, 9,18, 9,-18,-9,-18,18,-9, 9,18,-18, 9,-9,-18,18,18,-18,-9, 9, 9,-9,-12,-6,-6,-3,12, 6, 6, 3,-12,-6,12, 6,-6,-3, 6, 3,-12,12,-6, 6,-6, 6,-3, 3,-8,-4,-4,-2,-4,-2,-2,-1}, +{18,-18,-18,18,-18,18,18,-18, 9, 9,-9,-9,-9,-9, 9, 9,12,-12, 6,-6,-12,12,-6, 6,12,-12,-12,12, 6,-6,-6, 6, 6, 6, 3, 3,-6,-6,-3,-3, 6, 6,-6,-6, 3, 3,-3,-3, 8,-8, 4,-4, 4,-4, 2,-2, 4, 4, 2, 2, 2, 2, 1, 1}, +{-6, 0, 6, 0, 6, 0,-6, 0, 0, 0, 0, 0, 0, 0, 0, 0,-3, 0,-3, 0, 3, 0, 3, 0,-4, 0, 4, 0,-2, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,-2, 0,-2, 0,-1, 0,-1, 0, 0, 0, 0, 0, 0, 0, 0, 0}, +{ 0, 0, 0, 0, 0, 0, 0, 0,-6, 0, 6, 0, 6, 0,-6, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,-3, 0,-3, 0, 3, 0, 3, 0,-4, 0, 4, 0,-2, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0,-2, 0,-2, 0,-1, 0,-1, 0}, +{18,-18,-18,18,-18,18,18,-18,12, 6,-12,-6,-12,-6,12, 6, 9,-9, 9,-9,-9, 9,-9, 9,12,-12,-12,12, 6,-6,-6, 6, 6, 3, 6, 3,-6,-3,-6,-3, 8, 4,-8,-4, 4, 2,-4,-2, 6,-6, 6,-6, 3,-3, 3,-3, 4, 2, 4, 2, 2, 1, 2, 1}, +{-12,12,12,-12,12,-12,-12,12,-6,-6, 6, 6, 6, 6,-6,-6,-6, 6,-6, 6, 6,-6, 6,-6,-8, 8, 8,-8,-4, 4, 4,-4,-3,-3,-3,-3, 3, 3, 3, 3,-4,-4, 4, 4,-2,-2, 2, 2,-4, 4,-4, 4,-2, 2,-2, 2,-2,-2,-2,-2,-1,-1,-1,-1}, +{ 2, 0, 0, 0,-2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, +{ 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0, 0,-2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, +{-6, 6, 0, 0, 6,-6, 0, 0,-4,-2, 0, 0, 4, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,-3, 3, 0, 0,-3, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,-2,-1, 0, 0,-2,-1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, +{ 4,-4, 
0, 0,-4, 4, 0, 0, 2, 2, 0, 0,-2,-2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2,-2, 0, 0, 2,-2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, +{ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0, 0,-2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, +{ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0, 0,-2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0}, +{ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,-6, 6, 0, 0, 6,-6, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,-4,-2, 0, 0, 4, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,-3, 3, 0, 0,-3, 3, 0, 0,-2,-1, 0, 0,-2,-1, 0, 0}, +{ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 4,-4, 0, 0,-4, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 2, 0, 0,-2,-2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2,-2, 0, 0, 2,-2, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0}, +{-6, 0, 6, 0, 6, 0,-6, 0, 0, 0, 0, 0, 0, 0, 0, 0,-4, 0,-2, 0, 4, 0, 2, 0,-3, 0, 3, 0,-3, 0, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,-2, 0,-1, 0,-2, 0,-1, 0, 0, 0, 0, 0, 0, 0, 0, 0}, +{ 0, 0, 0, 0, 0, 0, 0, 0,-6, 0, 6, 0, 6, 0,-6, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,-4, 0,-2, 0, 4, 0, 2, 0,-3, 0, 3, 0,-3, 0, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0,-2, 0,-1, 0,-2, 0,-1, 0}, +{18,-18,-18,18,-18,18,18,-18,12, 6,-12,-6,-12,-6,12, 6,12,-12, 6,-6,-12,12,-6, 6, 9,-9,-9, 9, 9,-9,-9, 9, 8, 4, 4, 2,-8,-4,-4,-2, 6, 3,-6,-3, 6, 3,-6,-3, 6,-6, 3,-3, 6,-6, 3,-3, 4, 2, 2, 1, 4, 2, 2, 1}, +{-12,12,12,-12,12,-12,-12,12,-6,-6, 6, 6, 6, 6,-6,-6,-8, 8,-4, 4, 8,-8, 4,-4,-6, 6, 6,-6,-6, 6, 6,-6,-4,-4,-2,-2, 4, 4, 2, 2,-3,-3, 3, 3,-3,-3, 3, 3,-4, 4,-2, 2,-4, 4,-2, 2,-2,-2,-1,-1,-2,-2,-1,-1}, +{ 4, 0,-4, 0,-4, 0, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 2, 0,-2, 0,-2, 0, 2, 0,-2, 0, 2, 0,-2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0}, +{ 0, 0, 0, 0, 0, 0, 0, 0, 4, 0,-4, 
0,-4, 0, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 2, 0,-2, 0,-2, 0, 2, 0,-2, 0, 2, 0,-2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 1, 0, 1, 0}, +{-12,12,12,-12,12,-12,-12,12,-8,-4, 8, 4, 8, 4,-8,-4,-6, 6,-6, 6, 6,-6, 6,-6,-6, 6, 6,-6,-6, 6, 6,-6,-4,-2,-4,-2, 4, 2, 4, 2,-4,-2, 4, 2,-4,-2, 4, 2,-3, 3,-3, 3,-3, 3,-3, 3,-2,-1,-2,-1,-2,-1,-2,-1}, +{ 8,-8,-8, 8,-8, 8, 8,-8, 4, 4,-4,-4,-4,-4, 4, 4, 4,-4, 4,-4,-4, 4,-4, 4, 4,-4,-4, 4, 4,-4,-4, 4, 2, 2, 2, 2,-2,-2,-2,-2, 2, 2,-2,-2, 2, 2,-2,-2, 2,-2, 2,-2, 2,-2, 2,-2, 1, 1, 1, 1, 1, 1, 1, 1}}; + +static int ijk2n(int i, int j, int k) { /* Map the exponent triple (i,j,k) to the flat index i+4*j+16*k; tricubic_eval uses it to address a[]. */ + return(i+4*j+16*k); +} + +/* ---------------------------------------------------------------------------- */ + +static void tricubic_get_coeff_stacked(double a[64], double x[64]) { /* Fill a with the matrix-vector product of the table A and x: a[i] is the sum over j=0..63 of A[i][j] times x[j]. */ + int i,j; + for (i=0;i<64;i++) { + a[i]=(double)(0.0); + for (j=0;j<64;j++) { + a[i]+=A[i][j]*x[j]; + } + } +} + +static void tricubic_get_coeff(double a[64], double f[8], double dfdx[8], double dfdy[8], double dfdz[8], double d2fdxdy[8], double d2fdxdz[8], double d2fdydz[8], double d3fdxdydz[8]) { /* Stack the eight 8-element inputs into one 64-vector in argument order (f, dfdx, dfdy, dfdz, d2fdxdy, d2fdxdz, d2fdydz, d3fdxdydz) at offsets 0, 8, ..., 56, then write the resulting coefficients into a via tricubic_get_coeff_stacked. NOTE(review): the 8 entries per array are presumably samples at the corners of one grid cell; confirm against the caller. */ + int i; + double x[64]; + for (i=0;i<8;i++) { + x[0+i]=f[i]; + x[8+i]=dfdx[i]; + x[16+i]=dfdy[i]; + x[24+i]=dfdz[i]; + x[32+i]=d2fdxdy[i]; + x[40+i]=d2fdxdz[i]; + x[48+i]=d2fdydz[i]; + x[56+i]=d3fdxdydz[i]; + } + tricubic_get_coeff_stacked(a,x); +} + +static double tricubic_eval(double a[64], double x, double y, double z) { /* Return the sum, over i,j,k = 0..3, of a[ijk2n(i,j,k)] times x^i times y^j times z^k. */ + int i,j,k; + double ret=(double)(0.0); + /* TRICUBIC EVAL + This is the short version of tricubic_eval. It is used to compute + the value of the function at a given point (x,y,z). To compute + partial derivatives of f, use the full version with the extra args. 
+ */ + for (i=0;i<4;i++) { + for (j=0;j<4;j++) { + for (k=0;k<4;k++) { + ret+=a[ijk2n(i,j,k)]*pow(x,i)*pow(y,j)*pow(z,k); /* accumulate the (i,j,k) monomial term */ + } + } + } + return(ret); +} + /* ---------------------------------------------------------------------------- * Constructor used to get info from caller, and prepare other necessary data * ---------------------------------------------------------------------------- */ @@ -274,7 +392,8 @@ void Interpolate::set_method() which =2-im%2; printf("Your selection: %d\n", which); - for(int i=0; i<80; i++) printf("="); printf("\n\n"); + for(int i=0; i<80; i++) printf("="); + printf("\n\n"); if (which == 1) tricubic_init(); @@ -306,4 +425,3 @@ void Interpolate::reset_gamma() return; } -/* ---------------------------------------------------------------------------- */ diff --git a/tools/phonon/interpolate.h b/tools/phonon/interpolate.h index e192fcac87ef6d7b8be3f741075c8db94583ca61..04a358ae711180ca35cbc9bc75f1a6def233185f 100644 --- a/tools/phonon/interpolate.h +++ b/tools/phonon/interpolate.h @@ -5,11 +5,8 @@ #include "stdlib.h" #include "string.h" #include "memory.h" -#include <tricubic.h> -extern "C"{ -#include "f2c.h" -#include "clapack.h" -} + +extern "C" typedef struct { double r, i; } doublecomplex; using namespace std; diff --git a/tools/phonon/phonon.cpp b/tools/phonon/phonon.cpp index 43bea111b426dc23f1c0ce22b59bf14e36d1a145..065885cf3f7a08e0518871b5418369617ed7d3b0 100644 --- a/tools/phonon/phonon.cpp +++ b/tools/phonon/phonon.cpp @@ -42,7 +42,8 @@ Phonon::Phonon(DynMat *dm) printf("\n"); for (int i = 0; i < 37; ++i) printf("="); printf(" Menu "); - for (int i = 0; i < 37; ++i) printf("="); printf("\n"); + for (int i = 0; i < 37; ++i) printf("="); + printf("\n"); printf(" 1. Phonon DOS evaluation;\n"); printf(" 2. Phonon dispersion curves;\n"); printf(" 3. 
Dynamical matrix at arbitrary q;\n"); @@ -60,7 +61,8 @@ Phonon::Phonon(DynMat *dm) printf("Your choice [0]: "); if (count_words(fgets(str,MAXLINE,stdin)) > 0) job = atoi(strtok(str," \t\n\r\f")); printf("\nYour selection: %d\n", job); - for (int i = 0; i < 80; ++i) printf("=");printf("\n\n"); + for (int i = 0; i < 80; ++i) printf("="); + printf("\n\n"); // now to do the job according to user's choice if (job == 1) pdos(); @@ -414,7 +416,8 @@ void Phonon::vfanyq() dynmat->geteigen(egvs, 0); printf("q-point: [%lg %lg %lg], ", q[0], q[1], q[2]); printf("vibrational frequencies at this q-point:\n"); - for (int i = 0; i < ndim; ++i) printf("%lg ", egvs[i]); printf("\n\n"); + for (int i = 0; i < ndim; ++i) printf("%lg ", egvs[i]); + printf("\n\n"); } return; @@ -1001,7 +1004,8 @@ void Phonon::ShowCell() printf("\n"); for (int i = 0; i < 30; ++i) printf("="); printf(" Unit Cell Info "); - for (int i = 0; i < 30; ++i) printf("="); printf("\n"); + for (int i = 0; i < 30; ++i) printf("="); + printf("\n"); printf("Number of atoms in the unit cell: %d\n", dynmat->nucell); printf("Basis vectors of the unit cell:\n"); printf(" %15.8f %15.8f %15.8f\n", dynmat->basevec[0], dynmat->basevec[1], dynmat->basevec[2]); @@ -1091,7 +1095,7 @@ int Phonon::count_words(const char *line) strcpy(copy,line); char *ptr; - if (ptr = strchr(copy,'#')) *ptr = '\0'; + if ((ptr = strchr(copy,'#'))) *ptr = '\0'; if (strtok(copy," \t\n\r\f") == NULL) { memory->destroy(copy); diff --git a/tools/phonon/version.h b/tools/phonon/version.h index 8ed0e80aa7035405dafcc3c900296a4c6e7e8552..decab631b0a05e0ffdc0cb6092d52d66cd601e82 100644 --- a/tools/phonon/version.h +++ b/tools/phonon/version.h @@ -1 +1 @@ -#define VERSION 7 +#define VERSION 8